From d57c9d1941bd6b3606625e2752d3d1ced12ac257 Mon Sep 17 00:00:00 2001 From: Pascal Brogle Date: Thu, 12 Oct 2023 19:55:38 +0200 Subject: [PATCH 1/3] fix: prevent proxmox secret form being deleted too early --- api/v1beta1/proxmoxcluster_types.go | 3 ++- cloud/interfaces.go | 1 + cloud/scope/clients.go | 5 +++++ cloud/scope/cluster.go | 4 ++++ cloud/scope/machine.go | 4 ++++ controllers/proxmoxcluster_controller.go | 25 ++++++++++++++++++++++++ internal/fake/cluster_scope.go | 9 +++++++++ 7 files changed, 50 insertions(+), 1 deletion(-) diff --git a/api/v1beta1/proxmoxcluster_types.go b/api/v1beta1/proxmoxcluster_types.go index b1ea254..709138e 100644 --- a/api/v1beta1/proxmoxcluster_types.go +++ b/api/v1beta1/proxmoxcluster_types.go @@ -23,7 +23,8 @@ import ( const ( // ClusterFinalizer - ClusterFinalizer = "proxmoxcluster.infrastructure.cluster.x-k8s.io" + ClusterFinalizer = "proxmoxcluster.infrastructure.cluster.x-k8s.io" + ClusterSecretFinalizer = ClusterFinalizer + "/secret" ) // ProxmoxClusterSpec defines the desired state of ProxmoxCluster diff --git a/cloud/interfaces.go b/cloud/interfaces.go index 4da9507..853b703 100644 --- a/cloud/interfaces.go +++ b/cloud/interfaces.go @@ -17,6 +17,7 @@ type Reconciler interface { type Client interface { CloudClient() *proxmox.Service + K8sClient() *client.Client } type Cluster interface { diff --git a/cloud/scope/clients.go b/cloud/scope/clients.go index 8d74c11..9adb3c7 100644 --- a/cloud/scope/clients.go +++ b/cloud/scope/clients.go @@ -26,6 +26,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" infrav1 "github.com/sp-yduck/cluster-api-provider-proxmox/api/v1beta1" ) @@ -53,6 +54,10 @@ func newComputeService(ctx context.Context, cluster *infrav1.ProxmoxCluster, crC Name: cluster.Name, UID: cluster.UID, })) + + // Add finalizer to ensure secret remains present until we have cleaned up everything ourselves + controllerutil.AddFinalizer(&secret, infrav1.ClusterSecretFinalizer) + if err := crClient.Update(ctx, &secret); err != nil { return nil, fmt.Errorf("failed to set ownerReference to secret: %w", err) } diff --git a/cloud/scope/cluster.go b/cloud/scope/cluster.go index 231d0e6..f034c06 100644 --- a/cloud/scope/cluster.go +++ b/cloud/scope/cluster.go @@ -100,6 +100,10 @@ func (s *ClusterScope) CloudClient() *proxmox.Service { return s.ProxmoxServices.Compute } +func (m *ClusterScope) K8sClient() *client.Client { + return &m.client +} + func (s *ClusterScope) Close() error { return s.PatchObject() } diff --git a/cloud/scope/machine.go b/cloud/scope/machine.go index 43a50fb..b7badc9 100644 --- a/cloud/scope/machine.go +++ b/cloud/scope/machine.go @@ -83,6 +83,10 @@ func (m *MachineScope) CloudClient() *proxmox.Service { return m.ClusterGetter.CloudClient() } +func (m *MachineScope) K8sClient() *client.Client { + return &m.client +} + func (m *MachineScope) GetStorage() infrav1.Storage { return m.ClusterGetter.ProxmoxCluster.Spec.Storage } diff --git a/controllers/proxmoxcluster_controller.go b/controllers/proxmoxcluster_controller.go index fa9bbd0..fc6befd 100644 --- a/controllers/proxmoxcluster_controller.go +++ b/controllers/proxmoxcluster_controller.go @@ -19,6 +19,7 @@ package controllers import ( "context" "fmt" + corev1 "k8s.io/api/core/v1" "time" "github.com/pkg/errors" @@ -156,12 +157,36 @@ func (r *ProxmoxClusterReconciler) reconcileDelete(ctx context.Context, clusterS } } + if err := 
r.RemoveProxmoxSecretFinalizer(ctx, clusterScope); err != nil { + return ctrl.Result{RequeueAfter: 5 * time.Second}, err + } + log.Info("Reconciled ProxmoxCluster") controllerutil.RemoveFinalizer(clusterScope.ProxmoxCluster, infrav1.ClusterFinalizer) record.Event(clusterScope.ProxmoxCluster, "ProxmoxClusterReconcile", "Reconciled") return ctrl.Result{}, nil } +func (r *ProxmoxClusterReconciler) RemoveProxmoxSecretFinalizer(ctx context.Context, clusterScope *scope.ClusterScope) error { + serverRef := clusterScope.ProxmoxCluster.Spec.ServerRef + secretRef := serverRef.SecretRef + if secretRef != nil { + var secret corev1.Secret + key := client.ObjectKey{Namespace: secretRef.Namespace, Name: secretRef.Name} + k8sClient := *clusterScope.K8sClient() + if err := k8sClient.Get(ctx, key, &secret); err != nil { + return err + } + updated := controllerutil.RemoveFinalizer(&secret, infrav1.ClusterSecretFinalizer) + if updated { + if err := k8sClient.Update(ctx, &secret); err != nil { + return err + } + } + } + return nil +} + // SetupWithManager sets up the controller with the Manager. func (r *ProxmoxClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). diff --git a/internal/fake/cluster_scope.go b/internal/fake/cluster_scope.go index c3485cf..b9d2d8a 100644 --- a/internal/fake/cluster_scope.go +++ b/internal/fake/cluster_scope.go @@ -3,12 +3,14 @@ package fake import ( "github.com/sp-yduck/proxmox-go/proxmox" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/controller-runtime/pkg/client" infrav1 "github.com/sp-yduck/cluster-api-provider-proxmox/api/v1beta1" ) type FakeClusterScope struct { cloudClient *proxmox.Service + k8sClient *client.Client name string namespace string controlPlaneEndpoint clusterv1.APIEndpoint @@ -34,6 +36,9 @@ func (f *FakeClusterScope) Name() string { func (f *FakeClusterScope) Namespace() string { return f.namespace } +func (f *FakeClusterScope) FailureDomains() clusterv1.FailureDomains { + return clusterv1.FailureDomains{} +} func (f *FakeClusterScope) ControlPlaneEndpoint() clusterv1.APIEndpoint { return f.controlPlaneEndpoint @@ -47,6 +52,10 @@ func (f *FakeClusterScope) CloudClient() *proxmox.Service { return f.cloudClient } +func (f *FakeClusterScope) K8sClient() *client.Client { + return f.k8sClient +} + func (f *FakeClusterScope) SetControlPlaneEndpoint(endpoint clusterv1.APIEndpoint) { f.controlPlaneEndpoint = endpoint } From 7f14de80ff68d093f87b95b98edd10d04f349549 Mon Sep 17 00:00:00 2001 From: Pascal Brogle Date: Thu, 12 Oct 2023 19:59:52 +0200 Subject: [PATCH 2/3] fix: ensure vm instance is running as part of reconcilation --- cloud/scope/machine.go | 4 + cloud/services/compute/instance/reconcile.go | 20 +++-- cloud/services/reconcile_error.go | 85 ++++++++++++++++++++ controllers/proxmoxmachine_controller.go | 20 ++++- 4 files changed, 123 insertions(+), 6 deletions(-) create mode 100644 cloud/services/reconcile_error.go diff --git a/cloud/scope/machine.go b/cloud/scope/machine.go index b7badc9..0ae5578 100644 --- a/cloud/scope/machine.go +++ b/cloud/scope/machine.go @@ -214,6 +214,10 @@ func (m *MachineScope) SetReady() { m.ProxmoxMachine.Status.Ready = true } +func (m *MachineScope) SetNotReady() { + m.ProxmoxMachine.Status.Ready = false +} + func (m *MachineScope) SetFailureMessage(v error) { m.ProxmoxMachine.Status.FailureMessage = pointer.String(v.Error()) } diff --git a/cloud/services/compute/instance/reconcile.go b/cloud/services/compute/instance/reconcile.go index 6279841..074671f 100644 --- 
a/cloud/services/compute/instance/reconcile.go +++ b/cloud/services/compute/instance/reconcile.go @@ -37,7 +37,7 @@ func (s *Service) Reconcile(ctx context.Context) error { if err := s.scope.SetProviderID(*uuid); err != nil { return err } - s.scope.SetInstanceStatus(infrav1.InstanceStatus(instance.VM.Status)) + s.scope.SetNodeName(instance.Node) s.scope.SetVMID(instance.VM.VMID) @@ -46,6 +46,14 @@ func (s *Service) Reconcile(ctx context.Context) error { return err } s.scope.SetConfigStatus(*config) + + if instance.VM.Status != api.ProcessStatusRunning { + err = s.ensureRunning(ctx, *instance) + s.scope.SetInstanceStatus(infrav1.InstanceStatusRunning) + } else { + s.scope.SetInstanceStatus(infrav1.InstanceStatus(instance.VM.Status)) + } + return nil } @@ -65,7 +73,7 @@ func (s *Service) Delete(ctx context.Context) error { // must stop or pause instance before deletion // otherwise deletion will be fail - if err := ensureStoppedOrPaused(ctx, *instance); err != nil { + if err := s.ensureStoppedOrPaused(ctx, *instance); err != nil { return err } @@ -155,13 +163,13 @@ func (s *Service) createInstance(ctx context.Context) (*proxmox.VirtualMachine, } // vm status - if err := ensureRunning(ctx, *instance); err != nil { + if err := s.ensureRunning(ctx, *instance); err != nil { return nil, err } return instance, nil } -func ensureRunning(ctx context.Context, instance proxmox.VirtualMachine) error { +func (s *Service) ensureRunning(ctx context.Context, instance proxmox.VirtualMachine) error { log := log.FromContext(ctx) // ensure instance is running switch instance.VM.Status { @@ -172,18 +180,20 @@ func ensureRunning(ctx context.Context, instance proxmox.VirtualMachine) error { log.Error(err, "failed to start instance process") return err } + instance.VM.Status = api.ProcessStatusRunning case api.ProcessStatusPaused: if err := instance.Resume(ctx, api.VirtualMachineResumeOption{}); err != nil { log.Error(err, "failed to resume instance process") return err } + instance.VM.Status = api.ProcessStatusRunning default: return errors.Errorf("unexpected status : %s", instance.VM.Status) } return nil } -func ensureStoppedOrPaused(ctx context.Context, instance proxmox.VirtualMachine) error { +func (s *Service) ensureStoppedOrPaused(ctx context.Context, instance proxmox.VirtualMachine) error { log := log.FromContext(ctx) switch instance.VM.Status { case api.ProcessStatusRunning: diff --git a/cloud/services/reconcile_error.go b/cloud/services/reconcile_error.go new file mode 100644 index 0000000..6c2bf51 --- /dev/null +++ b/cloud/services/reconcile_error.go @@ -0,0 +1,85 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package services + +import ( + "fmt" + "time" +) + +// ReconcileError represents an generic error of Reconcile loop. ErrorType indicates what type +// of action is required to recover. It can take two values: +// 1. `Transient` - Can be recovered , will be requeued after. +// 2. `Terminal` - Cannot be recovered, will not be requeued. 
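// Illustrative usage (editorial sketch, not part of this patch): from a caller's
// perspective, a service can surface a recoverable condition as a transient
// ReconcileError and let the reconciler translate it into a delayed requeue,
// mirroring WithTransientError in network.go and checkReconcileError in
// proxmoxmachine_controller.go elsewhere in this series. The
// reconcileSomething/checkDependency names below are placeholders.
//
//	func (s *Service) reconcileSomething(ctx context.Context) error {
//		ready, err := s.checkDependency(ctx) // hypothetical helper
//		if err != nil {
//			return services.WithTerminalError(err) // unrecoverable: no requeue
//		}
//		if !ready {
//			return services.WithTransientError(fmt.Errorf("dependency not ready"), 5*time.Second)
//		}
//		return nil
//	}
//
//	// in the reconciler:
//	var reconcileError services.ReconcileError
//	if errors.As(err, &reconcileError) && reconcileError.IsTransient() {
//		return ctrl.Result{Requeue: true, RequeueAfter: reconcileError.GetRequeueAfter()}, nil
//	}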
+ +type ReconcileError struct { + error + errorType ReconcileErrorType + RequeueAfter time.Duration +} + +// ReconcileErrorType represents the type of a ReconcileError. +type ReconcileErrorType string + +const ( + // TransientErrorType can be recovered, will be requeued after a configured time interval. + TransientErrorType ReconcileErrorType = "Transient" + // TerminalErrorType cannot be recovered, will not be requeued. + TerminalErrorType ReconcileErrorType = "Terminal" +) + +// Error returns the error message for a ReconcileError. +func (e ReconcileError) Error() string { + var errStr string + if e.error != nil { + errStr = e.error.Error() + } + switch e.errorType { + case TransientErrorType: + return fmt.Sprintf("%s. Object will be requeued after %s", errStr, e.GetRequeueAfter()) + case TerminalErrorType: + return fmt.Sprintf("reconcile error that cannot be recovered occurred: %s. Object will not be requeued", errStr) + default: + return fmt.Sprintf("reconcile error occurred with unknown recovery type. The actual error is: %s", errStr) + } +} + +// GetRequeueAfter gets the duration to wait until the managed object is +// requeued for further processing. +func (e ReconcileError) GetRequeueAfter() time.Duration { + return e.RequeueAfter +} + +// IsTransient returns if the ReconcileError is recoverable. +func (e ReconcileError) IsTransient() bool { + return e.errorType == TransientErrorType +} + +// IsTerminal returns if the ReconcileError is non recoverable. +func (e ReconcileError) IsTerminal() bool { + return e.errorType == TerminalErrorType +} + +// WithTransientError wraps the error in a ReconcileError with errorType as `Transient`. +func WithTransientError(err error, requeueAfter time.Duration) ReconcileError { + return ReconcileError{error: err, errorType: TransientErrorType, RequeueAfter: requeueAfter} +} + +// WithTerminalError wraps the error in a ReconcileError with errorType as `Terminal`. 
+func WithTerminalError(err error) ReconcileError { + return ReconcileError{error: err, errorType: TerminalErrorType} +} diff --git a/controllers/proxmoxmachine_controller.go b/controllers/proxmoxmachine_controller.go index 0fad385..f758199 100644 --- a/controllers/proxmoxmachine_controller.go +++ b/controllers/proxmoxmachine_controller.go @@ -156,7 +156,7 @@ func (r *ProxmoxMachineReconciler) reconcile(ctx context.Context, machineScope * if err := r.Reconcile(ctx); err != nil { log.Error(err, "Reconcile error") record.Warnf(machineScope.ProxmoxMachine, "ProxmoxMachineReconcile", "Reconcile error - %v", err) - return ctrl.Result{RequeueAfter: 5 * time.Second}, err + return checkReconcileError(err, "Failed to reconcile machine") } } @@ -171,10 +171,12 @@ func (r *ProxmoxMachineReconciler) reconcile(ctx context.Context, machineScope * case infrav1.InstanceStatusStopped: log.Info("ProxmoxMachine instance is stopped", "instance-id", *machineScope.GetBiosUUID()) record.Eventf(machineScope.ProxmoxMachine, "ProxmoxMachineReconcile", "ProxmoxMachine instance is stopped - bios-uuid: %s", *machineScope.GetBiosUUID()) + machineScope.SetNotReady() return ctrl.Result{RequeueAfter: 5 * time.Second}, nil case infrav1.InstanceStatusPaused: log.Info("ProxmoxMachine instance is paused", "instance-id", *machineScope.GetBiosUUID()) record.Eventf(machineScope.ProxmoxMachine, "ProxmoxMachineReconcile", "ProxmoxMachine instance is paused - bios-uuid: %s", *machineScope.GetBiosUUID()) + machineScope.SetNotReady() return ctrl.Result{RequeueAfter: 5 * time.Second}, nil default: machineScope.SetFailureReason(capierrors.UpdateMachineError) @@ -211,3 +213,19 @@ func (r *ProxmoxMachineReconciler) SetupWithManager(mgr ctrl.Manager) error { For(&infrav1.ProxmoxMachine{}). Complete(r) } + +func checkReconcileError(err error, errMessage string) (ctrl.Result, error) { + if err == nil { + return ctrl.Result{}, nil + } + var reconcileError services.ReconcileError + if errors.As(err, &reconcileError) { + if reconcileError.IsTransient() { + return reconcile.Result{Requeue: true, RequeueAfter: reconcileError.GetRequeueAfter()}, nil + } + if reconcileError.IsTerminal() { + return reconcile.Result{}, nil + } + } + return ctrl.Result{}, errors.Wrap(err, errMessage) +} From cbb0f588d51b6a10883e6fcdcaf708339cea300e Mon Sep 17 00:00:00 2001 From: Pascal Brogle Date: Fri, 13 Oct 2023 00:46:53 +0200 Subject: [PATCH 3/3] wip --- api/v1beta1/proxmoxcluster_types.go | 12 + api/v1beta1/proxmoxmachine_types.go | 12 +- api/v1beta1/proxmoxmachinetemplate_types.go | 51 +++- api/v1beta1/type.go | 27 +- api/v1beta1/zz_generated.deepcopy.go | 113 +++++++- cloud/interfaces.go | 15 +- cloud/scope/cluster.go | 36 +++ cloud/scope/machine.go | 68 ++++- cloud/services/compute/instance/cloudinit.go | 10 +- cloud/services/compute/instance/image.go | 114 ++++++--- cloud/services/compute/instance/network.go | 242 ++++++++++++++++++ cloud/services/compute/instance/qemu.go | 154 +++++++++-- cloud/services/compute/storage/reconcile.go | 52 +++- cmd/main.go | 2 + ...ture.cluster.x-k8s.io_proxmoxclusters.yaml | 15 ++ ...ture.cluster.x-k8s.io_proxmoxmachines.yaml | 66 ++++- ...ster.x-k8s.io_proxmoxmachinetemplates.yaml | 98 +++++-- config/rbac/role.yaml | 33 +++ controllers/proxmoxcluster_controller.go | 4 + controllers/proxmoxmachine_controller.go | 15 ++ 20 files changed, 1022 insertions(+), 117 deletions(-) create mode 100644 cloud/services/compute/instance/network.go diff --git a/api/v1beta1/proxmoxcluster_types.go b/api/v1beta1/proxmoxcluster_types.go index 
709138e..35852d6 100644 --- a/api/v1beta1/proxmoxcluster_types.go +++ b/api/v1beta1/proxmoxcluster_types.go @@ -39,6 +39,18 @@ type ProxmoxClusterSpec struct { // storage is for proxmox storage used by vm instances // +optional Storage Storage `json:"storage"` + + // Nodes to be used for vm instances + // +optional + Nodes []string `json:"nodes,omitempty"` + + // Provide config to enable failure domains + // +optional + FailureDomainConfig *ClusterFailureDomainConfig `json:"failureDomain,omitempty"` + + // Proxmox resource pool to be used for all vms created + // +optional + ResourcePool string `json:"resourcePool,omitempty"` } // ProxmoxClusterStatus defines the observed state of ProxmoxCluster diff --git a/api/v1beta1/proxmoxmachine_types.go b/api/v1beta1/proxmoxmachine_types.go index 0c17488..bc12158 100644 --- a/api/v1beta1/proxmoxmachine_types.go +++ b/api/v1beta1/proxmoxmachine_types.go @@ -25,7 +25,8 @@ import ( const ( // MachineFinalizer - MachineFinalizer = "proxmoxmachine.infrastructure.cluster.x-k8s.io" + MachineFinalizer = "proxmoxmachine.infrastructure.cluster.x-k8s.io" + MachineFinalizerIPAddressClaim = "proxmoxmachine.infrastructure.cluster.x-k8s.io/IPAddressClaim" ) // ProxmoxMachineSpec defines the desired state of ProxmoxMachine @@ -35,13 +36,17 @@ type ProxmoxMachineSpec struct { // Node is proxmox node hosting vm instance which used for ProxmoxMachine // +optional - Node string `json:"node,omitempty"` + Node *string `json:"node,omitempty"` // +kubebuilder:validation:Minimum:=0 // VMID is proxmox qemu's id // +optional VMID *int `json:"vmID,omitempty"` + // Pool the vm is attached to + // +optional + Pool *string `json:"pool,omitempty"` + // Image is the image to be provisioned Image Image `json:"image"` @@ -94,7 +99,8 @@ type ProxmoxMachineStatus struct { //+kubebuilder:subresource:status // +kubebuilder:printcolumn:name="Cluster",type="string",JSONPath=".metadata.labels.cluster\\.x-k8s\\.io/cluster-name",description="Cluster to which this VSphereMachine belongs" // +kubebuilder:printcolumn:name="Machine",type="string",JSONPath=".metadata.ownerReferences[?(@.kind==\"Machine\")].name",description="Machine object which owns with this ProxmoxMachine",priority=1 -// +kubebuilder:printcolumn:name="vmid",type=string,JSONPath=`.spec.vmID`,priority=1 +// +kubebuilder:printcolumn:name="VmID",type=string,JSONPath=`.spec.vmID`,priority=1 +// +kubebuilder:printcolumn:name="Node",type=string,JSONPath=`.spec.node`,priority=2 // +kubebuilder:printcolumn:name="ProviderID",type=string,JSONPath=`.spec.providerID` // +kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status.instanceStatus` // +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description="Time duration since creation of Machine" diff --git a/api/v1beta1/proxmoxmachinetemplate_types.go b/api/v1beta1/proxmoxmachinetemplate_types.go index 3b2b868..06803c0 100644 --- a/api/v1beta1/proxmoxmachinetemplate_types.go +++ b/api/v1beta1/proxmoxmachinetemplate_types.go @@ -1,5 +1,5 @@ /* -Copyright 2023 Teppei Sudo. +Copyright 2023 Simplysoft GmbH. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,18 +21,53 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" ) -// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! -// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 
- // ProxmoxMachineTemplateSpec defines the desired state of ProxmoxMachineTemplate type ProxmoxMachineTemplateSpec struct { - Template ProxmoxMachineTemplateResource `json:"template"` + Template ProxmoxMachineTemplateSpecTemplate `json:"template"` + + // VM ID Range that will be used for individual machines + // +optional + VMIDs *ProxmoxMachineTemplateVmIdRange `json:"vmIDs,omitempty"` + + // Restrict template to specific proxmox nodes. When failure domains are enabled, they will have + // priority the configured nodes in the template + // +optional + Nodes []string `json:"nodes,omitempty"` } -type ProxmoxMachineTemplateResource struct { +type ProxmoxMachineTemplateSpecTemplate struct { + // +optional + ObjectMeta clusterv1.ObjectMeta `json:"metadata.omitempty"` + Spec ProxmoxMachineTemplateSpecTemplateSpec `json:"spec"` +} + +type ProxmoxMachineTemplateSpecTemplateSpec struct { + // Image is the image to be provisioned + Image Image `json:"image"` + + // CloudInit defines options related to the bootstrapping systems where + // CloudInit is used. + // +optional + CloudInit CloudInit `json:"cloudInit,omitempty"` + + // Hardware + Hardware Hardware `json:"hardware,omitempty"` + + // Network + Network Network `json:"network,omitempty"` + + // Options + // +optional + Options Options `json:"options,omitempty"` +} + +type ProxmoxMachineTemplateVmIdRange struct { + // Start of VM ID range + Start int `json:"start"` + + // End of VM ID range // +optional - ObjectMeta clusterv1.ObjectMeta `json:"metadata.omitempty"` - Spec ProxmoxMachineSpec `json:"spec"` + End int `json:"end,omitempty"` } // ProxmoxMachineTemplateStatus defines the observed state of ProxmoxMachineTemplate diff --git a/api/v1beta1/type.go b/api/v1beta1/type.go index dc05933..6feab63 100644 --- a/api/v1beta1/type.go +++ b/api/v1beta1/type.go @@ -2,6 +2,7 @@ package v1beta1 import ( "fmt" + corev1 "k8s.io/api/core/v1" "strings" "github.com/sp-yduck/proxmox-go/api" @@ -83,10 +84,14 @@ type Hardware struct { // SCSI controller model // SCSIHardWare SCSIHardWare `json:"scsiHardWare,omitempty"` - // hard disk size + // boot disk size // +kubebuilder:validation:Pattern:=\+?\d+(\.\d+)?[KMGT]? // +kubebuilder:default:="50G" Disk string `json:"disk,omitempty"` + + // Storage name for the boot disk. If none is provided, the ProxmoxCluster storage name will be used + // +optional + StorageName string `json:"storage,omitempty"` } // Network @@ -101,6 +106,14 @@ type Network struct { // search domain SearchDomain string `json:"searchDomain,omitempty"` + + // +kubebuilder:default:="virtio" + Model string `json:"model,omitempty"` + + // +kubebuilder:default:="vmbr0" + Bridge string `json:"bridge,omitempty"` + + Tag int `json:"vlanTag,omitempty"` } // IPConfig defines IP addresses and gateways for corresponding interface. @@ -117,6 +130,12 @@ type IPConfig struct { // gateway IPv6 Gateway6 string `json:"gateway6,omitempty"` + + // IPv4FromPoolRef is a reference to an IP pool to allocate an address from. + IPv4FromPoolRef *corev1.TypedLocalObjectReference `json:"IPv4FromPoolRef,omitempty"` + + // IPv6FromPoolRef is a reference to an IP pool to allocate an address from. 
+ IPv6FromPoolRef *corev1.TypedLocalObjectReference `json:"IPv6FromPoolRef,omitempty"` } func (c *IPConfig) String() string { @@ -156,3 +175,9 @@ var ( InstanceStatusRunning = InstanceStatus(api.ProcessStatusRunning) InstanceStatusStopped = InstanceStatus(api.ProcessStatusStopped) ) + +type ClusterFailureDomainConfig struct { + // Treat each node as a failure domain for cluster api + // +optional + NodeAsFailureDomain bool `json:"nodeAsFailureDomain,omitempty"` +} diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 00daeaf..4290235 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -22,6 +22,7 @@ limitations under the License. package v1beta1 import ( + "k8s.io/api/core/v1" runtime "k8s.io/apimachinery/pkg/runtime" apiv1beta1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/errors" @@ -82,6 +83,21 @@ func (in *CloudInit) DeepCopy() *CloudInit { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ClusterFailureDomainConfig) DeepCopyInto(out *ClusterFailureDomainConfig) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterFailureDomainConfig. +func (in *ClusterFailureDomainConfig) DeepCopy() *ClusterFailureDomainConfig { + if in == nil { + return nil + } + out := new(ClusterFailureDomainConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Hardware) DeepCopyInto(out *Hardware) { *out = *in @@ -100,6 +116,16 @@ func (in *Hardware) DeepCopy() *Hardware { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *IPConfig) DeepCopyInto(out *IPConfig) { *out = *in + if in.IPv4FromPoolRef != nil { + in, out := &in.IPv4FromPoolRef, &out.IPv4FromPoolRef + *out = new(v1.TypedLocalObjectReference) + (*in).DeepCopyInto(*out) + } + if in.IPv6FromPoolRef != nil { + in, out := &in.IPv6FromPoolRef, &out.IPv6FromPoolRef + *out = new(v1.TypedLocalObjectReference) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IPConfig. @@ -135,7 +161,7 @@ func (in *Image) DeepCopy() *Image { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Network) DeepCopyInto(out *Network) { *out = *in - out.IPConfig = in.IPConfig + in.IPConfig.DeepCopyInto(&out.IPConfig) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Network. @@ -253,6 +279,16 @@ func (in *ProxmoxClusterSpec) DeepCopyInto(out *ProxmoxClusterSpec) { out.ControlPlaneEndpoint = in.ControlPlaneEndpoint in.ServerRef.DeepCopyInto(&out.ServerRef) out.Storage = in.Storage + if in.Nodes != nil { + in, out := &in.Nodes, &out.Nodes + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.FailureDomainConfig != nil { + in, out := &in.FailureDomainConfig, &out.FailureDomainConfig + *out = new(ClusterFailureDomainConfig) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProxmoxClusterSpec. 
@@ -361,15 +397,25 @@ func (in *ProxmoxMachineSpec) DeepCopyInto(out *ProxmoxMachineSpec) { *out = new(string) **out = **in } + if in.Node != nil { + in, out := &in.Node, &out.Node + *out = new(string) + **out = **in + } if in.VMID != nil { in, out := &in.VMID, &out.VMID *out = new(int) **out = **in } + if in.Pool != nil { + in, out := &in.Pool, &out.Pool + *out = new(string) + **out = **in + } in.Image.DeepCopyInto(&out.Image) in.CloudInit.DeepCopyInto(&out.CloudInit) out.Hardware = in.Hardware - out.Network = in.Network + in.Network.DeepCopyInto(&out.Network) in.Options.DeepCopyInto(&out.Options) if in.FailureDomain != nil { in, out := &in.FailureDomain, &out.FailureDomain @@ -491,34 +537,64 @@ func (in *ProxmoxMachineTemplateList) DeepCopyObject() runtime.Object { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ProxmoxMachineTemplateResource) DeepCopyInto(out *ProxmoxMachineTemplateResource) { +func (in *ProxmoxMachineTemplateSpec) DeepCopyInto(out *ProxmoxMachineTemplateSpec) { + *out = *in + in.Template.DeepCopyInto(&out.Template) + if in.VMIDs != nil { + in, out := &in.VMIDs, &out.VMIDs + *out = new(ProxmoxMachineTemplateVmIdRange) + **out = **in + } + if in.Nodes != nil { + in, out := &in.Nodes, &out.Nodes + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProxmoxMachineTemplateSpec. +func (in *ProxmoxMachineTemplateSpec) DeepCopy() *ProxmoxMachineTemplateSpec { + if in == nil { + return nil + } + out := new(ProxmoxMachineTemplateSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ProxmoxMachineTemplateSpecTemplate) DeepCopyInto(out *ProxmoxMachineTemplateSpecTemplate) { *out = *in in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProxmoxMachineTemplateResource. -func (in *ProxmoxMachineTemplateResource) DeepCopy() *ProxmoxMachineTemplateResource { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProxmoxMachineTemplateSpecTemplate. +func (in *ProxmoxMachineTemplateSpecTemplate) DeepCopy() *ProxmoxMachineTemplateSpecTemplate { if in == nil { return nil } - out := new(ProxmoxMachineTemplateResource) + out := new(ProxmoxMachineTemplateSpecTemplate) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ProxmoxMachineTemplateSpec) DeepCopyInto(out *ProxmoxMachineTemplateSpec) { +func (in *ProxmoxMachineTemplateSpecTemplateSpec) DeepCopyInto(out *ProxmoxMachineTemplateSpecTemplateSpec) { *out = *in - in.Template.DeepCopyInto(&out.Template) + in.Image.DeepCopyInto(&out.Image) + in.CloudInit.DeepCopyInto(&out.CloudInit) + out.Hardware = in.Hardware + in.Network.DeepCopyInto(&out.Network) + in.Options.DeepCopyInto(&out.Options) } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProxmoxMachineTemplateSpec. -func (in *ProxmoxMachineTemplateSpec) DeepCopy() *ProxmoxMachineTemplateSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProxmoxMachineTemplateSpecTemplateSpec. 
+func (in *ProxmoxMachineTemplateSpecTemplateSpec) DeepCopy() *ProxmoxMachineTemplateSpecTemplateSpec { if in == nil { return nil } - out := new(ProxmoxMachineTemplateSpec) + out := new(ProxmoxMachineTemplateSpecTemplateSpec) in.DeepCopyInto(out) return out } @@ -538,6 +614,21 @@ func (in *ProxmoxMachineTemplateStatus) DeepCopy() *ProxmoxMachineTemplateStatus return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ProxmoxMachineTemplateVmIdRange) DeepCopyInto(out *ProxmoxMachineTemplateVmIdRange) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProxmoxMachineTemplateVmIdRange. +func (in *ProxmoxMachineTemplateVmIdRange) DeepCopy() *ProxmoxMachineTemplateVmIdRange { + if in == nil { + return nil + } + out := new(ProxmoxMachineTemplateVmIdRange) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SSH) DeepCopyInto(out *SSH) { *out = *in diff --git a/cloud/interfaces.go b/cloud/interfaces.go index 853b703..65009b9 100644 --- a/cloud/interfaces.go +++ b/cloud/interfaces.go @@ -2,6 +2,7 @@ package cloud import ( "context" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/sp-yduck/proxmox-go/api" "github.com/sp-yduck/proxmox-go/proxmox" @@ -33,7 +34,7 @@ type ClusterGetter interface { // NetworkName() string // Network() *infrav1.Network // AdditionalLabels() infrav1.Labels - // FailureDomains() clusterv1.FailureDomains + FailureDomains() clusterv1.FailureDomains ControlPlaneEndpoint() clusterv1.APIEndpoint Storage() infrav1.Storage } @@ -46,24 +47,30 @@ type ClusterSettter interface { // MachineGetter is an interface which can get machine information. type MachineGetter interface { Client + GetProxmoxMachine() *infrav1.ProxmoxMachine Name() string Namespace() string // Zone() string // Role() string // IsControlPlane() bool // ControlPlaneGroupName() string - NodeName() string + NodeName() *string + GetPool() *string GetBiosUUID() *string GetImage() infrav1.Image GetProviderID() string GetBootstrapData() (string, error) GetInstanceStatus() *infrav1.InstanceStatus - GetStorage() infrav1.Storage + GetClusterStorage() infrav1.Storage GetCloudInit() infrav1.CloudInit GetNetwork() infrav1.Network GetHardware() infrav1.Hardware + GetBootDiskStorage() string GetVMID() *int GetOptions() infrav1.Options + + GetProxmoxMachineTemplate(context.Context) *infrav1.ProxmoxMachineTemplate + GetProxmoxCluster() *infrav1.ProxmoxCluster } // MachineSetter is an interface which can set machine information. 
@@ -72,11 +79,13 @@ type MachineSetter interface { SetInstanceStatus(v infrav1.InstanceStatus) SetNodeName(name string) SetVMID(vmid int) + SetPool(name string) SetConfigStatus(config api.VirtualMachineConfig) // SetFailureMessage(v error) // SetFailureReason(v capierrors.MachineStatusError) // SetAnnotation(key, value string) // SetAddresses(addressList []corev1.NodeAddress) + SetFailureDomain(failureDomain string) PatchObject() error } diff --git a/cloud/scope/cluster.go b/cloud/scope/cluster.go index f034c06..c049994 100644 --- a/cloud/scope/cluster.go +++ b/cloud/scope/cluster.go @@ -18,6 +18,8 @@ package scope import ( "context" + "fmt" + "slices" "github.com/pkg/errors" "github.com/sp-yduck/proxmox-go/proxmox" @@ -96,6 +98,10 @@ func (s *ClusterScope) Storage() infrav1.Storage { return s.ProxmoxCluster.Spec.Storage } +func (s *ClusterScope) FailureDomains() clusterv1.FailureDomains { + return s.ProxmoxCluster.Status.FailureDomains +} + func (s *ClusterScope) CloudClient() *proxmox.Service { return s.ProxmoxServices.Compute } @@ -124,3 +130,33 @@ func (s *ClusterScope) SetStorage(storage infrav1.Storage) { func (s *ClusterScope) PatchObject() error { return s.patchHelper.Patch(context.TODO(), s.ProxmoxCluster) } + +func (s *ClusterScope) SetFailureDomains(ctx context.Context) error { + if s.ProxmoxCluster.Spec.FailureDomainConfig == nil { + return nil + } + + config := *s.ProxmoxCluster.Spec.FailureDomainConfig + + if config.NodeAsFailureDomain { + nodes, err := s.Compute.Nodes(ctx) + if err != nil { + return fmt.Errorf("could not query nodes for failure domains: %v", err) + } + + nodesConfigured := len(s.ProxmoxCluster.Spec.Nodes) > 0 + domain := make(clusterv1.FailureDomains, len(nodes)) + for _, node := range nodes { + if nodesConfigured && !slices.Contains(s.ProxmoxCluster.Spec.Nodes, node.Node) { + continue + } + domain[node.Node] = clusterv1.FailureDomainSpec{ControlPlane: true} + } + + s.ProxmoxCluster.Status.FailureDomains = domain + return nil + } + + // TODO: some other strategy based on Proxmox HA groups + return nil +} diff --git a/cloud/scope/machine.go b/cloud/scope/machine.go index 0ae5578..053b6eb 100644 --- a/cloud/scope/machine.go +++ b/cloud/scope/machine.go @@ -30,6 +30,7 @@ import ( capierrors "sigs.k8s.io/cluster-api/errors" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" infrav1 "github.com/sp-yduck/cluster-api-provider-proxmox/api/v1beta1" "github.com/sp-yduck/cluster-api-provider-proxmox/cloud/providerid" @@ -79,6 +80,14 @@ type MachineScope struct { ClusterGetter *ClusterScope } +func (m *MachineScope) GetProxmoxCluster() *infrav1.ProxmoxCluster { + return m.ClusterGetter.ProxmoxCluster +} + +func (m *MachineScope) GetProxmoxMachine() *infrav1.ProxmoxMachine { + return m.ProxmoxMachine +} + func (m *MachineScope) CloudClient() *proxmox.Service { return m.ClusterGetter.CloudClient() } @@ -87,7 +96,7 @@ func (m *MachineScope) K8sClient() *client.Client { return &m.client } -func (m *MachineScope) GetStorage() infrav1.Storage { +func (m *MachineScope) GetClusterStorage() infrav1.Storage { return m.ClusterGetter.ProxmoxCluster.Spec.Storage } @@ -99,12 +108,12 @@ func (m *MachineScope) Namespace() string { return m.ProxmoxMachine.Namespace } -func (m *MachineScope) NodeName() string { +func (m *MachineScope) NodeName() *string { return m.ProxmoxMachine.Spec.Node } func (m *MachineScope) SetNodeName(name string) { - m.ProxmoxMachine.Spec.Node = name + m.ProxmoxMachine.Spec.Node = &name } // 
func (m *MachineScope) Client() Compute { @@ -112,6 +121,11 @@ func (m *MachineScope) SetNodeName(name string) { // } func (m *MachineScope) GetBootstrapData() (string, error) { + + if !m.Machine.Status.BootstrapReady { + return "", errors.New("Bootstrap not yet ready") + } + if m.Machine.Spec.Bootstrap.DataSecretName == nil { return "", errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") } @@ -171,6 +185,13 @@ func (m *MachineScope) GetCloudInit() infrav1.CloudInit { } func (m *MachineScope) GetNetwork() infrav1.Network { + if m.ProxmoxMachine.Spec.Network.Bridge == "" { + m.ProxmoxMachine.Spec.Network.Bridge = "vmbr0" + } + if m.ProxmoxMachine.Spec.Network.Model == "" { + m.ProxmoxMachine.Spec.Network.Model = "virtio" + } + return m.ProxmoxMachine.Spec.Network } @@ -188,10 +209,43 @@ func (m *MachineScope) GetHardware() infrav1.Hardware { return m.ProxmoxMachine.Spec.Hardware } +func (m *MachineScope) GetBootDiskStorage() string { + if m.ProxmoxMachine.Spec.Hardware.StorageName != "" { + return m.ProxmoxMachine.Spec.Hardware.StorageName + } else { + return m.ClusterGetter.ProxmoxCluster.Spec.Storage.Name + } +} + func (m *MachineScope) GetOptions() infrav1.Options { return m.ProxmoxMachine.Spec.Options } +func (m *MachineScope) GetPool() *string { + return m.ProxmoxMachine.Spec.Pool +} + +func (m *MachineScope) GetProxmoxMachineTemplate(ctx context.Context) *infrav1.ProxmoxMachineTemplate { + log := log.FromContext(ctx) + + templateName := m.ProxmoxMachine.Annotations[clusterv1.TemplateClonedFromNameAnnotation] + templateKind := m.ProxmoxMachine.Annotations[clusterv1.TemplateClonedFromGroupKindAnnotation] + if templateKind != "ProxmoxMachineTemplate.infrastructure.cluster.x-k8s.io" { + return nil + } + + template := &infrav1.ProxmoxMachineTemplate{} + + objKey := types.NamespacedName{Namespace: m.Namespace(), Name: templateName} + + if err := m.client.Get(ctx, objKey, template); err != nil { + log.Info("ProxmoxCluster is not available yet") + return nil + } + + return template +} + // SetProviderID sets the ProxmoxMachine providerID in spec. 
func (m *MachineScope) SetProviderID(uuid string) error { providerid, err := providerid.New(uuid) @@ -206,6 +260,10 @@ func (m *MachineScope) SetVMID(vmid int) { m.ProxmoxMachine.Spec.VMID = &vmid } +func (m *MachineScope) SetPool(pool string) { + m.ProxmoxMachine.Spec.Pool = &pool +} + func (m *MachineScope) SetConfigStatus(config api.VirtualMachineConfig) { m.ProxmoxMachine.Status.Config = config } @@ -230,3 +288,7 @@ func (m *MachineScope) SetFailureReason(v capierrors.MachineStatusError) { func (s *MachineScope) PatchObject() error { return s.patchHelper.Patch(context.TODO(), s.ProxmoxMachine) } + +func (m *MachineScope) SetFailureDomain(failureDomain string) { + m.ProxmoxMachine.Spec.FailureDomain = &failureDomain +} diff --git a/cloud/services/compute/instance/cloudinit.go b/cloud/services/compute/instance/cloudinit.go index 8d3557a..d920ee7 100644 --- a/cloud/services/compute/instance/cloudinit.go +++ b/cloud/services/compute/instance/cloudinit.go @@ -3,7 +3,6 @@ package instance import ( "context" "fmt" - "github.com/pkg/errors" "sigs.k8s.io/controller-runtime/pkg/log" @@ -30,11 +29,11 @@ func (s *Service) reconcileCloudInit(ctx context.Context) error { // delete CloudConfig func (s *Service) deleteCloudConfig(ctx context.Context) error { - storageName := s.scope.GetStorage().Name + storageName := s.scope.GetClusterStorage().Name path := userSnippetPath(s.scope.Name()) volumeID := fmt.Sprintf("%s:%s", storageName, path) - node, err := s.client.Node(ctx, s.scope.NodeName()) + node, err := s.client.Node(ctx, *s.scope.NodeName()) if err != nil { return err } @@ -52,6 +51,7 @@ func (s *Service) reconcileCloudInitUser(ctx context.Context) error { log := log.FromContext(ctx) // cloud init from bootstrap provider + // check bootstrapReady: true bootstrap, err := s.scope.GetBootstrapData() if err != nil { log.Error(err, "Error getting bootstrap data for machine") @@ -74,12 +74,12 @@ func (s *Service) reconcileCloudInitUser(ctx context.Context) error { } // to do: should be set via API - vnc, err := s.vncClient(s.scope.NodeName()) + vnc, err := s.vncClient(*s.scope.NodeName()) if err != nil { return err } defer vnc.Close() - filePath := fmt.Sprintf("%s/%s", s.scope.GetStorage().Path, userSnippetPath(vmName)) + filePath := fmt.Sprintf("%s/%s", s.scope.GetClusterStorage().Path, userSnippetPath(vmName)) if err := vnc.WriteFile(context.TODO(), configYaml, filePath); err != nil { return errors.Errorf("failed to write file error : %v", err) } diff --git a/cloud/services/compute/instance/image.go b/cloud/services/compute/instance/image.go index 5316e36..f5734d5 100644 --- a/cloud/services/compute/instance/image.go +++ b/cloud/services/compute/instance/image.go @@ -3,6 +3,7 @@ package instance import ( "context" "fmt" + "github.com/sp-yduck/cluster-api-provider-proxmox/cloud" "path" "strings" @@ -22,61 +23,72 @@ func (s *Service) reconcileBootDevice(ctx context.Context, vm *proxmox.VirtualMa log := log.FromContext(ctx) log.Info("reconcile boot device") - // os image - if err := s.setCloudImage(ctx); err != nil { - return err - } - // volume - if err := vm.ResizeVolume(ctx, bootDvice, s.scope.GetHardware().Disk); err != nil { + if err := vm.ResizeVolume(ctx, bootDevice, s.scope.GetHardware().Disk); err != nil { return err } return nil } -// setCloudImage downloads OS image into Proxmox node -// and then sets it to specified storage -func (s *Service) setCloudImage(ctx context.Context) error { +// importCloudImage downloads OS image into Proxmox node, converts it to qcow2 format and returns proxmox 
"import-from" +// compatible string +func (s *Service) importCloudImage(ctx context.Context) (string, error) { log := log.FromContext(ctx) - log.Info("setting cloud image") + log.Info("importing cloud image") image := s.scope.GetImage() - rawImageFilePath := rawImageFilePath(image) + clusterStorageImagesBasePath := clusterStorageImagesBasePath(s.scope) + sourceImageFilePath := sourceImagePath(s.scope) + gcow2ImageFilePath := qcow2ImagePath(sourceImageFilePath) // workaround // API does not support something equivalent of "qm importdisk" - vnc, err := s.vncClient(s.scope.NodeName()) + vnc, err := s.vncClient(*s.scope.NodeName()) if err != nil { - return errors.Errorf("failed to create vnc client: %v", err) + return "", errors.Errorf("failed to create vnc client: %v", err) } defer vnc.Close() - // download image - ok, _ := isChecksumOK(vnc, image, rawImageFilePath) - if !ok { // if checksum is ok, it means the image is already there. skip installing - out, _, err := vnc.Exec(ctx, fmt.Sprintf("mkdir -p %s && mkdir -p %s", etcCAPPX, rawImageDirPath)) - if err != nil { - return errors.Errorf("failed to create dir %s: %s : %v", rawImageDirPath, out, err) + gcow2Present, _ := isFilePresent(vnc, gcow2ImageFilePath) + if !gcow2Present { + + sourceImagePresent, _ := isFilePresent(vnc, sourceImageFilePath) + if !sourceImagePresent { + out, _, err := vnc.Exec(ctx, fmt.Sprintf("mkdir -p %s", clusterStorageImagesBasePath)) + if err != nil { + return "", errors.Errorf("failed to create dir %s: %s : %v", clusterStorageImagesBasePath, out, err) + } + log.Info("downloading node image. this will take few mins.") + out, _, err = vnc.Exec(ctx, fmt.Sprintf("wget '%s' -O '%s'", image.URL, sourceImageFilePath)) + if err != nil { + return "", errors.Errorf("failed to download image: %s : %v", out, err) + } + log.Info("node image downloaded") } - log.Info("downloading node image. 
this will take few mins.") - out, _, err = vnc.Exec(ctx, fmt.Sprintf("wget %s -O %s", image.URL, rawImageFilePath)) + + if _, err = isChecksumOK(vnc, image, sourceImageFilePath); err != nil { + if _, err = deleteFile(vnc, sourceImageFilePath); err != nil { + return "", errors.Errorf("failed to delete source image after checksum failed: %v", err) + } else { + return "", errors.Errorf("failed to confirm checksum: %v", err) + } + } + log.Info("node image downloaded") + + out, _, err := vnc.Exec(context.TODO(), fmt.Sprintf("/usr/bin/qemu-img convert -O qcow2 '%s' '%s'", sourceImageFilePath, gcow2ImageFilePath)) if err != nil { - return errors.Errorf("failed to download image: %s : %v", out, err) + return "", errors.Errorf("failed to convert image : %s : %v", out, err) } - if _, err = isChecksumOK(vnc, image, rawImageFilePath); err != nil { - return errors.Errorf("failed to confirm checksum: %v", err) + log.Info("converted node image now available") + + if _, err = deleteFile(vnc, sourceImageFilePath); err != nil { + log.Info("failed to delete source image after conversion, ignoring error") } } - // convert downloaded image to raw format and set it to storage - vmid := s.scope.GetVMID() - destPath := fmt.Sprintf("%s/images/%d/vm-%d-disk-0.raw", s.scope.GetStorage().Path, *vmid, *vmid) - out, _, err := vnc.Exec(context.TODO(), fmt.Sprintf("/usr/bin/qemu-img convert -O raw %s %s", rawImageFilePath, destPath)) - if err != nil { - return errors.Errorf("failed to convert iamge : %s : %v", out, err) - } - return nil + // convert absolute path /images/0/ to :0/ + return strings.Replace(gcow2ImageFilePath, clusterStorageImagesBasePath, s.scope.GetClusterStorage().Name+":0", 1), nil } func findValidChecksumCommand(csType string) (string, error) { @@ -91,6 +103,27 @@ func findValidChecksumCommand(csType string) (string, error) { } } +func isFilePresent(client *proxmox.VNCWebSocketClient, path string) (bool, error) { + + cmd := fmt.Sprintf("test -f '%s'", path) + out, _, err := client.Exec(context.TODO(), cmd) + if err != nil { + return false, errors.Errorf("failed to find file: %s : %v", out, err) + } + + return true, nil +} + +func deleteFile(client *proxmox.VNCWebSocketClient, filePath string) (bool, error) { + cmd := fmt.Sprintf("rm '%s'", filePath) + out, _, err := client.Exec(context.TODO(), cmd) + if err != nil { + return false, errors.Errorf("failed to delete file: %s : %v", out, err) + } + + return true, nil +} + func isChecksumOK(client *proxmox.VNCWebSocketClient, image infrav1.Image, path string) (bool, error) { if image.Checksum != "" { cscmd, err := findValidChecksumCommand(*image.ChecksumType) @@ -107,10 +140,23 @@ func isChecksumOK(client *proxmox.VNCWebSocketClient, image infrav1.Image, path return false, nil } -func rawImageFilePath(image infrav1.Image) string { +func clusterStorageImagesBasePath(scope cloud.MachineGetter) string { + // we are using 0 as vm id to workaround limitation of import-from that expects vm id disks + return fmt.Sprintf("%s/images/0", scope.GetClusterStorage().Path) +} + +func sourceImagePath(scope cloud.MachineGetter) string { + image := scope.GetImage() + fileName := path.Base(image.URL) if image.Checksum != "" { fileName = image.Checksum + "." 
+ fileName } - return fmt.Sprintf("%s/%s", rawImageDirPath, fileName) + + return fmt.Sprintf("%s/%s", clusterStorageImagesBasePath(scope), fileName) +} + +func qcow2ImagePath(sourceImageFilePath string) string { + ext := path.Base(path.Ext(sourceImageFilePath)) + return strings.Replace(sourceImageFilePath, ext, ".qcow2", 1) } diff --git a/cloud/services/compute/instance/network.go b/cloud/services/compute/instance/network.go new file mode 100644 index 0000000..dde5922 --- /dev/null +++ b/cloud/services/compute/instance/network.go @@ -0,0 +1,242 @@ +/* +Copyright 2023 Simplysoft GmbH. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package instance + +import ( + "context" + "fmt" + "github.com/pkg/errors" + infrav1 "github.com/sp-yduck/cluster-api-provider-proxmox/api/v1beta1" + "github.com/sp-yduck/cluster-api-provider-proxmox/cloud/services" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/pointer" + caipamv1 "sigs.k8s.io/cluster-api/exp/ipam/api/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" + "strings" + "time" +) + +// generates the primary network config. 
If IP pools are configured, ensures to claim an address from the pool, +// otherwise falls back to configured ip addresses on the IPConfig or DHCP if none are provided +func (s *Service) generateIpConfig0(ctx context.Context) (string, error) { + template := s.scope.GetProxmoxMachineTemplate(ctx) + machine := *s.scope.GetProxmoxMachine() + k8sClient := *s.scope.K8sClient() + + requeue := false + var ipv4Addresses *addressFromPool + var ipv6Addresses *addressFromPool + if template.Spec.Template.Spec.Network.IPConfig.IPv4FromPoolRef != nil && template.Spec.Template.Spec.Network.IPConfig.IPv4FromPoolRef.Name != "" { + ref := getRef(template.Spec.Template.Spec.Network.IPConfig.IPv4FromPoolRef) + + rc, err := ensureIPClaim(ctx, k8sClient, machine, *ref) + if err != nil { + return "", err + } + var itemRequeue bool + addr, itemRequeue, err := addressFromClaim(ctx, k8sClient, machine, *ref, rc.claim) + requeue = requeue || itemRequeue + if err != nil { + return "", err + } else { + ipv4Addresses = &addr + } + } + + if template.Spec.Template.Spec.Network.IPConfig.IPv6FromPoolRef != nil && template.Spec.Template.Spec.Network.IPConfig.IPv6FromPoolRef.Name != "" { + ref := getRef(template.Spec.Template.Spec.Network.IPConfig.IPv6FromPoolRef) + + rc, err := ensureIPClaim(ctx, k8sClient, machine, *ref) + if err != nil { + return "", err + } + var itemRequeue bool + addr, itemRequeue, err := addressFromClaim(ctx, k8sClient, machine, *ref, rc.claim) + requeue = requeue || itemRequeue + if err != nil { + return "", err + } else { + ipv6Addresses = &addr + } + } + + if requeue { + return "", services.WithTransientError(fmt.Errorf("not all ip addresses available"), time.Second*5) + } + + var configs []string + if ipv4Addresses != nil { + configs = append(configs, fmt.Sprintf("ip=%s/%d", ipv4Addresses.Address, ipv4Addresses.Prefix)) + if ipv4Addresses.Gateway != "" { + configs = append(configs, fmt.Sprintf("gw=%s", ipv4Addresses.Gateway)) + } + } + if ipv6Addresses != nil { + configs = append(configs, fmt.Sprintf("ip6=%s/%d", ipv6Addresses.Address, ipv6Addresses.Prefix)) + if ipv6Addresses.Gateway != "" { + configs = append(configs, fmt.Sprintf("gw6=%s", ipv6Addresses.Gateway)) + } + } + + if len(configs) > 0 { + return strings.Join(configs, ","), nil + } else { + return machine.Spec.Network.IPConfig.String(), nil + } +} + +func getRef(ref *corev1.TypedLocalObjectReference) *corev1.TypedLocalObjectReference { + if ref.APIGroup == nil || *ref.APIGroup == "" { + ref.APIGroup = pointer.String("ipam.cluster.x-k8s.io") + } + + return ref +} + +type addressFromPool struct { + Address string + Prefix int + Gateway string + //dnsServers []string +} + +type reconciledClaim struct { + claim *caipamv1.IPAddressClaim + fetchAgain bool +} + +func ensureIPClaim(ctx context.Context, client client.Client, m infrav1.ProxmoxMachine, poolRef corev1.TypedLocalObjectReference) (reconciledClaim, error) { + claim := &caipamv1.IPAddressClaim{} + nn := types.NamespacedName{ + Namespace: m.Namespace, + Name: m.Name + "-" + poolRef.Name, + } + + if err := client.Get(ctx, nn, claim); err != nil { + if !apierrors.IsNotFound(err) { + return reconciledClaim{claim: claim}, err + } + } + if claim.Name != "" { + return reconciledClaim{claim: claim}, nil + } + + // No claim exists, we create a new one + claim = &caipamv1.IPAddressClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: addressClaimName(m, poolRef), + Namespace: m.Namespace, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: m.APIVersion, + Kind: m.Kind, + Name: m.Name, + UID: 
m.UID, + Controller: pointer.BoolPtr(true), + }, + }, + Labels: m.Labels, + /*Finalizers: []string{ + infrav1.MachineFinalizerIPAddressClaim, + },*/ + }, + Spec: caipamv1.IPAddressClaimSpec{ + PoolRef: poolRef, + }, + } + + err := client.Create(ctx, claim) + // if the claim already exists we can try to fetch it again + if err == nil || apierrors.IsAlreadyExists(err) { + return reconciledClaim{claim: claim, fetchAgain: true}, nil + } + return reconciledClaim{claim: claim}, err +} + +func releaseAddressFromPool(ctx context.Context, client client.Client, m infrav1.ProxmoxMachine, poolRef corev1.TypedLocalObjectReference) error { + claim := &caipamv1.IPAddressClaim{} + nn := types.NamespacedName{ + Namespace: m.Namespace, + Name: addressClaimName(m, poolRef), + } + if err := client.Get(ctx, nn, claim); err != nil { + if !apierrors.IsNotFound(err) { + return err + } + return nil + } + + if controllerutil.RemoveFinalizer(claim, infrav1.MachineFinalizerIPAddressClaim) { + if err := client.Update(ctx, claim); err != nil { + return err + } + } + + err := client.Delete(ctx, claim) + if apierrors.IsNotFound(err) { + return nil + } + return err +} + +func addressClaimName(m infrav1.ProxmoxMachine, poolRef corev1.TypedLocalObjectReference) string { + return m.Name + "-" + poolRef.Name +} + +// addressFromClaim retrieves the IPAddress for a CAPI IPAddressClaim. +func addressFromClaim(ctx context.Context, client client.Client, m infrav1.ProxmoxMachine, _ corev1.TypedLocalObjectReference, claim *caipamv1.IPAddressClaim) (addressFromPool, bool, error) { + log := log.FromContext(ctx) + + if claim == nil { + return addressFromPool{}, true, errors.New("no claim provided") + } + if !claim.DeletionTimestamp.IsZero() { + // This IPClaim is about to be deleted, so we cannot use it. Requeue. 
+ log.Info("Found IPClaim with deletion timestamp, requeuing.", "IPClaim", claim) + return addressFromPool{}, true, nil + } + + if claim.Status.AddressRef.Name == "" { + return addressFromPool{}, true, nil + } + + address := &caipamv1.IPAddress{} + addressNamespacedName := types.NamespacedName{ + Name: claim.Status.AddressRef.Name, + Namespace: m.Namespace, + } + + if err := client.Get(ctx, addressNamespacedName, address); err != nil { + if apierrors.IsNotFound(err) { + return addressFromPool{}, true, nil + } + return addressFromPool{}, false, err + } + + a := addressFromPool{ + Address: address.Spec.Address, + Prefix: address.Spec.Prefix, + Gateway: address.Spec.Gateway, + } + log.Info("allocating", "addr", a) + return a, false, nil +} diff --git a/cloud/services/compute/instance/qemu.go b/cloud/services/compute/instance/qemu.go index 2f790d9..4a28135 100644 --- a/cloud/services/compute/instance/qemu.go +++ b/cloud/services/compute/instance/qemu.go @@ -3,6 +3,8 @@ package instance import ( "context" "fmt" + infrav1 "github.com/sp-yduck/cluster-api-provider-proxmox/api/v1beta1" + "math" "math/rand" "time" @@ -14,7 +16,7 @@ import ( ) const ( - bootDvice = "scsi0" + bootDevice = "scsi0" ) func (s *Service) reconcileQEMU(ctx context.Context) (*proxmox.VirtualMachine, error) { @@ -28,7 +30,7 @@ func (s *Service) reconcileQEMU(ctx context.Context) (*proxmox.VirtualMachine, e return qemu, nil } if !rest.IsNotFound(err) { - log.Error(err, fmt.Sprintf("failed to get qemu: node=%s,vmid=%d", nodeName, *vmid)) + log.Error(err, fmt.Sprintf("failed to get qemu: node=%s,vmid=%d", *nodeName, *vmid)) return nil, err } @@ -44,44 +46,127 @@ func (s *Service) getQEMU(ctx context.Context, vmid *int) (*proxmox.VirtualMachi return nil, rest.NotFoundErr } -func (s *Service) createQEMU(ctx context.Context, nodeName string, vmid *int) (*proxmox.VirtualMachine, error) { +func (s *Service) createQEMU(ctx context.Context, nodeName *string, vmid *int) (*proxmox.VirtualMachine, error) { log := log.FromContext(ctx) + template := s.scope.GetProxmoxMachineTemplate(ctx) + cluster := s.scope.GetProxmoxCluster() + + if cluster.Spec.ResourcePool != "" { + s.scope.SetPool(cluster.Spec.ResourcePool) + if err := s.scope.PatchObject(); err != nil { + return nil, err + } + } // get node - if nodeName == "" { - // temp solution - node, err := s.getRandomNode(ctx) - if err != nil { - log.Error(err, "failed to get random node") + if nodeName == nil { + if template.Spec.Nodes != nil && len(template.Spec.Nodes) > 0 { + log.Info("selecting random node from configured nodes in template") + nodeName = &template.Spec.Nodes[rand.Intn(len(template.Spec.Nodes))] + } else if cluster.Spec.Nodes != nil && len(cluster.Spec.Nodes) > 0 { + log.Info("selecting random node from configured nodes in cluster") + nodeName = &cluster.Spec.Nodes[rand.Intn(len(cluster.Spec.Nodes))] + } else { + log.Info("selecting random node") + node, err := s.getRandomNode(ctx) + if err != nil { + log.Error(err, "failed to get random node") + return nil, err + } + nodeName = &node.Node + } + s.scope.SetNodeName(*nodeName) + if s.scope.GetProxmoxMachine().Spec.FailureDomain == nil && s.scope.GetProxmoxCluster().Spec.FailureDomainConfig != nil && s.scope.GetProxmoxCluster().Spec.FailureDomainConfig.NodeAsFailureDomain { + s.scope.SetFailureDomain(*nodeName) + } + if err := s.scope.PatchObject(); err != nil { return nil, err } - nodeName = node.Node - s.scope.SetNodeName(nodeName) } // if vmid is empty, generate new vmid if vmid == nil { - nextid, err := s.getNextID(ctx) - 
if err != nil {
-			log.Error(err, "failed to get available vmid")
-			return nil, err
+		if template != nil && template.Spec.VMIDs != nil {
+			nextid, err := s.getNextVmIdInConfiguredRange(ctx, template)
+			if err != nil {
+				return nil, err
+			}
+			vmid = &nextid
+		} else {
+			log.Info("using next id from proxmox cluster as VM ID")
+			nextid, err := s.getNextID(ctx)
+			if err != nil {
+				log.Error(err, "failed to get available vmid")
+				return nil, err
+			}
+			vmid = &nextid
 		}
-		vmid = &nextid
 		s.scope.SetVMID(*vmid)
 		if err := s.scope.PatchObject(); err != nil {
 			return nil, err
 		}
+		log.Info(fmt.Sprintf("new vm id %d", *vmid))
+	}
+
+	// os image
+	image, err := s.importCloudImage(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	config, err := s.generateIpConfig0(ctx)
+	if err != nil {
+		return nil, err
 	}
 
-	vmoption := s.generateVMOptions()
-	vm, err := s.client.CreateVirtualMachine(ctx, nodeName, *vmid, vmoption)
+	vmoption := s.generateVMOptions(image, config)
+	vm, err := s.client.CreateVirtualMachine(ctx, *nodeName, *vmid, vmoption)
 	if err != nil {
-		log.Error(err, fmt.Sprintf("failed to create qemu instance %s", vm.VM.Name))
+		log.Error(err, fmt.Sprintf("failed to create qemu instance %d", *vmid))
 		return nil, err
 	}
 	return vm, nil
 }
 
+func (s *Service) getNextVmIdInConfiguredRange(ctx context.Context, template *infrav1.ProxmoxMachineTemplate) (int, error) {
+	log := log.FromContext(ctx)
+	log.Info(fmt.Sprintf("generating VM ID from configured range %d-%d", template.Spec.VMIDs.Start, template.Spec.VMIDs.End))
+
+	vms, err := s.client.VirtualMachines(ctx)
+	if err != nil {
+		log.Error(err, "failed to get virtual machines")
+		return 0, err
+	}
+
+	usedVmIds := map[int]bool{}
+	for _, vm := range vms {
+		usedVmIds[vm.VMID] = true
+	}
+
+	nextid := template.Spec.VMIDs.Start
+
+	var maxId int
+	if template.Spec.VMIDs.End > 0 {
+		maxId = template.Spec.VMIDs.End
+	} else {
+		maxId = math.MaxInt32 - 1
+	}
+
+	for nextid <= maxId {
+		if _, isUsed := usedVmIds[nextid]; !isUsed {
+			break
+		}
+		nextid++
+	}
+
+	if nextid > maxId {
+		return 0, fmt.Errorf("no available VM ID found in range %d-%d", template.Spec.VMIDs.Start, maxId)
+	}
+	return nextid, nil
+}
+
 func (s *Service) getNextID(ctx context.Context) (int, error) {
 	return s.client.RESTClient().GetNextID(ctx)
 }
@@ -104,27 +189,41 @@ func (s *Service) getRandomNode(ctx context.Context) (*api.Node, error) {
 	return nodes[r.Intn(len(nodes))], nil
 }
 
-func (s *Service) generateVMOptions() api.VirtualMachineCreateOptions {
+func (s *Service) generateVMOptions(importFromImage string, ipconfig0 string) api.VirtualMachineCreateOptions {
 	vmName := s.scope.Name()
-	storageName := s.scope.GetStorage().Name
+	pool := s.scope.GetPool()
+	clusterStorageName := s.scope.GetClusterStorage().Name
 	network := s.scope.GetNetwork()
 	hardware := s.scope.GetHardware()
 	options := s.scope.GetOptions()
 
+	if ipconfig0 == "" {
+		ipconfig0 = network.IPConfig.String()
+	}
+
+	net0 := fmt.Sprintf("model=%s,bridge=%s,firewall=1", network.Model, network.Bridge)
+	if network.Tag > 0 {
+		net0 += fmt.Sprintf(",tag=%d", network.Tag)
+	}
+
+	cloudinitDiskConfig := fmt.Sprintf("file=%s:cloudinit,media=cdrom", s.scope.GetBootDiskStorage())
+	bootDiskConfig := fmt.Sprintf("%s:0,import-from=%s", s.scope.GetBootDiskStorage(), importFromImage)
+
 	vmoptions := api.VirtualMachineCreateOptions{
 		ACPI:          boolToInt8(options.ACPI),
 		Agent:         "enabled=1",
 		Arch:          api.Arch(options.Arch),
+		AutoStart:     boolToInt8(true),
 		Balloon:       options.Balloon,
 		BIOS:          string(hardware.BIOS),
-		Boot:          fmt.Sprintf("order=%s", bootDvice),
-		CiCustom:
fmt.Sprintf("user=%s:%s", storageName, userSnippetPath(vmName)), + Boot: fmt.Sprintf("order=%s", bootDevice), + CiCustom: fmt.Sprintf("user=%s:%s", clusterStorageName, userSnippetPath(vmName)), Cores: hardware.CPU, CpuLimit: hardware.CPULimit, Description: options.Description, HugePages: options.HugePages.String(), - Ide: api.Ide{Ide2: fmt.Sprintf("file=%s:cloudinit,media=cdrom", storageName)}, - IPConfig: api.IPConfig{IPConfig0: network.IPConfig.String()}, + Ide: api.Ide{Ide2: cloudinitDiskConfig}, + IPConfig: api.IPConfig{IPConfig0: ipconfig0}, KeepHugePages: boolToInt8(options.KeepHugePages), KVM: boolToInt8(options.KVM), LocalTime: boolToInt8(options.LocalTime), @@ -132,13 +231,13 @@ func (s *Service) generateVMOptions() api.VirtualMachineCreateOptions { Memory: hardware.Memory, Name: vmName, NameServer: network.NameServer, - Net: api.Net{Net0: "model=virtio,bridge=vmbr0,firewall=1"}, + Net: api.Net{Net0: net0}, Numa: boolToInt8(options.NUMA), OnBoot: boolToInt8(options.OnBoot), OSType: api.OSType(options.OSType), Protection: boolToInt8(options.Protection), Reboot: int(boolToInt8(options.Reboot)), - Scsi: api.Scsi{Scsi0: fmt.Sprintf("file=%s:8", storageName)}, + Scsi: api.Scsi{Scsi0: bootDiskConfig}, ScsiHw: api.VirtioScsiPci, SearchDomain: network.SearchDomain, Serial: api.Serial{Serial0: "socket"}, @@ -152,6 +251,9 @@ func (s *Service) generateVMOptions() api.VirtualMachineCreateOptions { VMGenID: options.VMGenerationID, VGA: "serial0", } + if pool != nil { + vmoptions.Pool = *pool + } return vmoptions } diff --git a/cloud/services/compute/storage/reconcile.go b/cloud/services/compute/storage/reconcile.go index cd84472..eb84f2f 100644 --- a/cloud/services/compute/storage/reconcile.go +++ b/cloud/services/compute/storage/reconcile.go @@ -4,6 +4,8 @@ import ( "context" "errors" "fmt" + "k8s.io/utils/pointer" + "strings" "github.com/sp-yduck/proxmox-go/api" "github.com/sp-yduck/proxmox-go/proxmox" @@ -41,7 +43,7 @@ func (s *Service) createOrGetStorage(ctx context.Context) error { opts := generateVMStorageOptions(s.scope) if err := s.getStorage(ctx, opts.Storage); err != nil { if rest.IsNotFound(err) { - log.Info("storage %s not found. it will be created") + log.Info(fmt.Sprintf("storage %s not found. 
it will be created", opts.Storage)) return s.createStorage(ctx, opts) } return err @@ -59,12 +61,53 @@ func (s *Service) getStorage(ctx context.Context, name string) error { } func (s *Service) createStorage(ctx context.Context, options api.StorageCreateOptions) error { + + if options.Mkdir != nil && *options.Mkdir == false { + if err := s.createStorageDirs(ctx, options); err != nil { + return err + } + } + if _, err := s.client.CreateStorage(ctx, options.Storage, options.StorageType, options); err != nil { return err } + return nil } +func (s *Service) createStorageDirs(ctx context.Context, options api.StorageCreateOptions) error { + log := log.FromContext(ctx) + + nodes, err := s.client.Nodes(ctx) + if err != nil { + return err + } + + hasFailure := false + for _, node := range nodes { + vnc, err := s.client.NewNodeVNCWebSocketConnection(context.TODO(), node.Node) + if err != nil { + log.Error(err, fmt.Sprintf("Failed to create shell to node %s", node.Node)) + hasFailure = true + } + + for _, dir := range strings.Split(options.Content, ",") { + _, _, err := vnc.Exec(ctx, fmt.Sprintf("mkdir -p %s/%s", options.Path, dir)) + if err != nil { + log.Error(err, fmt.Sprintf("Failed creating content dir %s", dir)) + hasFailure = true + } + } + vnc.Close() + } + + if hasFailure { + return fmt.Errorf("failed creating content directories for storage %s", options.Storage) + } else { + return nil + } +} + func (s *Service) deleteStorage(ctx context.Context) error { log := log.FromContext(ctx) @@ -103,12 +146,17 @@ func (s *Service) deleteStorage(ctx context.Context) error { } func generateVMStorageOptions(scope Scope) api.StorageCreateOptions { + + // when using non-root user, we need to create directories ourselves. Otherwise they will be owned by root. + // We assume we have permission for base storage path. + mkdirs := scope.CloudClient().RESTClient().Credentials().Username == "root@pam" + storageSpec := scope.Storage() options := api.StorageCreateOptions{ Storage: storageSpec.Name, StorageType: "dir", Content: "images,snippets", - Mkdir: true, + Mkdir: pointer.Bool(mkdirs), Path: storageSpec.Path, } if options.Storage == "" { diff --git a/cmd/main.go b/cmd/main.go index 99458ff..12ce6db 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -19,6 +19,7 @@ package main import ( "flag" "os" + caipamv1 "sigs.k8s.io/cluster-api/exp/ipam/api/v1alpha1" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. 
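For reference, a minimal sketch of the directory-creation step added above in cloud/services/compute/storage/reconcile.go. It reuses only the calls already present in that hunk (Nodes, NewNodeVNCWebSocketConnection, Exec, Close) and assumes the same package and imports; it differs from the hunk in that it skips a node when the shell cannot be opened (the hunk as written would use a nil connection in that case) and passes the reconcile context instead of context.TODO(). Illustrative only, not part of the patch.

// Sketch: defensive variant of createStorageDirs. Assumes the same package,
// imports and Service struct as cloud/services/compute/storage/reconcile.go above.
func (s *Service) createStorageDirs(ctx context.Context, options api.StorageCreateOptions) error {
	log := log.FromContext(ctx)

	nodes, err := s.client.Nodes(ctx)
	if err != nil {
		return err
	}

	hasFailure := false
	for _, node := range nodes {
		// Open a shell on the node; skip the node rather than use a nil connection.
		vnc, err := s.client.NewNodeVNCWebSocketConnection(ctx, node.Node)
		if err != nil {
			log.Error(err, fmt.Sprintf("failed to create shell to node %s", node.Node))
			hasFailure = true
			continue
		}

		// Pre-create one directory per content type (e.g. images, snippets).
		for _, dir := range strings.Split(options.Content, ",") {
			if _, _, err := vnc.Exec(ctx, fmt.Sprintf("mkdir -p %s/%s", options.Path, dir)); err != nil {
				log.Error(err, fmt.Sprintf("failed creating content dir %s", dir))
				hasFailure = true
			}
		}
		vnc.Close()
	}

	if hasFailure {
		return fmt.Errorf("failed creating content directories for storage %s", options.Storage)
	}
	return nil
}

Closing each connection before moving on keeps a single websocket open at a time; a deferred Close inside a small per-node helper would work equally well.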
@@ -48,6 +49,7 @@ func init() { utilruntime.Must(infrastructurev1beta1.AddToScheme(scheme)) utilruntime.Must(infrastructurev1beta1.AddToScheme(scheme)) _ = clusterv1.AddToScheme(scheme) + _ = caipamv1.AddToScheme(scheme) //+kubebuilder:scaffold:scheme } diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclusters.yaml index 49c4ce7..a212170 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclusters.yaml @@ -67,6 +67,21 @@ spec: - host - port type: object + failureDomain: + description: Provide config to enable failure domains + properties: + nodeAsFailureDomain: + description: Treat each node as a failure domain for cluster api + type: boolean + type: object + nodes: + description: Nodes to be used for vm instances + items: + type: string + type: array + resourcePool: + description: Proxmox resource pool to be used for all vms created + type: string serverRef: description: ServerRef is used for configuring Proxmox client properties: diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachines.yaml index ab7dd28..e058f8a 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachines.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachines.yaml @@ -26,9 +26,13 @@ spec: priority: 1 type: string - jsonPath: .spec.vmID - name: vmid + name: VmID priority: 1 type: string + - jsonPath: .spec.node + name: Node + priority: 2 + type: string - jsonPath: .spec.providerID name: ProviderID type: string @@ -232,7 +236,7 @@ spec: type: integer disk: default: 50G - description: hard disk size + description: boot disk size pattern: \+?\d+(\.\d+)?[KMGT]? type: string memory: @@ -244,6 +248,10 @@ spec: description: The number of CPU sockets. Defaults to 1. minimum: 1 type: integer + storage: + description: Storage name for the boot disk. If none is provided, + the ProxmoxCluster storage name will be used + type: string type: object image: description: Image is the image to be provisioned @@ -272,9 +280,54 @@ spec: network: description: Network properties: + bridge: + default: vmbr0 + type: string ipConfig: description: 'to do : should accept multiple IPConfig' properties: + IPv4FromPoolRef: + description: IPv4FromPoolRef is a reference to an IP pool + to allocate an address from. + properties: + apiGroup: + description: APIGroup is the group for the resource being + referenced. If APIGroup is not specified, the specified + Kind must be in the core API group. For any other third-party + types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + IPv6FromPoolRef: + description: IPv6FromPoolRef is a reference to an IP pool + to allocate an address from. + properties: + apiGroup: + description: APIGroup is the group for the resource being + referenced. If APIGroup is not specified, the specified + Kind must be in the core API group. For any other third-party + types, APIGroup is required. 
+ type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic gateway: description: gateway IPv4 type: string @@ -288,12 +341,17 @@ spec: description: IPv6 with CIDR type: string type: object + model: + default: virtio + type: string nameServer: description: DNS server type: string searchDomain: description: search domain type: string + vlanTag: + type: integer type: object node: description: Node is proxmox node hosting vm instance which used for @@ -439,6 +497,9 @@ spec: pattern: (?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01]) type: string type: object + pool: + description: Pool the vm is attached to + type: string providerID: description: ProviderID type: string @@ -601,7 +662,6 @@ spec: hookscript: type: string hotplug: - description: HostPci type: string hugepages: type: string diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachinetemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachinetemplates.yaml index 400a9d7..f6642c8 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachinetemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachinetemplates.yaml @@ -36,6 +36,13 @@ spec: spec: description: ProxmoxMachineTemplateSpec defines the desired state of ProxmoxMachineTemplate properties: + nodes: + description: Restrict template to specific proxmox nodes. When failure + domains are enabled, they will have priority the configured nodes + in the template + items: + type: string + type: array template: properties: metadata.omitempty: @@ -81,7 +88,6 @@ spec: type: object type: object spec: - description: ProxmoxMachineSpec defines the desired state of ProxmoxMachine properties: cloudInit: description: CloudInit defines options related to the bootstrapping @@ -230,11 +236,6 @@ spec: type: array type: object type: object - failureDomain: - description: FailureDomain is the failure domain unique identifier - this Machine should be attached to, as defined in Cluster - API. - type: string hardware: description: Hardware properties: @@ -257,7 +258,7 @@ spec: type: integer disk: default: 50G - description: hard disk size + description: boot disk size pattern: \+?\d+(\.\d+)?[KMGT]? type: string memory: @@ -269,6 +270,10 @@ spec: description: The number of CPU sockets. Defaults to 1. minimum: 1 type: integer + storage: + description: Storage name for the boot disk. If none is + provided, the ProxmoxCluster storage name will be used + type: string type: object image: description: Image is the image to be provisioned @@ -298,9 +303,60 @@ spec: network: description: Network properties: + bridge: + default: vmbr0 + type: string ipConfig: description: 'to do : should accept multiple IPConfig' properties: + IPv4FromPoolRef: + description: IPv4FromPoolRef is a reference to an + IP pool to allocate an address from. + properties: + apiGroup: + description: APIGroup is the group for the resource + being referenced. If APIGroup is not specified, + the specified Kind must be in the core API group. + For any other third-party types, APIGroup is + required. 
+ type: string + kind: + description: Kind is the type of resource being + referenced + type: string + name: + description: Name is the name of resource being + referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + IPv6FromPoolRef: + description: IPv6FromPoolRef is a reference to an + IP pool to allocate an address from. + properties: + apiGroup: + description: APIGroup is the group for the resource + being referenced. If APIGroup is not specified, + the specified Kind must be in the core API group. + For any other third-party types, APIGroup is + required. + type: string + kind: + description: Kind is the type of resource being + referenced + type: string + name: + description: Name is the name of resource being + referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic gateway: description: gateway IPv4 type: string @@ -314,17 +370,18 @@ spec: description: IPv6 with CIDR type: string type: object + model: + default: virtio + type: string nameServer: description: DNS server type: string searchDomain: description: search domain type: string + vlanTag: + type: integer type: object - node: - description: Node is proxmox node hosting vm instance which - used for ProxmoxMachine - type: string options: description: Options properties: @@ -471,19 +528,24 @@ spec: pattern: (?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01]) type: string type: object - providerID: - description: ProviderID - type: string - vmID: - description: VMID is proxmox qemu's id - minimum: 0 - type: integer required: - image type: object required: - spec type: object + vmIDs: + description: VM ID Range that will be used for individual machines + properties: + end: + description: End of VM ID range + type: integer + start: + description: Start of VM ID range + type: integer + required: + - start + type: object required: - template type: object diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index e0d5185..53f4f5b 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -96,3 +96,36 @@ rules: - get - patch - update +- apiGroups: + - ipam.cluster.x-k8s.io + resources: + - ipaddressclaims + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - ipam.cluster.x-k8s.io + resources: + - ipaddressclaims/status + verbs: + - get + - watch +- apiGroups: + - ipam.cluster.x-k8s.io + resources: + - ipaddresses + verbs: + - get + - list + - watch +- apiGroups: + - ipam.cluster.x-k8s.io + resources: + - ipaddresses/status + verbs: + - get diff --git a/controllers/proxmoxcluster_controller.go b/controllers/proxmoxcluster_controller.go index fc6befd..363ceb6 100644 --- a/controllers/proxmoxcluster_controller.go +++ b/controllers/proxmoxcluster_controller.go @@ -134,6 +134,10 @@ func (r *ProxmoxClusterReconciler) reconcile(ctx context.Context, clusterScope * return ctrl.Result{RequeueAfter: 10 * time.Second}, nil } + if err := clusterScope.SetFailureDomains(ctx); err != nil { + return ctrl.Result{RequeueAfter: 5 * time.Second}, err + } + log.Info("Reconciled ProxmoxCluster") record.Eventf(clusterScope.ProxmoxCluster, "ProxmoxClusterReconcile", "Got control-plane endpoint - %s", controlPlaneEndpoint.Host) clusterScope.SetReady() diff --git a/controllers/proxmoxmachine_controller.go b/controllers/proxmoxmachine_controller.go index f758199..a9ffb10 100644 --- a/controllers/proxmoxmachine_controller.go +++ b/controllers/proxmoxmachine_controller.go @@ -31,10 +31,12 @@ import ( 
"sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" infrav1 "github.com/sp-yduck/cluster-api-provider-proxmox/api/v1beta1" "github.com/sp-yduck/cluster-api-provider-proxmox/cloud" "github.com/sp-yduck/cluster-api-provider-proxmox/cloud/scope" + services "github.com/sp-yduck/cluster-api-provider-proxmox/cloud/services" "github.com/sp-yduck/cluster-api-provider-proxmox/cloud/services/compute/instance" ) @@ -48,6 +50,12 @@ type ProxmoxMachineReconciler struct { //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=proxmoxmachines/status,verbs=get;update;patch //+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=proxmoxmachines/finalizers,verbs=update //+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch + +// +kubebuilder:rbac:groups=ipam.cluster.x-k8s.io,resources=ipaddressclaims,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=ipam.cluster.x-k8s.io,resources=ipaddressclaims/status,verbs=get;watch +// +kubebuilder:rbac:groups=ipam.cluster.x-k8s.io,resources=ipaddresses,verbs=get;list;watch +// +kubebuilder:rbac:groups=ipam.cluster.x-k8s.io,resources=ipaddresses/status,verbs=get + // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch // +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch @@ -152,6 +160,13 @@ func (r *ProxmoxMachineReconciler) reconcile(ctx context.Context, machineScope * instance.NewService(machineScope), } + if machineScope.Machine.Spec.FailureDomain != nil { + machineScope.SetFailureDomain(*machineScope.Machine.Spec.FailureDomain) + if machineScope.GetProxmoxCluster().Spec.FailureDomainConfig != nil && machineScope.GetProxmoxCluster().Spec.FailureDomainConfig.NodeAsFailureDomain && machineScope.NodeName() == nil { + machineScope.SetNodeName(*machineScope.Machine.Spec.FailureDomain) + } + } + for _, r := range reconcilers { if err := r.Reconcile(ctx); err != nil { log.Error(err, "Reconcile error")