Skip to content

Commit

Permalink
feat: handle MC force delete (#192)
Browse files Browse the repository at this point in the history
  • Loading branch information
Arvindthiru authored Sep 18, 2024
1 parent bb74f0a commit 5740632
Show file tree
Hide file tree
Showing 13 changed files with 681 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ spec:
- --leader-election-namespace={{ .Values.leaderElectionNamespace }}
- --v={{ .Values.logVerbosity }}
- --add_dir_header
- --force-delete-wait-time={{ .Values.forceDeleteWaitTime }}
ports:
- name: metrics
containerPort: 8080
Expand Down
8 changes: 8 additions & 0 deletions charts/hub-net-controller-manager/templates/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,14 @@ rules:
- get
- patch
- update
- apiGroups:
- cluster.kubernetes-fleet.io
resources:
- memberclusters
verbs:
- get
- list
- watch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
Expand Down
3 changes: 2 additions & 1 deletion charts/hub-net-controller-manager/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@ replicaCount: 1

image:
repository: ghcr.io/azure/fleet-networking/hub-net-controller-manager
pullPolicy: IfNotPresent
pullPolicy: Always
# Overrides the image tag whose default is the chart appVersion.
tag: "v0.1.0"

logVerbosity: 2

leaderElectionNamespace: fleet-system
fleetSystemNamespace: fleet-system
forceDeleteWaitTime: 2m0s

resources:
limits:
Expand Down
15 changes: 15 additions & 0 deletions cmd/hub-net-controller-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,13 @@ import (
"sigs.k8s.io/controller-runtime/pkg/webhook"

//+kubebuilder:scaffold:imports
clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"

fleetnetv1alpha1 "go.goms.io/fleet-networking/api/v1alpha1"
"go.goms.io/fleet-networking/pkg/controllers/hub/endpointsliceexport"
"go.goms.io/fleet-networking/pkg/controllers/hub/internalserviceexport"
"go.goms.io/fleet-networking/pkg/controllers/hub/internalserviceimport"
"go.goms.io/fleet-networking/pkg/controllers/hub/membercluster"
"go.goms.io/fleet-networking/pkg/controllers/hub/serviceimport"
)

Expand All @@ -47,11 +49,14 @@ var (
internalServiceExportRetryInterval = flag.Duration("internalserviceexport-retry-interval", 2*time.Second,
"The wait time for the internalserviceexport controller to requeue the request and to wait for the"+
"ServiceImport controller to resolve the service Spec")

forceDeleteWaitTime = flag.Duration("force-delete-wait-time", 15*time.Minute, "The duration the fleet hub agent waits before trying to force delete a member cluster.")
)

func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
utilruntime.Must(fleetnetv1alpha1.AddToScheme(scheme))
utilruntime.Must(clusterv1beta1.AddToScheme(scheme))
klog.InitFlags(nil)
//+kubebuilder:scaffold:scheme
}
Expand Down Expand Up @@ -140,6 +145,16 @@ func main() {
exitWithErrorFunc()
}

klog.V(1).InfoS("Start to setup MemberCluster controller")
if err := (&membercluster.Reconciler{
Client: mgr.GetClient(),
Recorder: mgr.GetEventRecorderFor(membercluster.ControllerName),
ForceDeleteWaitTime: *forceDeleteWaitTime,
}).SetupWithManager(mgr); err != nil {
klog.ErrorS(err, "Unable to create MemberCluster controller")
exitWithErrorFunc()
}

klog.V(1).InfoS("Starting ServiceExportImport controller manager")
if err := mgr.Start(ctx); err != nil {
klog.ErrorS(err, "Problem running manager")
Expand Down
2 changes: 2 additions & 0 deletions examples/getting-started/artifacts/hub-rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ rules:
- patch
- apiGroups:
- networking.fleet.azure.com
- cluster.kubernetes-fleet.io
resources: ["*"]
verbs: ["*"]
---
Expand Down Expand Up @@ -70,6 +71,7 @@ rules:
- patch
- apiGroups:
- networking.fleet.azure.com
- cluster.kubernetes-fleet.io
resources: ["*"]
verbs: ["*"]
---
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ require (
require (
github.com/stretchr/testify v1.9.0
go.goms.io/fleet v0.10.5
golang.org/x/sync v0.7.0
)

require (
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbht
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down
4 changes: 2 additions & 2 deletions pkg/common/hubconfig/hubconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ const (

// Naming pattern of member cluster namespace in hub cluster, should be the same as envValue as defined in
// https://github.com/Azure/fleet/blob/main/pkg/utils/common.go
hubNamespaceNameFormat = "fleet-member-%s"
HubNamespaceNameFormat = "fleet-member-%s"
)

// PrepareHubConfig return the config holding attributes for a Kubernetes client to request hub cluster.
Expand Down Expand Up @@ -115,5 +115,5 @@ func FetchMemberClusterNamespace() (string, error) {
klog.ErrorS(err, "Member cluster name cannot be empty")
return "", err
}
return fmt.Sprintf(hubNamespaceNameFormat, mcName), nil
return fmt.Sprintf(HubNamespaceNameFormat, mcName), nil
}
2 changes: 1 addition & 1 deletion pkg/common/hubconfig/hubconfig_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ func TestFetchMemberClusterNamespace(t *testing.T) {
name: "environment variable is present",
envKey: "MEMBER_CLUSTER_NAME",
envValue: memberCluster,
want: fmt.Sprintf(hubNamespaceNameFormat, memberCluster),
want: fmt.Sprintf(HubNamespaceNameFormat, memberCluster),
wantErr: false,
},
{
Expand Down
134 changes: 134 additions & 0 deletions pkg/controllers/hub/membercluster/controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
Copyright (c) Microsoft Corporation.
Licensed under the MIT license.
*/

// Package membercluster features the MemberCluster controller for watching
// update/delete events to the MemberCluster object and removes finalizers
// on all fleet networking resources in the fleet member cluster namespace.
package membercluster

import (
"context"
"fmt"
"time"

"golang.org/x/sync/errgroup"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/predicate"

clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"
"go.goms.io/fleet/pkg/utils/controller"

fleetnetv1alpha1 "go.goms.io/fleet-networking/api/v1alpha1"
"go.goms.io/fleet-networking/pkg/common/hubconfig"
)

const (
ControllerName = "membercluster-controller"
)

// Reconciler reconciles a MemberCluster object.
type Reconciler struct {
client.Client
Recorder record.EventRecorder
// the wait time in minutes before we need to force delete a member cluster.
ForceDeleteWaitTime time.Duration
}

// Reconcile watches the deletion of the member cluster and removes finalizers on fleet networking resources in the
// member cluster namespace.
func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
mcObjRef := klog.KRef(req.Namespace, req.Name)
startTime := time.Now()
klog.V(2).InfoS("Reconciliation starts", "memberCluster", mcObjRef)
defer func() {
latency := time.Since(startTime).Milliseconds()
klog.V(2).InfoS("Reconciliation ends", "memberCluster", mcObjRef, "latency", latency)
}()
var mc clusterv1beta1.MemberCluster
if err := r.Client.Get(ctx, req.NamespacedName, &mc); err != nil {
if errors.IsNotFound(err) {
klog.V(4).InfoS("Ignoring NotFound memberCluster", "memberCluster", mcObjRef)
return ctrl.Result{}, nil
}
klog.ErrorS(err, "Failed to get memberCluster", "memberCluster", mcObjRef)
return ctrl.Result{}, err
}
if mc.DeletionTimestamp.IsZero() {
klog.ErrorS(controller.NewUnexpectedBehaviorError(fmt.Errorf("member cluster %s is not being deleted",
mc.Name)), "The member cluster should have deletionTimeStamp set to a non-zero/non-nil value")
return ctrl.Result{}, nil // no need to retry.
}

// Handle deleting member cluster, removes finalizers on all the resources in the cluster namespace
// after member cluster force delete wait time.
if !mc.DeletionTimestamp.IsZero() && time.Since(mc.DeletionTimestamp.Time) >= r.ForceDeleteWaitTime {
klog.V(2).InfoS("The member cluster deletion is stuck removing the "+
"finalizers from all the resources in member cluster namespace", "memberCluster", mcObjRef)
return r.removeFinalizer(ctx, mc)
}
// we need to only wait for force delete wait time, if the update/delete member cluster event takes
// longer to be reconciled we need to account for that time.
return ctrl.Result{RequeueAfter: r.ForceDeleteWaitTime - time.Since(mc.DeletionTimestamp.Time)}, nil
}

// removeFinalizer removes finalizers on the resources in the member cluster namespace.
// For EndpointSliceExport, InternalServiceImport & InternalServiceExport resources, the finalizers should be
// removed by other hub networking controllers when leaving. So this MemberCluster controller only handles
// EndpointSliceImports here.
func (r *Reconciler) removeFinalizer(ctx context.Context, mc clusterv1beta1.MemberCluster) (ctrl.Result, error) {
// Remove finalizer for EndpointSliceImport resources in the cluster namespace.
mcObjRef := klog.KRef(mc.Namespace, mc.Name)
mcNamespace := fmt.Sprintf(hubconfig.HubNamespaceNameFormat, mc.Name)
var endpointSliceImportList fleetnetv1alpha1.EndpointSliceImportList
if err := r.Client.List(ctx, &endpointSliceImportList, client.InNamespace(mcNamespace)); err != nil {
klog.ErrorS(err, "Failed to list endpointSliceImports", "memberCluster", mcObjRef)
return ctrl.Result{}, err
}
errs, ctx := errgroup.WithContext(ctx)
for i := range endpointSliceImportList.Items {
esi := &endpointSliceImportList.Items[i]
errs.Go(func() error {
esiObjRef := klog.KRef(esi.Namespace, esi.Name)
esi.SetFinalizers(nil)
if err := r.Client.Update(ctx, esi); err != nil {
klog.ErrorS(err, "Failed to remove finalizers for endpointSliceImport",
"memberCluster", mcObjRef, "endpointSliceImport", esiObjRef)
return err
}
klog.V(2).InfoS("Removed finalizers for endpointSliceImport",
"memberCluster", mcObjRef, "endpointSliceImport", esiObjRef)
return nil
})
}
return ctrl.Result{}, errs.Wait()
}

// SetupWithManager sets up the controller with the Manager.
func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error {
customPredicate := predicate.Funcs{
CreateFunc: func(e event.CreateEvent) bool {
// Ignore creation events.
return false
},
DeleteFunc: func(e event.DeleteEvent) bool {
// trigger reconcile on delete event just in case update event is missed.
return true
},
UpdateFunc: func(e event.UpdateEvent) bool {
// If new object is being deleted, trigger reconcile.
return !e.ObjectNew.GetDeletionTimestamp().IsZero()
},
}
// Watch for changes to primary resource MemberCluster
return ctrl.NewControllerManagedBy(mgr).
For(&clusterv1beta1.MemberCluster{}).
WithEventFilter(customPredicate).
Complete(r)
}
Loading

0 comments on commit 5740632

Please sign in to comment.