Skip to content

Commit

Permalink
Fix bug upgrading from non-mesh to mesh (hashicorp#1136)
Browse files Browse the repository at this point in the history
When modifying a deployment to add it to the mesh, there will be a
time when the endpoints list has both non-injected and injected
pods. Previously we were short-circuiting in this case which
meant the injected pods never got registered.
  • Loading branch information
lkysow authored Apr 13, 2022
1 parent 75ad870 commit 4a4f555
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 83 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ BUG FIXES:
* Fix issue where clusters not in the same namespace as their deployment name could not be upgraded. [[GH-1115](https://github.com/hashicorp/consul-k8s/pull/1115)]
* Fix issue where the CLI was looking for secrets in namespaces other than the namespace targeted by the release. [[GH-1156](https://github.com/hashicorp/consul-k8s/pull/1156)]
* Fix issue where the federation secret was not being found in certain configurations. [[GH-1154](https://github.com/hashicorp/consul-k8s/issue/1154)]
* Control Plane
* Fix issue where upgrading a deployment from non-service mesh to service mesh would cause Pods to hang in init. [[GH-1136](https://github.com/hashicorp/consul-k8s/pull/1136)]

IMPROVEMENTS:
* Helm
Expand Down
5 changes: 0 additions & 5 deletions control-plane/connect-inject/endpoints_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,6 @@ func (r *EndpointsController) Reconcile(ctx context.Context, req ctrl.Request) (
r.Log.Error(err, "failed to register services or health check", "name", serviceEndpoints.Name, "ns", serviceEndpoints.Namespace)
errs = multierror.Append(errs, err)
}
} else {
// If this endpoints object points to a pod that has injection disabled,
// then we want to ignore it for any further processing and exit early.
r.Log.Info("ignoring because endpoints pods have not been injected", "name", serviceEndpoints.Name, "ns", serviceEndpoints.Namespace)
return ctrl.Result{}, nil
}
}
}
Expand Down
225 changes: 147 additions & 78 deletions control-plane/connect-inject/endpoints_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@ package connectinject
import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"

Expand Down Expand Up @@ -1368,6 +1365,89 @@ func TestReconcileCreateEndpoint(t *testing.T) {
},
},
},
// Test that if a user is updating their deployment from non-mesh to mesh that we
// register the mesh pods.
{
name: "Some endpoints injected, some not.",
consulSvcName: "service-created",
k8sObjects: func() []runtime.Object {
pod1 := createPod("pod1", "1.2.3.4", true, true)
pod2 := createPod("pod2", "2.3.4.5", false, false)

// NOTE: the order of the addresses is important. The non-mesh pod must be first to correctly
// reproduce the bug where we were exiting the loop early if any pod was non-mesh.
endpointWithTwoAddresses := &corev1.Endpoints{
ObjectMeta: metav1.ObjectMeta{
Name: "service-created",
Namespace: "default",
},
Subsets: []corev1.EndpointSubset{
{
Addresses: []corev1.EndpointAddress{
{
IP: "2.3.4.5",
NodeName: &nodeName,
TargetRef: &corev1.ObjectReference{
Kind: "Pod",
Name: "pod2",
Namespace: "default",
},
},
{
IP: "1.2.3.4",
NodeName: &nodeName,
TargetRef: &corev1.ObjectReference{
Kind: "Pod",
Name: "pod1",
Namespace: "default",
},
},
},
},
},
}
return []runtime.Object{pod1, pod2, endpointWithTwoAddresses}
},
initialConsulSvcs: []*api.AgentServiceRegistration{},
expectedNumSvcInstances: 1,
expectedConsulSvcInstances: []*api.CatalogService{
{
ServiceID: "pod1-service-created",
ServiceName: "service-created",
ServiceAddress: "1.2.3.4",
ServicePort: 0,
ServiceMeta: map[string]string{MetaKeyPodName: "pod1", MetaKeyKubeServiceName: "service-created", MetaKeyKubeNS: "default", MetaKeyManagedBy: managedByValue},
ServiceTags: []string{},
},
},
expectedProxySvcInstances: []*api.CatalogService{
{
ServiceID: "pod1-service-created-sidecar-proxy",
ServiceName: "service-created-sidecar-proxy",
ServiceAddress: "1.2.3.4",
ServicePort: 20000,
ServiceProxy: &api.AgentServiceConnectProxyConfig{
DestinationServiceName: "service-created",
DestinationServiceID: "pod1-service-created",
LocalServiceAddress: "",
LocalServicePort: 0,
},
ServiceMeta: map[string]string{MetaKeyPodName: "pod1", MetaKeyKubeServiceName: "service-created", MetaKeyKubeNS: "default", MetaKeyManagedBy: managedByValue},
ServiceTags: []string{},
},
},
expectedAgentHealthChecks: []*api.AgentCheck{
{
CheckID: "default/pod1-service-created/kubernetes-health-check",
ServiceName: "service-created",
ServiceID: "pod1-service-created",
Name: "Kubernetes Health Check",
Status: api.HealthPassing,
Output: kubernetesSuccessReasonMsg,
Type: ttl,
},
},
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
Expand Down Expand Up @@ -2709,6 +2789,70 @@ func TestReconcileUpdateEndpoint(t *testing.T) {
},
enableACLs: true,
},
// When a Deployment has the mesh annotation removed, Kube will delete the old pods. When it deletes the last Pod,
// the endpoints object will contain only non-mesh pods, but you'll still have one consul service instance to clean up.
{
name: "When a Deployment moves from mesh to non mesh its service instances should be deleted",
consulSvcName: "service-updated",
k8sObjects: func() []runtime.Object {
pod2 := createPod("pod2", "2.3.4.5", false, false)
endpoint := &corev1.Endpoints{
ObjectMeta: metav1.ObjectMeta{
Name: "service-updated",
Namespace: "default",
},
Subsets: []corev1.EndpointSubset{
{
Addresses: []corev1.EndpointAddress{
{
IP: "2.3.4.5",
NodeName: &nodeName,
TargetRef: &corev1.ObjectReference{
Kind: "Pod",
Name: "pod2",
Namespace: "default",
},
},
},
},
},
}
return []runtime.Object{pod2, endpoint}
},
initialConsulSvcs: []*api.AgentServiceRegistration{
{
ID: "pod1-service-updated",
Name: "service-updated",
Port: 80,
Address: "1.2.3.4",
Meta: map[string]string{
MetaKeyKubeServiceName: "service-updated",
MetaKeyKubeNS: "default",
MetaKeyManagedBy: managedByValue,
MetaKeyPodName: "pod1",
},
},
{
Kind: api.ServiceKindConnectProxy,
ID: "pod1-service-updated-sidecar-proxy",
Name: "service-updated-sidecar-proxy",
Port: 20000,
Address: "1.2.3.4",
Proxy: &api.AgentServiceConnectProxyConfig{
DestinationServiceName: "service-updated",
DestinationServiceID: "pod1-service-updated",
},
Meta: map[string]string{
MetaKeyKubeServiceName: "service-updated",
MetaKeyKubeNS: "default",
MetaKeyManagedBy: managedByValue,
MetaKeyPodName: "pod1",
},
},
},
expectedConsulSvcInstances: nil,
expectedProxySvcInstances: nil,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
Expand Down Expand Up @@ -3298,81 +3442,6 @@ func TestReconcileIgnoresServiceIgnoreLabel(t *testing.T) {
}
}

// Test that when endpoints pods have not been connect-injected (i.e. not in the service mesh)
// we don't make any API calls to Consul.
// This is because we want to avoid any unnecessary calls. Especially if the client agent is unreachable
// and any calls to it will result in an i/o timeout errors, it will
// slow down processing of the events by the endpoints controller making unnecessary calls and waiting for ~30sec.
func TestReconcile_endpointsIgnoredWhenNotInjected(t *testing.T) {
nodeName := "test-node"
namespace := "default"

// Set up the fake Kubernetes client with an endpoint, pod, consul client, and the default namespace.
endpoint := &corev1.Endpoints{
ObjectMeta: metav1.ObjectMeta{
Name: "not-in-mesh",
Namespace: namespace,
},
Subsets: []corev1.EndpointSubset{
{
Addresses: []corev1.EndpointAddress{
{
IP: "1.2.3.4",
NodeName: &nodeName,
TargetRef: &corev1.ObjectReference{
Kind: "Pod",
Name: "pod1",
Namespace: namespace,
},
},
},
},
},
}
pod1 := createPod("pod1", "1.2.3.4", false, true)
fakeClientPod := createPod("fake-consul-client", "127.0.0.1", false, true)
fakeClientPod.Labels = map[string]string{"component": "client", "app": "consul", "release": "consul"}
ns := corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}}
k8sObjects := []runtime.Object{endpoint, pod1, fakeClientPod, &ns}
fakeClient := fake.NewClientBuilder().WithRuntimeObjects(k8sObjects...).Build()

// Create test Consul server.
consul := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("content-type", "application/json")
if r != nil {
t.Fatalf("should not receive any calls to Consul client")
}
}))
t.Cleanup(consul.Close)

cfg := &api.Config{Address: consul.URL}
consulClient, err := api.NewClient(cfg)
require.NoError(t, err)
parsedURL, err := url.Parse(consul.URL)
require.NoError(t, err)
consulPort := parsedURL.Port()

// Create the endpoints controller.
ep := &EndpointsController{
Client: fakeClient,
Log: logrtest.TestLogger{T: t},
ConsulClient: consulClient,
ConsulPort: consulPort,
ConsulScheme: "http",
AllowK8sNamespacesSet: mapset.NewSetWith("*"),
DenyK8sNamespacesSet: mapset.NewSetWith(),
ReleaseName: "consul",
ReleaseNamespace: namespace,
ConsulClientCfg: cfg,
}

// Run the reconcile process to deregister the service if it was registered before.
namespacedName := types.NamespacedName{Namespace: namespace, Name: "not-in-mesh"}
resp, err := ep.Reconcile(context.Background(), ctrl.Request{NamespacedName: namespacedName})
require.NoError(t, err)
require.False(t, resp.Requeue)
}

// Test that when an endpoints pod specifies the name for the Kubernetes service it wants to use
// for registration, all other endpoints for that pod are skipped.
func TestReconcile_podSpecifiesExplicitService(t *testing.T) {
Expand Down

0 comments on commit 4a4f555

Please sign in to comment.