From 30c7dbc7bdab7b29cc24c02d81cc2c2fc5d1a48f Mon Sep 17 00:00:00 2001
From: Robert Rati
Date: Thu, 14 May 2015 09:18:24 -0400
Subject: [PATCH] Correct logic for failing after % of containers fail. #7790

---
 test/e2e/util.go | 47 +++++++++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/test/e2e/util.go b/test/e2e/util.go
index 0e3559d737dce..36252d0a77d05 100644
--- a/test/e2e/util.go
+++ b/test/e2e/util.go
@@ -68,6 +68,11 @@ type TestContextType struct {
 
 var testContext TestContextType
 
+type ContainerFailures struct {
+	status   *api.ContainerStateTerminated
+	restarts int
+}
+
 func Logf(format string, a ...interface{}) {
 	fmt.Fprintf(GinkgoWriter, "INFO: "+format+"\n", a...)
 }
@@ -567,6 +572,7 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
 	pending := 0
 	unknown := 0
 	inactive := 0
+	failedContainers := 0
 	time.Sleep(10 * time.Second)
 
 	// TODO: Use a reflector both to put less strain on the cluster and
@@ -578,8 +584,8 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
 		for _, p := range currentPods.Items {
 			if p.Status.Phase == api.PodRunning {
 				current++
-				if err := VerifyContainersAreNotFailed(p, maxContainerFailures); err != nil {
-					return err
+				for _, v := range FailedContainers(p) {
+					failedContainers = failedContainers + v.restarts
 				}
 			} else if p.Status.Phase == api.PodPending {
 				if p.Spec.Host == "" {
@@ -630,6 +636,10 @@ func RunRC(c *client.Client, name string, ns, image string, replicas int) error
 		}
 		last = current
 		oldPods = currentPods
+
+		if failedContainers > maxContainerFailures {
+			return fmt.Errorf("%d containers failed which is more than allowed %d", failedContainers, maxContainerFailures)
+		}
 	}
 	if current != replicas {
 		return fmt.Errorf("Only %d pods started out of %d", current, replicas)
@@ -698,35 +708,36 @@ func listPods(c *client.Client, namespace string, label labels.Selector, field f
 	return pods, err
 }
 
-//VerifyContainersAreNotFailed confirms that containers didn't enter an invalid state.
-//For example, too many restarts, or non nill Termination, and so on.
-func VerifyContainersAreNotFailed(pod api.Pod, restartMax int) error {
-	var errStrings []string
+// FailedContainers inspects all containers in a pod and returns failure
+// information for containers that have failed or been restarted.
+// A map is returned where the key is the containerID and the value is a
+// struct containing the restart and failure information
+func FailedContainers(pod api.Pod) map[string]ContainerFailures {
+	var state ContainerFailures
+	states := make(map[string]ContainerFailures)
 
 	statuses := pod.Status.ContainerStatuses
 	if len(statuses) == 0 {
 		return nil
 	} else {
 		for _, status := range statuses {
-			var errormsg string = ""
 			if status.State.Termination != nil {
-				errormsg = "status.State.Termination was nil"
+				states[status.ContainerID] = ContainerFailures{status: status.State.Termination}
 			} else if status.LastTerminationState.Termination != nil {
-				errormsg = "status.LastTerminationState.Termination was nil"
-			} else if status.RestartCount > restartMax {
-				errormsg = fmt.Sprintf("restarted %d times", restartMax)
+				states[status.ContainerID] = ContainerFailures{status: status.LastTerminationState.Termination}
 			}
-
-			if len(errormsg) != 0 {
-				errStrings = append(errStrings, fmt.Sprintf("Error: Pod %s (host: %s) : Container w/ name %s status was bad (%v).", pod.Name, pod.Spec.Host, status.Name, errormsg))
+			if status.RestartCount > 0 {
+				var ok bool
+				if state, ok = states[status.ContainerID]; !ok {
+					state = ContainerFailures{}
+				}
+				state.restarts = status.RestartCount
+				states[status.ContainerID] = state
 			}
 		}
 	}
 
-	if len(errStrings) > 0 {
-		return fmt.Errorf(strings.Join(errStrings, "\n"))
-	}
-	return nil
+	return states
 }
 
 // Prints the histogram of the events and returns the number of bad events.
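For readers skimming the change, the sketch below (not part of the patch) walks through the failure-accounting behaviour this commit introduces: terminated or restarted containers are collected into a map keyed by container ID, restart counts are summed across pods, and the run fails once the total exceeds maxContainerFailures. The containerStatus and containerStateTerminated types and the sample data are simplified stand-ins invented for illustration; the real code operates on api.Pod from the Kubernetes client library.

package main

import "fmt"

// containerStateTerminated is a simplified stand-in for api.ContainerStateTerminated.
type containerStateTerminated struct {
	ExitCode int
	Reason   string
}

// containerStatus is a simplified stand-in for api.ContainerStatus.
type containerStatus struct {
	ContainerID  string
	Termination  *containerStateTerminated
	RestartCount int
}

// containerFailures mirrors the ContainerFailures struct added by the patch.
type containerFailures struct {
	status   *containerStateTerminated
	restarts int
}

// failedContainers mirrors the shape of the patch's FailedContainers: one map
// entry per container that terminated or restarted, keyed by container ID.
func failedContainers(statuses []containerStatus) map[string]containerFailures {
	states := make(map[string]containerFailures)
	for _, s := range statuses {
		if s.Termination != nil {
			states[s.ContainerID] = containerFailures{status: s.Termination}
		}
		if s.RestartCount > 0 {
			state := states[s.ContainerID] // zero value if no termination was recorded
			state.restarts = s.RestartCount
			states[s.ContainerID] = state
		}
	}
	return states
}

func main() {
	const maxContainerFailures = 2

	// Two hypothetical pods: one container restarted once, another terminated
	// with an error and restarted twice.
	pods := [][]containerStatus{
		{{ContainerID: "docker://a", RestartCount: 1}},
		{{ContainerID: "docker://b", Termination: &containerStateTerminated{ExitCode: 1, Reason: "Error"}, RestartCount: 2}},
	}

	// RunRC now accumulates restart counts across all running pods...
	failed := 0
	for _, statuses := range pods {
		for _, v := range failedContainers(statuses) {
			failed += v.restarts
		}
	}

	// ...and fails the whole run once the total crosses the threshold,
	// instead of erroring on the first container with any failure state.
	if failed > maxContainerFailures {
		fmt.Printf("%d containers failed which is more than allowed %d\n", failed, maxContainerFailures)
		return
	}
	fmt.Printf("%d container failures, within the allowed %d\n", failed, maxContainerFailures)
}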