Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix pods and runner api not in sync and other improvments #496

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions controllers/githubactionrunner_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,18 +116,24 @@ func (r *GithubActionRunnerReconciler) handleScaling(ctx context.Context, instan

// safety guard - always look for finalizers in order to unregister runners for pods about to delete
// pods could have been deleted by user directly and not through operator
if err = r.handleFinalization(ctx, instance, podRunnerPairs); err != nil {
// if some pods are deleted then it's better to requeue immediately, because we have to reload podRunnerPairs.
numDeleted, err := r.handleFinalization(ctx, instance, podRunnerPairs)
if err != nil {
return r.manageOutcome(ctx, instance, err)
}
if numDeleted > 0 {
return r.ManageSuccess(ctx, instance)
}

if !podRunnerPairs.inSync() {
logger.Info("Pods and runner API not in sync, returning early")
return r.manageOutcome(ctx, instance, nil)
logger.Info("Pods and runner API not in sync, returning early", "Pods", podRunnerPairs.numPods(), "Runners", podRunnerPairs.numRunners())
return r.ManageSuccessWithRequeue(ctx, instance, 3*time.Second)
}

if shouldScaleUp(podRunnerPairs, instance) {
instance.Status.CurrentSize = podRunnerPairs.numPods()
scale := funk.MaxInt([]int{instance.Spec.MinRunners - podRunnerPairs.numRunners(), 1})
scale := funk.MaxInt([]int{instance.Spec.MinRunners - podRunnerPairs.numRunners(), instance.Spec.MinRunners - podRunnerPairs.numIdle(), 1})
scale = funk.MinInt([]int{scale, instance.Spec.MaxRunners - podRunnerPairs.numRunners()})
logger.Info("Scaling up", "numInstances", scale)

if err := r.scaleUp(ctx, scale, instance); err != nil {
Expand Down Expand Up @@ -173,11 +179,11 @@ func (r *GithubActionRunnerReconciler) scaleDown(ctx context.Context, podRunnerP
}

func shouldScaleUp(podRunnerPairs podRunnerPairList, instance *garov1alpha1.GithubActionRunner) bool {
return podRunnerPairs.numRunners() < instance.Spec.MinRunners || (podRunnerPairs.allBusy() && podRunnerPairs.numRunners() < instance.Spec.MaxRunners)
return podRunnerPairs.numIdle() < instance.Spec.MinRunners && podRunnerPairs.numRunners() < instance.Spec.MaxRunners
}

func shouldScaleDown(podRunnerPairs podRunnerPairList, instance *garov1alpha1.GithubActionRunner) bool {
return podRunnerPairs.numRunners() > instance.Spec.MaxRunners || (podRunnerPairs.numIdle() > 1 && (podRunnerPairs.numRunners() > instance.Spec.MinRunners))
return podRunnerPairs.numRunners() > instance.Spec.MaxRunners || podRunnerPairs.numIdle() > instance.Spec.MinRunners
}

func (r *GithubActionRunnerReconciler) manageOutcome(ctx context.Context, instance *garov1alpha1.GithubActionRunner, issue error) (reconcile.Result, error) {
Expand Down Expand Up @@ -362,29 +368,31 @@ func (r *GithubActionRunnerReconciler) unregisterRunner(ctx context.Context, cr
}

// handleFinalization will remove runner from github based on presence of finalizer
func (r *GithubActionRunnerReconciler) handleFinalization(ctx context.Context, cr *garov1alpha1.GithubActionRunner, list podRunnerPairList) error {
func (r *GithubActionRunnerReconciler) handleFinalization(ctx context.Context, cr *garov1alpha1.GithubActionRunner, list podRunnerPairList) (int, error) {
numDeleted := 0
for _, item := range list.getPodsBeingDeletedOrEvictedOrCompleted() {
numDeleted++
// TODO - cause of failure should be checked more closely, if it does not exist we can ignore it. If it is a comms error we should stick around
if err := r.unregisterRunner(ctx, cr, item); err != nil {
return err
return numDeleted, err
}
if isEvicted(&item.pod) && env.GetBoolDefault(deleteEvictedPodsEnvVarName, true) {
logr.FromContextOrDiscard(ctx).Info("Deleting evicted pod", "podname", item.pod.Name)
err := r.DeleteResourceIfExists(ctx, &item.pod)
if err != nil {
return err
return numDeleted, err
}
}
if isCompleted(&item.pod) {
logr.FromContextOrDiscard(ctx).Info("Deleting succeeded pod", "podname", item.pod.Name)
err := r.DeleteResourceIfExists(ctx, &item.pod)
if err != nil {
return err
return numDeleted, err
}
}
}

return nil
return numDeleted, nil
}

// tokenForRef returns the token referenced from the GithubActionRunner Spec.TokenRef
Expand Down
3 changes: 3 additions & 0 deletions controllers/githubapi/runnerapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package githubapi

import (
"context"
"github.com/go-logr/logr"
"github.com/google/go-github/v47/github"
"github.com/gregjones/httpcache"
"github.com/palantir/go-githubapp/githubapp"
Expand Down Expand Up @@ -57,6 +58,7 @@ func (r runnerAPI) getClient(ctx context.Context, organization string, token str

// Return all runners for the org
func (r runnerAPI) GetRunners(ctx context.Context, organization string, repository string, token string) ([]*github.Runner, error) {
logger := logr.FromContextOrDiscard(ctx)
client, err := r.getClient(ctx, organization, token)
if err != nil {
return nil, err
Expand All @@ -78,6 +80,7 @@ func (r runnerAPI) GetRunners(ctx context.Context, organization string, reposito
if err != nil {
return allRunners, err
}
logger.Info("GerRunners GitHub Api Rate limit", "Rate", response.Rate)
allRunners = append(allRunners, runners.Runners...)
if response.NextPage == 0 {
break
Expand Down