From f4c9f47e48c3815e95fe9574e824524d34a20219 Mon Sep 17 00:00:00 2001 From: "Vlad A. Ionescu" <446771+vladaionescu@users.noreply.github.com> Date: Mon, 23 Oct 2023 13:11:17 -0700 Subject: [PATCH] Fix for limited parallelism when the target is the same but has different args (#3406) Fixes #2377 The previous `VisitedCollection` implementation attempts to wait for ongoing targets with the same name to complete before comparing the inputs of the target. This has the advantage of being more precise with which ARGs actually influence the outcome of the targets, ignoring overriding ARGs that end up being unused within those targets. But it also has the disadvantage that all targets with the same name (but different overriding args) execute sequentially. This new implementation simplifies this greatly, by computing the target input hash upfront based on all overriding args, without knowing if there are any args that will end up being unused. The result is that we are able to run all targets with the same name but different args in parallel. But it also has the disadvantage that in some cases we would create duplicated LLBs for some targets when certain overriding args are different but they are unused. Buildkit will generally de-duplicate the LLB in these cases, although it's possible that there might be edge cases if the LLB construction is not consistent. --------- Co-authored-by: nacho Co-authored-by: Vlad A. Ionescu --- .earthly_version_flag_overrides | 2 +- Earthfile | 2 +- earthfile2llb/converter.go | 1 - earthfile2llb/earthfile2llb.go | 60 ++++++-- features/features.go | 19 +-- states/dedup/targetinput.go | 30 +--- states/states.go | 28 ++-- states/visited-legacy.go | 161 ++++++++++++++++++++ states/visited-upfront-hash.go | 56 +++++++ states/visited.go | 157 +------------------ tests/Earthfile | 53 ++----- tests/sequential-locally.earth | 33 ---- tests/visited-upfront-hash-collection.earth | 20 +++ 13 files changed, 339 insertions(+), 283 deletions(-) create mode 100644 states/visited-legacy.go create mode 100644 states/visited-upfront-hash.go delete mode 100644 tests/sequential-locally.earth create mode 100644 tests/visited-upfront-hash-collection.earth diff --git a/.earthly_version_flag_overrides b/.earthly_version_flag_overrides index 878cb439..bc37a6b0 100644 --- a/.earthly_version_flag_overrides +++ b/.earthly_version_flag_overrides @@ -1 +1 @@ -referenced-save-only,use-copy-include-patterns,earthly-version-arg,use-cache-command,use-host-command,check-duplicate-images,use-copy-link,parallel-load,shell-out-anywhere,new-platform,no-tar-build-output,wait-block +referenced-save-only,use-copy-include-patterns,earthly-version-arg,use-cache-command,use-host-command,check-duplicate-images,use-copy-link,parallel-load,shell-out-anywhere,new-platform,no-tar-build-output,wait-block,use-visited-upfront-hash-collection diff --git a/Earthfile b/Earthfile index ffdcf495..f371f7bb 100644 --- a/Earthfile +++ b/Earthfile @@ -466,7 +466,7 @@ earthly-docker: # Otherwise, it will attempt to login to the docker hub mirror using the provided username and password earthly-integration-test-base: FROM +earthly-docker - RUN apk update && apk add pcre-tools curl python3 bash perl findutils expect yq + RUN apk update && apk add pcre-tools curl python3 bash perl findutils expect yq && apk add --upgrade sed COPY scripts/acbtest/acbtest scripts/acbtest/acbgrep /bin/ ENV NO_DOCKER=1 ENV NETWORK_MODE=host # Note that this breaks access to embedded registry in WITH DOCKER. diff --git a/earthfile2llb/converter.go b/earthfile2llb/converter.go index 93add1c4..5875d512 100644 --- a/earthfile2llb/converter.go +++ b/earthfile2llb/converter.go @@ -124,7 +124,6 @@ func NewConverter(ctx context.Context, target domain.Target, bc *buildcontext.Da Final: sts, Visited: opt.Visited, } - sts.AddOverridingVarsAsBuildArgInputs(opt.OverridingVars) newCollOpt := variables.NewCollectionOpt{ Console: opt.Console, Target: target, diff --git a/earthfile2llb/earthfile2llb.go b/earthfile2llb/earthfile2llb.go index 2700e21c..dc997082 100644 --- a/earthfile2llb/earthfile2llb.go +++ b/earthfile2llb/earthfile2llb.go @@ -48,7 +48,7 @@ type ConvertOpt struct { CleanCollection *cleanup.Collection // Visited is a collection of target states which have been converted to LLB. // This is used for deduplication and infinite cycle detection. - Visited *states.VisitedCollection + Visited states.VisitedCollection // PlatformResolver is a platform resolver, which keeps track of // the current platform, the native platform, the user platform, and // the default platform. @@ -132,6 +132,10 @@ type ConvertOpt struct { // parentDepSub is a channel informing of any new dependencies from the parent. parentDepSub chan string // chan of sts IDs. + // TargetInputHashStackSet is a set of target input hashes that are currently in the call stack. + // This is used to detect infinite cycles. + TargetInputHashStackSet map[string]bool + // ContainerFrontend is the currently used container frontend, as detected by Earthly at app start. It provides info // and access to commands to manipulate the current container frontend. ContainerFrontend containerutil.ContainerFrontend @@ -189,12 +193,19 @@ func Earthfile2LLB(ctx context.Context, target domain.Target, opt ConvertOpt, in if opt.SolveCache == nil { opt.SolveCache = states.NewSolveCache() } - if opt.Visited == nil { - opt.Visited = states.NewVisitedCollection() - } if opt.MetaResolver == nil { opt.MetaResolver = NewCachedMetaResolver(opt.GwClient) } + if opt.TargetInputHashStackSet == nil { + opt.TargetInputHashStackSet = make(map[string]bool) + } else { + // We are in a recursive call. Copy the stack set. + newMap := make(map[string]bool) + for k, v := range opt.TargetInputHashStackSet { + newMap[k] = v + } + opt.TargetInputHashStackSet = newMap + } egWait := false if opt.ErrorGroup == nil { opt.ErrorGroup, ctx = serrgroup.WithContext(ctx) @@ -225,6 +236,14 @@ func Earthfile2LLB(ctx context.Context, target domain.Target, opt ConvertOpt, in return nil, errors.Wrapf(err, "resolve build context for target %s", target.String()) } + if opt.Visited == nil { + if bc.Features.UseVisitedUpfrontHashCollection { + opt.Visited = states.NewVisitedUpfrontHashCollection() + } else { + opt.Visited = states.NewLegacyVisitedCollection() + } + } + opt.Features = bc.Features if initialCall && !bc.Features.ReferencedSaveOnly { opt.DoSaves = !target.IsRemote() // legacy mode only saves artifacts that are locally referenced @@ -241,17 +260,21 @@ func Earthfile2LLB(ctx context.Context, target domain.Target, opt ConvertOpt, in if err != nil { return nil, err } - if opt.MainTargetDetailsFunc != nil { - err := opt.MainTargetDetailsFunc(TargetDetails{ - EarthlyOrgName: bc.EarthlyOrgName, - EarthlyProjectName: bc.EarthlyProjectName, - }) - if err != nil { - return nil, errors.Wrapf(err, "target details handler error: %v", err) - } - opt.MainTargetDetailsFunc = nil + tiHash, err := sts.TargetInput().Hash() + if err != nil { + return nil, err } if found { + if opt.TargetInputHashStackSet[tiHash] { + return nil, errors.Errorf("infinite cycle detected for target %s", target.String()) + } + // Wait for the existing sts to complete first. + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-sts.Done(): + } + // The found target may have initially been created by a FROM or a COPY; // however, if it is referenced a second time by a BUILD, it may contain items that // require a save (export to the local host) or a push @@ -275,6 +298,17 @@ func Earthfile2LLB(ctx context.Context, target domain.Target, opt ConvertOpt, in Visited: opt.Visited, }, nil } + opt.TargetInputHashStackSet[tiHash] = true + if opt.MainTargetDetailsFunc != nil { + err := opt.MainTargetDetailsFunc(TargetDetails{ + EarthlyOrgName: bc.EarthlyOrgName, + EarthlyProjectName: bc.EarthlyProjectName, + }) + if err != nil { + return nil, errors.Wrapf(err, "target details handler error: %v", err) + } + opt.MainTargetDetailsFunc = nil + } converter, err := NewConverter(ctx, targetWithMetadata, bc, sts, opt) if err != nil { return nil, err diff --git a/features/features.go b/features/features.go index 7b2f4cab..e49607aa 100644 --- a/features/features.go +++ b/features/features.go @@ -53,15 +53,16 @@ type Features struct { WaitBlock bool `long:"wait-block" description:"enable WITH/END feature, also allows RUN --push mixed with non-push commands"` // unreleased - NoUseRegistryForWithDocker bool `long:"no-use-registry-for-with-docker" description:"disable use-registry-for-with-docker"` - TryFinally bool `long:"try" description:"allow the use of the TRY/FINALLY commands"` - NoNetwork bool `long:"no-network" description:"allow the use of RUN --network=none commands"` - ArgScopeSet bool `long:"arg-scope-and-set" description:"enable SET to reassign ARGs and prevent ARGs from being redeclared in the same scope"` - EarthlyCIRunnerArg bool `long:"earthly-ci-runner-arg" description:"includes EARTHLY_CI_RUNNER ARG"` - UseDockerIgnore bool `long:"use-docker-ignore" description:"fallback to .dockerignore incase .earthlyignore or .earthignore do not exist in a local \"FROM DOCKERFILE\" target"` - PassArgs bool `long:"pass-args" description:"Allow the use of the --pass-arg flag in FROM, BUILD, COPY, WITH DOCKER, and DO commands"` - GlobalCache bool `long:"global-cache" description:"enable global caches (shared across different Earthfiles), for cache mounts and CACHEs having an ID"` - GitRefs bool `long:"git-refs" description:"includes EARTHLY_GIT_REFS ARG"` + NoUseRegistryForWithDocker bool `long:"no-use-registry-for-with-docker" description:"disable use-registry-for-with-docker"` + TryFinally bool `long:"try" description:"allow the use of the TRY/FINALLY commands"` + NoNetwork bool `long:"no-network" description:"allow the use of RUN --network=none commands"` + ArgScopeSet bool `long:"arg-scope-and-set" description:"enable SET to reassign ARGs and prevent ARGs from being redeclared in the same scope"` + EarthlyCIRunnerArg bool `long:"earthly-ci-runner-arg" description:"includes EARTHLY_CI_RUNNER ARG"` + UseDockerIgnore bool `long:"use-docker-ignore" description:"fallback to .dockerignore incase .earthlyignore or .earthignore do not exist in a local \"FROM DOCKERFILE\" target"` + PassArgs bool `long:"pass-args" description:"Allow the use of the --pass-arg flag in FROM, BUILD, COPY, WITH DOCKER, and DO commands"` + GlobalCache bool `long:"global-cache" description:"enable global caches (shared across different Earthfiles), for cache mounts and CACHEs having an ID"` + GitRefs bool `long:"git-refs" description:"includes EARTHLY_GIT_REFS ARG"` + UseVisitedUpfrontHashCollection bool `long:"use-visited-upfront-hash-collection" description:"Uses a new target visitor implementation that computes upfront the hash of the visited targets and adds support for running all targets with the same name but different args in parallel"` Major int Minor int diff --git a/states/dedup/targetinput.go b/states/dedup/targetinput.go index c5055908..00794f34 100644 --- a/states/dedup/targetinput.go +++ b/states/dedup/targetinput.go @@ -51,28 +51,6 @@ func (ti TargetInput) WithFilterBuildArgs(buildArgNames map[string]bool) TargetI return tiClone } -// Equals compares to another TargetInput for equality. -func (ti TargetInput) Equals(other TargetInput) bool { - if ti.TargetCanonical != other.TargetCanonical { - return false - } - if ti.Platform != other.Platform { - return false - } - if ti.AllowPrivileged != other.AllowPrivileged { - return false - } - if len(ti.BuildArgs) != len(other.BuildArgs) { - return false - } - for index := range ti.BuildArgs { - if !ti.BuildArgs[index].Equals(other.BuildArgs[index]) { - return false - } - } - return true -} - func (ti TargetInput) clone() TargetInput { tiCopy := TargetInput{ TargetCanonical: ti.TargetCanonical, @@ -139,7 +117,8 @@ type BuildArgInput struct { // ConstantValue is the constant value of this build arg. ConstantValue string `json:"constantValue"` // DefaultValue represents the default value of the build arg. - DefaultValue string `json:"defaultConstant"` + // Not used as part of the hashing. + DefaultValue string `json:"-"` } // IsDefaultValue returns whether the value of the BuildArgInput @@ -159,9 +138,8 @@ func (bai BuildArgInput) Equals(other BuildArgInput) bool { if bai.ConstantValue != other.ConstantValue { return false } - if bai.DefaultValue != other.DefaultValue { - return false - } + // Ignoring the default value comparison - it is not used for + // deduplication. return true } diff --git a/states/states.go b/states/states.go index ae3dde6e..8386ae89 100644 --- a/states/states.go +++ b/states/states.go @@ -20,7 +20,7 @@ type MultiTarget struct { // Visited represents the previously visited states, grouped by target // name. Duplicate targets are possible if same target is called with different // build args. - Visited *VisitedCollection + Visited VisitedCollection // Final is the main target to be built. Final *SingleTarget } @@ -108,6 +108,11 @@ func newSingleTarget(ctx context.Context, target domain.Target, platr *platutil. doneCh: make(chan struct{}), incomingNewSubscriptions: make(chan string, 1024), } + sts.addOverridingVarsAsBuildArgInputs(overridingVars) + if parentDepSub == nil { + // New simplified algorithm. + return sts, nil + } // Consume all items from the parent subscription before returning control. OuterLoop: for { @@ -221,17 +226,6 @@ func (sts *SingleTarget) AddBuildArgInput(bai dedup.BuildArgInput) { sts.targetInput = sts.targetInput.WithBuildArgInput(bai) } -// AddOverridingVarsAsBuildArgInputs adds some vars to the sts's target input. -func (sts *SingleTarget) AddOverridingVarsAsBuildArgInputs(overridingVars *variables.Scope) { - sts.tiMu.Lock() - defer sts.tiMu.Unlock() - for _, key := range overridingVars.Sorted() { - ovVar, _ := overridingVars.Get(key) - sts.targetInput = sts.targetInput.WithBuildArgInput( - dedup.BuildArgInput{ConstantValue: ovVar, Name: key}) - } -} - // LastSaveImage returns the last save image available (if any). func (sts *SingleTarget) LastSaveImage() SaveImage { if len(sts.SaveImages) == 0 { @@ -297,6 +291,16 @@ func (sts *SingleTarget) Done() chan struct{} { return sts.doneCh } +func (sts *SingleTarget) addOverridingVarsAsBuildArgInputs(overridingVars *variables.Scope) { + sts.tiMu.Lock() + defer sts.tiMu.Unlock() + for _, key := range overridingVars.Sorted() { + ovVar, _ := overridingVars.Get(key) + sts.targetInput = sts.targetInput.WithBuildArgInput( + dedup.BuildArgInput{ConstantValue: ovVar, Name: key}) + } +} + // SaveLocal is an artifact path to be saved to local disk. type SaveLocal struct { // DestPath is the local dest path to copy the artifact to. diff --git a/states/visited-legacy.go b/states/visited-legacy.go new file mode 100644 index 00000000..21ec72f8 --- /dev/null +++ b/states/visited-legacy.go @@ -0,0 +1,161 @@ +package states + +import ( + "context" + "sync" + + "github.com/earthly/earthly/domain" + "github.com/earthly/earthly/states/dedup" + "github.com/earthly/earthly/util/platutil" + "github.com/earthly/earthly/variables" + "github.com/pkg/errors" +) + +// legacyVisitedCollection is a collection of visited targets. +type legacyVisitedCollection struct { + mu sync.Mutex + visited map[string][]*SingleTarget // targetStr -> sts list + // Same collection as above, but as a list, to make the ordering consistent. + visitedList []*SingleTarget +} + +// NewLegacyVisitedCollection returns a collection of visited targets. +func NewLegacyVisitedCollection() VisitedCollection { + return &legacyVisitedCollection{ + visited: make(map[string][]*SingleTarget), + } +} + +// All returns all visited items. +func (vc *legacyVisitedCollection) All() []*SingleTarget { + vc.mu.Lock() + defer vc.mu.Unlock() + return append([]*SingleTarget{}, vc.visitedList...) +} + +// Add adds a target to the collection, if it hasn't yet been visited. The returned sts is +// either the previously visited one or a brand new one. +func (vc *legacyVisitedCollection) Add(ctx context.Context, target domain.Target, platr *platutil.Resolver, allowPrivileged bool, overridingVars *variables.Scope, parentDepSub chan string) (*SingleTarget, bool, error) { + dependents, err := vc.waitAllDoneAndLock(ctx, target, parentDepSub) + if err != nil { + return nil, false, err + } + // Put the deps back into the channel for the new sts to consume. + for depID := range dependents { + parentDepSub <- depID + } + defer vc.mu.Unlock() + for _, sts := range vc.visited[target.StringCanonical()] { + same, err := compareTargetInputs(target, platr, allowPrivileged, overridingVars, sts.TargetInput()) + if err != nil { + return nil, false, err + } + if same { + // Existing sts. + if dependents[sts.ID] { + // Infinite recursion. The previously visited sts is a dependent of us. + return nil, false, errors.Errorf( + "infinite recursion detected for target %s", target.String()) + } + // If it's not a dependent, then it *has* to be done at this point. + // Sanity check. + select { + case <-sts.Done(): + default: + panic("same sts but not done") + } + // Subscribe that sts to the dependencies of our parent. + sts.MonitorDependencySubscription(ctx, parentDepSub) + return sts, true, nil + } + } + // None are the same. Create new sts. + sts, err := newSingleTarget(ctx, target, platr, allowPrivileged, overridingVars, parentDepSub) + if err != nil { + return nil, false, err + } + targetStr := target.StringCanonical() + vc.visited[targetStr] = append(vc.visited[targetStr], sts) + vc.visitedList = append(vc.visitedList, sts) + return sts, false, nil +} + +// waitAllDoneAndLock acquires mu at a point when all sts are done for a particular +// target, allowing for comparisons across the board while the lock is held. +func (vc *legacyVisitedCollection) waitAllDoneAndLock(ctx context.Context, target domain.Target, parentDepSub chan string) (map[string]bool, error) { + // Build up dependents from parentDepSub. The list needs to be complete when returning + // from this function for proper infinite loop detection. + dependents := make(map[string]bool) + // wait all done & lock loop + prevLenList := 0 + for { + vc.mu.Lock() + list := append([]*SingleTarget{}, vc.visited[target.StringCanonical()]...) + if prevLenList == len(list) { + // The list we have now is the same we just checked if it's done or waiting on us. + // We are finished. + return dependents, nil // no unlocking on purpose + } + prevLenList = len(list) + vc.mu.Unlock() + // Wait for sts's to be done outside of the mu lock. + stsLoop: + for _, sts := range list { + if dependents[sts.ID] { + // No need to wait if it's a dependent, because the sts is waiting on us. + // It's safe to perform comparison if they are waiting on us. + continue + } + for { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case newID := <-parentDepSub: + dependents[newID] = true + if newID == sts.ID { + // Just the one we were waiting for. It seems that it is + // now waiting on us. + continue stsLoop + } + case <-sts.Done(): + continue stsLoop + } + } + } + } +} + +// compareTargetInputs compares two targets and their inputs to check if they are the same. +func compareTargetInputs(target domain.Target, platr *platutil.Resolver, allowPrivileged bool, overridingVars *variables.Scope, other dedup.TargetInput) (bool, error) { + if target.StringCanonical() != other.TargetCanonical { + return false, nil + } + if allowPrivileged != other.AllowPrivileged { + return false, nil + } + stsPlat, err := platr.ParseAllowNativeAndUser(other.Platform) + if err != nil { + return false, err + } + if !platr.PlatformEquals(platr.Current(), stsPlat) { + return false, nil + } + for _, bai := range other.BuildArgs { + variable, found := overridingVars.Get(bai.Name) + if found { + baiVariable := dedup.BuildArgInput{ + Name: bai.Name, + DefaultValue: bai.DefaultValue, + ConstantValue: variable, + } + if !baiVariable.Equals(bai) { + return false, nil + } + } else { + if !bai.IsDefaultValue() { + return false, nil + } + } + } + return true, nil +} diff --git a/states/visited-upfront-hash.go b/states/visited-upfront-hash.go new file mode 100644 index 00000000..700bc1e2 --- /dev/null +++ b/states/visited-upfront-hash.go @@ -0,0 +1,56 @@ +package states + +import ( + "context" + "sync" + + "github.com/earthly/earthly/domain" + "github.com/earthly/earthly/util/platutil" + "github.com/earthly/earthly/variables" +) + +// visitedUpfrontHashCollection is a collection of visited targets. +type visitedUpfrontHashCollection struct { + mu sync.Mutex + visited map[string]*SingleTarget // targetInputHash -> sts + // visitedList is the same collection as above, but as a list, + // to make the ordering consistent. + visitedList []*SingleTarget +} + +// NewVisitedUpfrontHashCollection returns a collection of visited targets. +func NewVisitedUpfrontHashCollection() VisitedCollection { + return &visitedUpfrontHashCollection{ + visited: make(map[string]*SingleTarget), + } +} + +// All returns all visited items. +func (vc *visitedUpfrontHashCollection) All() []*SingleTarget { + vc.mu.Lock() + defer vc.mu.Unlock() + return append([]*SingleTarget{}, vc.visitedList...) +} + +// Add adds a target to the collection, if it hasn't yet been visited. The returned sts is +// either the previously visited one or a brand new one. +func (vc *visitedUpfrontHashCollection) Add(ctx context.Context, target domain.Target, platr *platutil.Resolver, allowPrivileged bool, overridingVars *variables.Scope, parentDepSub chan string) (*SingleTarget, bool, error) { + // Constructing a new sts early to be able to compute its target input hash. + newSts, err := newSingleTarget(ctx, target, platr, allowPrivileged, overridingVars, nil) + if err != nil { + return nil, false, err + } + newKey, err := newSts.targetInput.Hash() + if err != nil { + return nil, false, err + } + vc.mu.Lock() + defer vc.mu.Unlock() + sts, found := vc.visited[newKey] + if found { + return sts, true, nil + } + vc.visited[newKey] = newSts + vc.visitedList = append(vc.visitedList, newSts) + return newSts, false, nil +} diff --git a/states/visited.go b/states/visited.go index 7fee27fa..e3fba7c1 100644 --- a/states/visited.go +++ b/states/visited.go @@ -2,160 +2,17 @@ package states import ( "context" - "sync" "github.com/earthly/earthly/domain" - "github.com/earthly/earthly/states/dedup" "github.com/earthly/earthly/util/platutil" "github.com/earthly/earthly/variables" - "github.com/pkg/errors" ) -// VisitedCollection is a collection of visited targets. -type VisitedCollection struct { - mu sync.Mutex - visited map[string][]*SingleTarget // targetStr -> sts list - // Same collection as above, but as a list, to make the ordering consistent. - visitedList []*SingleTarget -} - -// NewVisitedCollection returns a collection of visited targets. -func NewVisitedCollection() *VisitedCollection { - return &VisitedCollection{ - visited: make(map[string][]*SingleTarget), - } -} - -// All returns all visited items. -func (vc *VisitedCollection) All() []*SingleTarget { - vc.mu.Lock() - defer vc.mu.Unlock() - return append([]*SingleTarget{}, vc.visitedList...) -} - -// Add adds a target to the collection, if it hasn't yet been visited. The returned sts is -// either the previously visited one or a brand new one. -func (vc *VisitedCollection) Add(ctx context.Context, target domain.Target, platr *platutil.Resolver, allowPrivileged bool, overridingVars *variables.Scope, parentDepSub chan string) (*SingleTarget, bool, error) { - dependents, err := vc.waitAllDoneAndLock(ctx, target, parentDepSub) - if err != nil { - return nil, false, err - } - // Put the deps back into the channel for the new sts to consume. - for depID := range dependents { - parentDepSub <- depID - } - defer vc.mu.Unlock() - for _, sts := range vc.visited[target.StringCanonical()] { - same, err := CompareTargetInputs(target, platr, allowPrivileged, overridingVars, sts.TargetInput()) - if err != nil { - return nil, false, err - } - if same { - // Existing sts. - if dependents[sts.ID] { - // Infinite recursion. The previously visited sts is a dependent of us. - return nil, false, errors.Errorf( - "infinite recursion detected for target %s", target.String()) - } - // If it's not a dependent, then it *has* to be done at this point. - // Sanity check. - select { - case <-sts.Done(): - default: - panic("same sts but not done") - } - // Subscribe that sts to the dependencies of our parent. - sts.MonitorDependencySubscription(ctx, parentDepSub) - return sts, true, nil - } - } - // None are the same. Create new sts. - sts, err := newSingleTarget(ctx, target, platr, allowPrivileged, overridingVars, parentDepSub) - if err != nil { - return nil, false, err - } - targetStr := target.StringCanonical() - vc.visited[targetStr] = append(vc.visited[targetStr], sts) - vc.visitedList = append(vc.visitedList, sts) - return sts, false, nil -} - -// waitAllDoneAndLock acquires mu at a point when all sts are done for a particular -// target, allowing for comparisons across the board while the lock is held. -func (vc *VisitedCollection) waitAllDoneAndLock(ctx context.Context, target domain.Target, parentDepSub chan string) (map[string]bool, error) { - // Build up dependents from parentDepSub. The list needs to be complete when returning - // from this function for proper infinite loop detection. - dependents := make(map[string]bool) - // wait all done & lock loop - prevLenList := 0 - for { - vc.mu.Lock() - list := append([]*SingleTarget{}, vc.visited[target.StringCanonical()]...) - if prevLenList == len(list) { - // The list we have now is the same we just checked if it's done or waiting on us. - // We are finished. - return dependents, nil // no unlocking on purpose - } - prevLenList = len(list) - vc.mu.Unlock() - // Wait for sts's to be done outside of the mu lock. - stsLoop: - for _, sts := range list { - if dependents[sts.ID] { - // No need to wait if it's a dependent, because the sts is waiting on us. - // It's safe to perform comparison if they are waiting on us. - continue - } - for { - select { - case <-ctx.Done(): - return nil, ctx.Err() - case newID := <-parentDepSub: - dependents[newID] = true - if newID == sts.ID { - // Just the one we were waiting for. It seems that it is - // now waiting on us. - continue stsLoop - } - case <-sts.Done(): - continue stsLoop - } - } - } - } -} - -// CompareTargetInputs compares two targets and their inputs to check if they are the same. -func CompareTargetInputs(target domain.Target, platr *platutil.Resolver, allowPrivileged bool, overridingVars *variables.Scope, other dedup.TargetInput) (bool, error) { - if target.StringCanonical() != other.TargetCanonical { - return false, nil - } - if allowPrivileged != other.AllowPrivileged { - return false, nil - } - stsPlat, err := platr.ParseAllowNativeAndUser(other.Platform) - if err != nil { - return false, err - } - if !platr.PlatformEquals(platr.Current(), stsPlat) { - return false, nil - } - for _, bai := range other.BuildArgs { - variable, found := overridingVars.Get(bai.Name) - if found { - baiVariable := dedup.BuildArgInput{ - Name: bai.Name, - DefaultValue: bai.DefaultValue, - ConstantValue: variable, - } - if !baiVariable.Equals(bai) { - return false, nil - } - } else { - if !bai.IsDefaultValue() { - return false, nil - } - } - } - return true, nil +// VisitedCollection represents a collection of visited targets. +type VisitedCollection interface { + // All returns all visited items. + All() []*SingleTarget + // Add adds a target to the collection, if it hasn't yet been visited. The returned sts is + // either the previously visited one or a brand new one. + Add(ctx context.Context, target domain.Target, platr *platutil.Resolver, allowPrivileged bool, overridingVars *variables.Scope, parentDepSub chan string) (*SingleTarget, bool, error) } diff --git a/tests/Earthfile b/tests/Earthfile index 4e80c2ab..76927562 100644 --- a/tests/Earthfile +++ b/tests/Earthfile @@ -101,7 +101,6 @@ ga-no-qemu-quick: BUILD +true-false-flag-invalid BUILD +dont-save-indirect-remote-artifact BUILD +remote-earthfile-must-have-version - BUILD +sequential-locally-test BUILD +version-flag-test BUILD +implicit-ignores BUILD +help @@ -123,6 +122,7 @@ ga-no-qemu-quick: BUILD +test-cache-mount-mode BUILD +test-cache-mode BUILD +test-shared-cache + BUILD +test-visited-upfront-hash-collection # Forcing the implicit global wait/end block, causes some tests, which rely # on the ability to have two different targets issue the same SAVE IMAGE tag name @@ -1142,42 +1142,6 @@ true-false-flag-invalid: RUN cat output3.txt RUN cat output3.txt | grep 'invalid argument for flag .*--no-cache.*expected bool' -sequential-locally-test: - DO +RUN_EARTHLY --earthfile=sequential-locally.earth --use_tmpfs=false --target="+run-lots" --post_command="2>output.txt" - RUN cat output.txt | grep -vw RUN | grep -vw char | grep -o '\(start\|mid\|end\)\s[a-d]$' > output-filtered.txt - RUN echo "set -e -expectedmode=\"start\" -expectedchar=\"?\" -while read line; do - mode=\$(echo \"\$line\" | cut -d \" \" -f1) - char=\$(echo \"\$line\" | cut -d \" \" -f2) - echo \"mode=\$mode char=\$char\" - case \"\$mode\" in - start) - test \"\$expectedmode\" = \"start\" || (echo \"expected \$expectedmode; got \$mode\" && exit 1) - expectedmode=\"mid\" - expectedchar=\"$char\" - ;; - mid) - test \"\$expectedmode\" = \"mid\" || (echo \"expected \$expectedmode; got \$mode\" && exit 1) - test \"\$expectedchar\" = \"$char\" || (echo \"expected \$expectedchar; got \$char\" && exit 1) - # we don't change expectedmode to end, because we can have multiple 'mid's - ;; - end) - test \"\$expectedmode\" = \"mid\" || (echo \"expected \$expectedmode; got \$mode\" && exit 1) - test \"\$expectedchar\" = \"$char\" || (echo \"expected \$expectedchar; got \$char\" && exit 1) - expectedmode=\"start\" - expectedchar=\"?\" - ;; - *) - echo unhandled mode: $mode - exit 1 - esac -done -echo test passed: RUNs were sequentially grouped -" > test-output.sh && chmod +x test-output.sh - RUN cat output-filtered.txt | ./test-output.sh - version-flag-test: # In addition to testing the --version flag works; this flag is critical for the homebrew deployment test, which # performs an assert in https://github.com/earthly/homebrew-earthly/blob/main/Formula/earthly.rb @@ -1473,6 +1437,21 @@ test-shared-cache: --extra_args=" --secret=content=filecontents " \ --output_contains="filecontents" +test-visited-upfront-hash-collection: + DO +RUN_EARTHLY \ + --earthfile=visited-upfront-hash-collection.earth \ + --target=+parallel + RUN order=$(cat earthly.output | grep -Eo "### [[:digit:]]+" | cut -d' ' -f2- | tr '\n' ' ' | tr -d '[:space:]'); \ + echo "order: $order"; \ + # This regex matches successions of number trios like: 222111444333555, happening when the builds didn't run in parallel + match=$(echo $order | sed -nr '/^([[:digit:]])\1\1([[:digit:]])\2\2([[:digit:]])\3\3([[:digit:]])\4\4([[:digit:]])\5\5$/p'); \ + if [[ $match ]]; then \ + echo "no parallelization observed"; \ + exit 1; \ + else \ + echo "parallelization checked"; \ + fi + RUN_EARTHLY: COMMAND ARG earthfile= diff --git a/tests/sequential-locally.earth b/tests/sequential-locally.earth deleted file mode 100644 index 84f9fc33..00000000 --- a/tests/sequential-locally.earth +++ /dev/null @@ -1,33 +0,0 @@ -VERSION 0.7 -lots: - LOCALLY - ARG char - RUN echo "start $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "mid $char" - RUN echo "end $char" - -run-lots: - BUILD +lots \ - --char=a \ - --char=b \ - --char=c \ - --char=d diff --git a/tests/visited-upfront-hash-collection.earth b/tests/visited-upfront-hash-collection.earth new file mode 100644 index 00000000..cacaeb83 --- /dev/null +++ b/tests/visited-upfront-hash-collection.earth @@ -0,0 +1,20 @@ +VERSION --use-visited-upfront-hash-collection 0.7 + +FROM earthly/dind:alpine + +parallel: + BUILD +test-executor --INDEX=1 + BUILD +test-executor --INDEX=2 + BUILD +test-executor --INDEX=3 + BUILD +test-executor --INDEX=4 + BUILD +test-executor --INDEX=5 + +test-executor: + ARG INDEX + RUN --no-cache for i in $(seq 1 3); do \ + echo "### $INDEX"; \ + sleep 0.3; \ + done; + IF true + RUN true + END