From 2cfdeb66d597cae9e639425b908bc690e540cf88 Mon Sep 17 00:00:00 2001 From: Andrey Date: Wed, 4 Sep 2024 15:14:51 +0200 Subject: [PATCH] add a first version of retry strategy for new tests --- lib/datadog/ci/test_retries/component.rb | 30 ++++++++++++-- lib/datadog/ci/test_retries/strategy/base.rb | 4 ++ .../ci/test_retries/strategy/retry_new.rb | 41 +++++++++++++++++++ sig/datadog/ci/test_retries/component.rbs | 6 ++- sig/datadog/ci/test_retries/strategy/base.rbs | 2 + .../ci/test_retries/strategy/retry_new.rbs | 16 ++++++++ .../datadog/ci/test_retries/component_spec.rb | 6 ++- 7 files changed, 98 insertions(+), 7 deletions(-) create mode 100644 lib/datadog/ci/test_retries/strategy/retry_new.rb create mode 100644 sig/datadog/ci/test_retries/strategy/retry_new.rbs diff --git a/lib/datadog/ci/test_retries/component.rb b/lib/datadog/ci/test_retries/component.rb index 23ef9328..286461f2 100644 --- a/lib/datadog/ci/test_retries/component.rb +++ b/lib/datadog/ci/test_retries/component.rb @@ -2,6 +2,7 @@ require_relative "strategy/no_retry" require_relative "strategy/retry_failed" +require_relative "strategy/retry_new" require_relative "../ext/telemetry" require_relative "../utils/telemetry" @@ -17,7 +18,8 @@ class Component attr_reader :retry_failed_tests_enabled, :retry_failed_tests_max_attempts, :retry_failed_tests_total_limit, :retry_failed_tests_count, - :retry_new_tests_enabled, :retry_new_tests_duration_thresholds, :retry_new_tests_percentage_limit, + :retry_new_tests_enabled, :retry_new_tests_duration_thresholds, + :retry_new_tests_percentage_limit, :retry_new_tests_unique_tests_set, :retry_new_tests_fault_reason def initialize( @@ -83,7 +85,11 @@ def with_retries(&block) def build_strategy(test_span) @mutex.synchronize do - if should_retry_failed_test?(test_span) + if should_retry_new_test?(test_span) + Datadog.logger.debug("New test retry starts") + + Strategy::RetryNew.new(duration_thresholds: @retry_new_tests_duration_thresholds) + elsif 
should_retry_failed_test?(test_span) Datadog.logger.debug("Failed test retry starts") @retry_failed_tests_count += 1 @@ -105,7 +111,7 @@ def record_test_finished(test_span) end def record_test_span_duration(tracer_span) - # noop + current_retry_strategy&.record_duration(tracer_span.duration) end private @@ -119,12 +125,28 @@ def current_retry_strategy=(strategy) end def should_retry_failed_test?(test_span) - @retry_failed_tests_enabled && !!test_span&.failed? && @retry_failed_tests_count < @retry_failed_tests_total_limit + if @retry_failed_tests_count >= @retry_failed_tests_total_limit + @retry_failed_tests_enabled = false + end + + @retry_failed_tests_enabled && !!test_span&.failed? + end + + def should_retry_new_test?(test_span) + # TODO: check if EFD is faulty here + + @retry_new_tests_enabled && is_new_test?(test_span) end def test_visibility_component Datadog.send(:components).test_visibility end + + def is_new_test?(test_span) + test_id = Utils::TestRun.datadog_test_id(test_span.name, test_span.test_suite_name) + + @retry_new_tests_unique_tests_set.include?(test_id) + end end end end diff --git a/lib/datadog/ci/test_retries/strategy/base.rb b/lib/datadog/ci/test_retries/strategy/base.rb index 3c69fd84..e91c6115 100644 --- a/lib/datadog/ci/test_retries/strategy/base.rb +++ b/lib/datadog/ci/test_retries/strategy/base.rb @@ -12,6 +12,10 @@ def should_retry? 
def record_retry(test_span) test_span&.set_tag(Ext::Test::TAG_IS_RETRY, "true") end + + # duration in float seconds + def record_duration(duration) + end end end end diff --git a/lib/datadog/ci/test_retries/strategy/retry_new.rb b/lib/datadog/ci/test_retries/strategy/retry_new.rb new file mode 100644 index 00000000..806fbb04 --- /dev/null +++ b/lib/datadog/ci/test_retries/strategy/retry_new.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +require_relative "base" + +require_relative "../../ext/test" + +module Datadog + module CI + module TestRetries + module Strategy + # retry every new test up to 10 times (early flake detection) + class RetryNew < Base + def initialize(duration_thresholds:) + @duration_thresholds = duration_thresholds + @attempts = 0 + # will be changed based on test span duration + @max_attempts = 10 + end + + def should_retry? + @attempts < @max_attempts + end + + def record_retry(test_span) + super + + @attempts += 1 + + Datadog.logger.debug { "Retry Attempts [#{@attempts} / #{@max_attempts}]" } + end + + def record_duration(duration) + @max_attempts = @duration_thresholds.max_attempts_for_duration(duration) + + Datadog.logger.debug { "Recorded test duration of [#{duration}], new Max Attempts value is [#{@max_attempts}]" } + end + end + end + end + end +end diff --git a/sig/datadog/ci/test_retries/component.rbs b/sig/datadog/ci/test_retries/component.rbs index 56704119..d820a09a 100644 --- a/sig/datadog/ci/test_retries/component.rbs +++ b/sig/datadog/ci/test_retries/component.rbs @@ -14,7 +14,7 @@ module Datadog attr_reader retry_new_tests_enabled: bool - attr_reader retry_new_tests_duration_thresholds: Datadog::CI::Remote::SlowTestRetries? 
+ attr_reader retry_new_tests_duration_thresholds: Datadog::CI::Remote::SlowTestRetries attr_reader retry_new_tests_percentage_limit: Integer @@ -46,6 +46,10 @@ module Datadog def should_retry_failed_test?: (Datadog::CI::Test test) -> bool + def should_retry_new_test?: (Datadog::CI::Test test) -> bool + + def is_new_test?: (Datadog::CI::Test test) -> bool + def test_visibility_component: () -> Datadog::CI::TestVisibility::Component end end diff --git a/sig/datadog/ci/test_retries/strategy/base.rbs b/sig/datadog/ci/test_retries/strategy/base.rbs index 8440b3e3..af71436a 100644 --- a/sig/datadog/ci/test_retries/strategy/base.rbs +++ b/sig/datadog/ci/test_retries/strategy/base.rbs @@ -6,6 +6,8 @@ module Datadog def should_retry?: () -> bool def record_retry: (Datadog::CI::Test test_span) -> void + + def record_duration: (Float duration) -> void end end end diff --git a/sig/datadog/ci/test_retries/strategy/retry_new.rbs b/sig/datadog/ci/test_retries/strategy/retry_new.rbs new file mode 100644 index 00000000..6968b3ce --- /dev/null +++ b/sig/datadog/ci/test_retries/strategy/retry_new.rbs @@ -0,0 +1,16 @@ +module Datadog + module CI + module TestRetries + module Strategy + class RetryNew < Base + @duration_thresholds: Datadog::CI::Remote::SlowTestRetries + + @attempts: Integer + @max_attempts: Integer + + def initialize: (duration_thresholds: Datadog::CI::Remote::SlowTestRetries) -> void + end + end + end + end +end diff --git a/spec/datadog/ci/test_retries/component_spec.rb b/spec/datadog/ci/test_retries/component_spec.rb index 3a346eba..eb78da29 100644 --- a/spec/datadog/ci/test_retries/component_spec.rb +++ b/spec/datadog/ci/test_retries/component_spec.rb @@ -152,7 +152,7 @@ subject { component.build_strategy(test_span) } let(:test_failed) { false } - let(:test_span) { instance_double(Datadog::CI::Test, failed?: test_failed) } + let(:test_span) { instance_double(Datadog::CI::Test, failed?: test_failed, name: "test", test_suite_name: "suite") } before do 
component.configure(library_settings, test_session) @@ -227,7 +227,9 @@ set_tag: true, get_tag: true, skipped?: false, - type: "test" + type: "test", + name: "mytest", + test_suite_name: "mysuite" ) end