From 74e72fe37aa9da8020d272e742d4bf48c79a59c8 Mon Sep 17 00:00:00 2001 From: Andrey Date: Mon, 16 Sep 2024 16:08:51 +0200 Subject: [PATCH] create test retries strategies and move component's logic there (they are not actually used at this point) --- lib/datadog/ci/test_retries/component.rb | 3 +- .../ci/test_retries/driver/retry_new.rb | 6 +- lib/datadog/ci/test_retries/strategy/base.rb | 26 ++++ .../ci/test_retries/strategy/no_retry.rb | 12 ++ .../ci/test_retries/strategy/retry_failed.rb | 52 ++++++++ .../ci/test_retries/strategy/retry_new.rb | 123 ++++++++++++++++++ .../ci/test_retries/driver/retry_new.rbs | 4 +- sig/datadog/ci/test_retries/strategy/base.rbs | 15 +++ .../ci/test_retries/strategy/no_retry.rbs | 10 ++ .../ci/test_retries/strategy/retry_failed.rbs | 33 +++++ .../ci/test_retries/strategy/retry_new.rbs | 47 +++++++ .../ci/test_retries/driver/retry_new_spec.rb | 4 +- 12 files changed, 326 insertions(+), 9 deletions(-) create mode 100644 lib/datadog/ci/test_retries/strategy/base.rb create mode 100644 lib/datadog/ci/test_retries/strategy/no_retry.rb create mode 100644 lib/datadog/ci/test_retries/strategy/retry_failed.rb create mode 100644 lib/datadog/ci/test_retries/strategy/retry_new.rb create mode 100644 sig/datadog/ci/test_retries/strategy/base.rbs create mode 100644 sig/datadog/ci/test_retries/strategy/no_retry.rbs create mode 100644 sig/datadog/ci/test_retries/strategy/retry_failed.rbs create mode 100644 sig/datadog/ci/test_retries/strategy/retry_new.rbs diff --git a/lib/datadog/ci/test_retries/component.rb b/lib/datadog/ci/test_retries/component.rb index bb34fcc3..e2daea63 100644 --- a/lib/datadog/ci/test_retries/component.rb +++ b/lib/datadog/ci/test_retries/component.rb @@ -39,7 +39,6 @@ def initialize( @retry_failed_tests_count = 0 @retry_new_tests_enabled = retry_new_tests_enabled - @retry_new_tests_duration_thresholds = nil @retry_new_tests_unique_tests_set = Set.new @unique_tests_client = unique_tests_client # total maximum number of new 
tests to retry (will be set based on the total number of tests in the session) @@ -120,7 +119,7 @@ def build_strategy(test_span) end @retry_new_tests_count += 1 - Driver::RetryNew.new(test_span, duration_thresholds: @retry_new_tests_duration_thresholds) + Driver::RetryNew.new(test_span, max_attempts_thresholds: @retry_new_tests_duration_thresholds) elsif should_retry_failed_test?(test_span) Datadog.logger.debug do "#{test_span.name} failed, will be retried" diff --git a/lib/datadog/ci/test_retries/driver/retry_new.rb b/lib/datadog/ci/test_retries/driver/retry_new.rb index 3acd4a5d..4b5553ca 100644 --- a/lib/datadog/ci/test_retries/driver/retry_new.rb +++ b/lib/datadog/ci/test_retries/driver/retry_new.rb @@ -10,8 +10,8 @@ module TestRetries module Driver # retry every new test up to 10 times (early flake detection) class RetryNew < Base - def initialize(test_span, duration_thresholds:) - @duration_thresholds = duration_thresholds + def initialize(test_span, max_attempts_thresholds:) + @max_attempts_thresholds = max_attempts_thresholds @attempts = 0 # will be changed based on test span duration @max_attempts = 10 @@ -33,7 +33,7 @@ def record_retry(test_span) end def record_duration(duration) - @max_attempts = @duration_thresholds.max_attempts_for_duration(duration) + @max_attempts = @max_attempts_thresholds.max_attempts_for_duration(duration) Datadog.logger.debug { "Recorded test duration of [#{duration}], new Max Attempts value is [#{@max_attempts}]" } end diff --git a/lib/datadog/ci/test_retries/strategy/base.rb b/lib/datadog/ci/test_retries/strategy/base.rb new file mode 100644 index 00000000..b103ef76 --- /dev/null +++ b/lib/datadog/ci/test_retries/strategy/base.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +require_relative "../driver/no_retry" + +module Datadog + module CI + module TestRetries + module Strategy + # Strategies are major subcomponents of the retry mechanism. They are responsible for + # determining which tests should be retried and how. 
+ class Base + def covers?(test_span) + false + end + + def configure(_library_settings, _test_session) + end + + def build_driver(_test_span) + Driver::NoRetry.new + end + end + end + end + end +end diff --git a/lib/datadog/ci/test_retries/strategy/no_retry.rb b/lib/datadog/ci/test_retries/strategy/no_retry.rb new file mode 100644 index 00000000..81fe8622 --- /dev/null +++ b/lib/datadog/ci/test_retries/strategy/no_retry.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +module Datadog + module CI + module TestRetries + module Strategy + class NoRetry < Base + end + end + end + end +end diff --git a/lib/datadog/ci/test_retries/strategy/retry_failed.rb b/lib/datadog/ci/test_retries/strategy/retry_failed.rb new file mode 100644 index 00000000..dd78e6f3 --- /dev/null +++ b/lib/datadog/ci/test_retries/strategy/retry_failed.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require_relative "../driver/no_retry" + +module Datadog + module CI + module TestRetries + module Strategy + class RetryFailed < Base + attr_reader :enabled, :max_attempts, + :total_limit, :retried_count + + def initialize( + enabled:, + max_attempts:, + total_limit: + ) + @enabled = enabled + @max_attempts = max_attempts + @total_limit = total_limit + @retried_count = 0 + end + + def covers?(test_span) + return false unless @enabled + + if @retried_count >= @total_limit + Datadog.logger.debug do + "Retry failed tests limit reached: [#{@retried_count}] out of [#{@total_limit}]" + end + @enabled = false + end + + @enabled && !!test_span&.failed? + end + + def configure(library_settings, test_session) + @enabled &&= library_settings.flaky_test_retries_enabled? 
+ end + + def build_driver(test_span) + Datadog.logger.debug { "#{test_span.name} failed, will be retried" } + + @retried_count += 1 + + Driver::RetryFailed.new(max_attempts: max_attempts) + end + end + end + end + end +end diff --git a/lib/datadog/ci/test_retries/strategy/retry_new.rb b/lib/datadog/ci/test_retries/strategy/retry_new.rb new file mode 100644 index 00000000..1201300a --- /dev/null +++ b/lib/datadog/ci/test_retries/strategy/retry_new.rb @@ -0,0 +1,123 @@ +# frozen_string_literal: true + +require_relative "../driver/no_retry" + +module Datadog + module CI + module TestRetries + module Strategy + class RetryNew < Base + DEFAULT_TOTAL_TESTS_COUNT = 100 + + attr_reader :enabled, :max_attempts_thresholds, :unique_tests_set, :total_limit, :retried_count + + def initialize( + enabled:, + unique_tests_client: + ) + @enabled = enabled + @unique_tests_set = Set.new + # total maximum number of new tests to retry (will be set based on the total number of tests in the session) + @total_limit = 0 + @retried_count = 0 + + @unique_tests_client = unique_tests_client + end + + def covers?(test_span) + return false unless @enabled + + if @retried_count >= @total_limit + Datadog.logger.debug do + "Retry new tests limit reached: [#{@retried_count}] out of [#{@total_limit}]" + end + @enabled = false + mark_test_session_faulty(Datadog::CI.active_test_session) + end + + @enabled && !test_span.skipped? && is_new_test?(test_span) + end + + def configure(library_settings, test_session) + @enabled &&= library_settings.early_flake_detection_enabled? 
+ + return unless @enabled + + # mark early flake detection enabled for test session + test_session.set_tag(Ext::Test::TAG_EARLY_FLAKE_ENABLED, "true") + + # parse the max attempts thresholds for each test duration + @max_attempts_thresholds = library_settings.slow_test_retries + Datadog.logger.debug do + "Slow test retries thresholds: #{@max_attempts_thresholds.entries}" + end + + # calculate what would be the total limit of new tests to retry + percentage_limit = library_settings.faulty_session_threshold + tests_count = test_session.total_tests_count.to_i + if tests_count.zero? + Datadog.logger.debug do + "Total tests count is zero, using default value for the total number of tests: [#{DEFAULT_TOTAL_TESTS_COUNT}]" + end + + tests_count = DEFAULT_TOTAL_TESTS_COUNT + end + @total_limit = (tests_count * percentage_limit / 100.0).ceil + Datadog.logger.debug do + "Retry new tests total limit is [#{@total_limit}] (#{percentage_limit}%) of #{tests_count}" + end + + # fetch a set of known unique tests + @unique_tests_set = @unique_tests_client.fetch_unique_tests(test_session) + if @unique_tests_set.empty? 
+ @enabled = false + mark_test_session_faulty(test_session) + + Datadog.logger.warn( + "Disabling early flake detection because there are no known tests (possible reason: no test runs in default branch)" + ) + end + + # report how many unique tests were found + Datadog.logger.debug do + "Found [#{@unique_tests_set.size}] known unique tests" + end + Utils::Telemetry.distribution( + Ext::Telemetry::METRIC_EFD_UNIQUE_TESTS_RESPONSE_TESTS, + @unique_tests_set.size.to_f + ) + end + + def build_driver(test_span) + Datadog.logger.debug do + "#{test_span.name} is new, will be retried" + end + @retried_count += 1 + + Driver::RetryNew.new(test_span, max_attempts_thresholds: @max_attempts_thresholds) + end + + private + + def mark_test_session_faulty(test_session) + test_session&.set_tag(Ext::Test::TAG_EARLY_FLAKE_ABORT_REASON, Ext::Test::EARLY_FLAKE_FAULTY) + end + + def is_new_test?(test_span) + test_id = Utils::TestRun.datadog_test_id(test_span.name, test_span.test_suite_name) + + result = !@unique_tests_set.include?(test_id) + + if result + Datadog.logger.debug do + "#{test_id} is not found in the unique tests set, it is a new test" + end + end + + result + end + end + end + end + end +end diff --git a/sig/datadog/ci/test_retries/driver/retry_new.rbs b/sig/datadog/ci/test_retries/driver/retry_new.rbs index 9136cd6b..650019e0 100644 --- a/sig/datadog/ci/test_retries/driver/retry_new.rbs +++ b/sig/datadog/ci/test_retries/driver/retry_new.rbs @@ -3,12 +3,12 @@ module Datadog module TestRetries module Driver class RetryNew < Base - @duration_thresholds: Datadog::CI::Remote::SlowTestRetries + @max_attempts_thresholds: Datadog::CI::Remote::SlowTestRetries @attempts: Integer @max_attempts: Integer - def initialize: (Datadog::CI::Test test_span, duration_thresholds: Datadog::CI::Remote::SlowTestRetries) -> void + def initialize: (Datadog::CI::Test test_span, max_attempts_thresholds: Datadog::CI::Remote::SlowTestRetries) -> void private diff --git 
a/sig/datadog/ci/test_retries/strategy/base.rbs b/sig/datadog/ci/test_retries/strategy/base.rbs new file mode 100644 index 00000000..c43544db --- /dev/null +++ b/sig/datadog/ci/test_retries/strategy/base.rbs @@ -0,0 +1,15 @@ +module Datadog + module CI + module TestRetries + module Strategy + class Base + def covers?: (Datadog::CI::Test test_span) -> bool + + def configure: (Datadog::CI::Remote::LibrarySettings library_settings, Datadog::CI::TestSession test_session) -> void + + def build_driver: (Datadog::CI::Test test_span) -> Datadog::CI::TestRetries::Driver::Base + end + end + end + end +end diff --git a/sig/datadog/ci/test_retries/strategy/no_retry.rbs b/sig/datadog/ci/test_retries/strategy/no_retry.rbs new file mode 100644 index 00000000..56b3e07e --- /dev/null +++ b/sig/datadog/ci/test_retries/strategy/no_retry.rbs @@ -0,0 +1,10 @@ +module Datadog + module CI + module TestRetries + module Strategy + class NoRetry < Base + end + end + end + end +end diff --git a/sig/datadog/ci/test_retries/strategy/retry_failed.rbs b/sig/datadog/ci/test_retries/strategy/retry_failed.rbs new file mode 100644 index 00000000..1be9df9f --- /dev/null +++ b/sig/datadog/ci/test_retries/strategy/retry_failed.rbs @@ -0,0 +1,33 @@ +module Datadog + module CI + module TestRetries + module Strategy + class RetryFailed < Base + @enabled: bool + + @max_attempts: Integer + + @total_limit: Integer + + @retried_count: Integer + + attr_reader enabled: bool + + attr_reader max_attempts: Integer + + attr_reader total_limit: Integer + + attr_reader retried_count: Integer + + def initialize: (enabled: bool, max_attempts: Integer, total_limit: Integer) -> void + + def covers?: (Datadog::CI::Test test_span) -> bool + + def configure: (Datadog::CI::Remote::LibrarySettings library_settings, Datadog::CI::TestSession test_session) -> void + + def build_driver: (Datadog::CI::Test test_span) -> Datadog::CI::TestRetries::Driver::Base + end + end + end + end +end diff --git 
a/sig/datadog/ci/test_retries/strategy/retry_new.rbs b/sig/datadog/ci/test_retries/strategy/retry_new.rbs new file mode 100644 index 00000000..5dfe5e2e --- /dev/null +++ b/sig/datadog/ci/test_retries/strategy/retry_new.rbs @@ -0,0 +1,47 @@ +module Datadog + module CI + module TestRetries + module Strategy + class RetryNew < Base + @enabled: bool + + @unique_tests_set: Set[String] + + @total_limit: Integer + + @retried_count: Integer + + @unique_tests_client: Datadog::CI::TestRetries::UniqueTestsClient + + @max_attempts_thresholds: Datadog::CI::Remote::SlowTestRetries + + DEFAULT_TOTAL_TESTS_COUNT: 100 + + attr_reader enabled: bool + + attr_reader max_attempts_thresholds: Datadog::CI::Remote::SlowTestRetries + + attr_reader unique_tests_set: Set[String] + + attr_reader total_limit: Integer + + attr_reader retried_count: Integer + + def initialize: (enabled: bool, unique_tests_client: Datadog::CI::TestRetries::UniqueTestsClient) -> void + + def covers?: (Datadog::CI::Test test_span) -> bool + + def configure: (Datadog::CI::Remote::LibrarySettings library_settings, Datadog::CI::TestSession test_session) -> void + + def build_driver: (Datadog::CI::Test test_span) -> Datadog::CI::TestRetries::Driver::Base + + private + + def mark_test_session_faulty: (Datadog::CI::TestSession? 
test_session) -> void + + def is_new_test?: (Datadog::CI::Test test_span) -> bool + end + end + end + end +end diff --git a/spec/datadog/ci/test_retries/driver/retry_new_spec.rb b/spec/datadog/ci/test_retries/driver/retry_new_spec.rb index 82955236..282f8b8e 100644 --- a/spec/datadog/ci/test_retries/driver/retry_new_spec.rb +++ b/spec/datadog/ci/test_retries/driver/retry_new_spec.rb @@ -2,7 +2,7 @@ RSpec.describe Datadog::CI::TestRetries::Driver::RetryNew do let(:max_attempts) { 10 } - let(:duration_thresholds) { + let(:max_attempts_thresholds) { Datadog::CI::Remote::SlowTestRetries.new({ "5s" => 10, "10s" => 5, @@ -12,7 +12,7 @@ } let(:test_span) { double(:test_span, set_tag: true) } - subject(:driver) { described_class.new(test_span, duration_thresholds: duration_thresholds) } + subject(:driver) { described_class.new(test_span, max_attempts_thresholds: max_attempts_thresholds) } describe "#should_retry?" do subject { driver.should_retry? }