Skip to content

Commit

Permalink
create test retries strategies and move component's logic there (they…
Browse files Browse the repository at this point in the history
… are not actually used at this point)
  • Loading branch information
anmarchenko committed Sep 16, 2024
1 parent 754c176 commit 74e72fe
Show file tree
Hide file tree
Showing 12 changed files with 326 additions and 9 deletions.
3 changes: 1 addition & 2 deletions lib/datadog/ci/test_retries/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def initialize(
@retry_failed_tests_count = 0

@retry_new_tests_enabled = retry_new_tests_enabled
@retry_new_tests_duration_thresholds = nil
@retry_new_tests_unique_tests_set = Set.new
@unique_tests_client = unique_tests_client
# total maximum number of new tests to retry (will be set based on the total number of tests in the session)
Expand Down Expand Up @@ -120,7 +119,7 @@ def build_strategy(test_span)
end
@retry_new_tests_count += 1

Driver::RetryNew.new(test_span, duration_thresholds: @retry_new_tests_duration_thresholds)
Driver::RetryNew.new(test_span, max_attempts_thresholds: @retry_new_tests_duration_thresholds)
elsif should_retry_failed_test?(test_span)
Datadog.logger.debug do
"#{test_span.name} failed, will be retried"
Expand Down
6 changes: 3 additions & 3 deletions lib/datadog/ci/test_retries/driver/retry_new.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ module TestRetries
module Driver
# retry every new test up to 10 times (early flake detection)
class RetryNew < Base
def initialize(test_span, duration_thresholds:)
@duration_thresholds = duration_thresholds
def initialize(test_span, max_attempts_thresholds:)
@max_attempts_thresholds = max_attempts_thresholds
@attempts = 0
# will be changed based on test span duration
@max_attempts = 10
Expand All @@ -33,7 +33,7 @@ def record_retry(test_span)
end

def record_duration(duration)
@max_attempts = @duration_thresholds.max_attempts_for_duration(duration)
@max_attempts = @max_attempts_thresholds.max_attempts_for_duration(duration)

Datadog.logger.debug { "Recorded test duration of [#{duration}], new Max Attempts value is [#{@max_attempts}]" }
end
Expand Down
26 changes: 26 additions & 0 deletions lib/datadog/ci/test_retries/strategy/base.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# frozen_string_literal: true

require_relative "../driver/no_retry"

module Datadog
module CI
module TestRetries
module Strategy
# Strategies are major subcomponents of the retry mechanism. They are responsible for
# determining which tests should be retried and how.
class Base
  # Tells whether this strategy applies to the given test span.
  # The base strategy never claims a test.
  def covers?(test_span)
    false
  end

  # Hook that receives the library settings and the test session.
  # The base strategy has nothing to configure, so this is a no-op returning nil.
  def configure(_library_settings, _test_session)
    nil
  end

  # Builds the driver for the given test span.
  # The base strategy always hands out a fresh Driver::NoRetry.
  def build_driver(_test_span)
    Driver::NoRetry.new
  end
end
end
end
end
end
12 changes: 12 additions & 0 deletions lib/datadog/ci/test_retries/strategy/no_retry.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# frozen_string_literal: true

module Datadog
module CI
module TestRetries
module Strategy
class NoRetry < Base
# Adds nothing of its own: covers?, configure and build_driver are all
# inherited unchanged from Base.
end
end
end
end
end
52 changes: 52 additions & 0 deletions lib/datadog/ci/test_retries/strategy/retry_failed.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# frozen_string_literal: true

require_relative "../driver/no_retry"

module Datadog
module CI
module TestRetries
module Strategy
class RetryFailed < Base
attr_reader :enabled, :max_attempts,
:total_limit, :retried_count

# enabled:      whether retrying failed tests is switched on
# max_attempts: value handed to every Driver::RetryFailed this strategy builds
# total_limit:  once retried_count reaches this number the strategy disables itself
def initialize(enabled:, max_attempts:, total_limit:)
  @total_limit = total_limit
  @max_attempts = max_attempts
  @enabled = enabled

  # no test has been handed a retry driver yet
  @retried_count = 0
end

# Returns true only for a failed test span while the strategy is enabled and
# the total retry budget is not exhausted.
#
# Side effect: when retried_count has reached total_limit the strategy
# switches itself off (and logs it), so later calls return false right away.
def covers?(test_span)
  return false unless @enabled

  budget_exhausted = @retried_count >= @total_limit
  if budget_exhausted
    Datadog.logger.debug do
      "Retry failed tests limit reached: [#{@retried_count}] out of [#{@total_limit}]"
    end
    @enabled = false
    return false
  end

  # a nil span is treated as "not failed"
  !!test_span&.failed?
end

# Narrows the enabled flag using the library settings: an enabled strategy
# takes the value of flaky_test_retries_enabled?, a disabled one stays
# disabled and the settings are not consulted. test_session is unused here.
def configure(library_settings, test_session)
  @enabled = library_settings.flaky_test_retries_enabled? if @enabled
  @enabled
end

# Logs that the test is going to be retried, counts it against the total
# budget and returns a new Driver::RetryFailed configured with max_attempts.
def build_driver(test_span)
  Datadog.logger.debug do
    "#{test_span.name} failed, will be retried"
  end

  @retried_count += 1
  Driver::RetryFailed.new(max_attempts: max_attempts)
end
end
end
end
end
end
123 changes: 123 additions & 0 deletions lib/datadog/ci/test_retries/strategy/retry_new.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# frozen_string_literal: true

require_relative "../driver/no_retry"

module Datadog
module CI
module TestRetries
module Strategy
class RetryNew < Base
DEFAULT_TOTAL_TESTS_COUNT = 100

attr_reader :enabled, :max_attempts_thresholds, :unique_tests_set, :total_limit, :retried_count

# enabled:             whether retrying new tests is switched on
# unique_tests_client: object later asked (in configure) for the set of known tests
def initialize(enabled:, unique_tests_client:)
  @enabled = enabled

  # replaced in configure by whatever the unique tests client returns
  @unique_tests_set = Set.new

  # nothing retried so far
  @retried_count = 0
  # total maximum number of new tests to retry (will be set based on the total number of tests in the session)
  @total_limit = 0

  @unique_tests_client = unique_tests_client
end

# Returns a truthy value only for a non-skipped test that is_new_test?
# reports as new, while the strategy is enabled and retried_count is still
# below total_limit.
#
# Side effect: once retried_count reaches total_limit the strategy disables
# itself, logs it and marks the active test session as faulty.
def covers?(test_span)
  return false unless @enabled

  budget_exhausted = @retried_count >= @total_limit
  if budget_exhausted
    Datadog.logger.debug do
      "Retry new tests limit reached: [#{@retried_count}] out of [#{@total_limit}]"
    end
    @enabled = false
    mark_test_session_faulty(Datadog::CI.active_test_session)
    return false
  end

  return false if test_span.skipped?

  is_new_test?(test_span)
end

# Applies remote library settings to this strategy for the given test session.
#
# library_settings must respond to early_flake_detection_enabled?,
# slow_test_retries and faulty_session_threshold; test_session must respond
# to set_tag and total_tests_count.
#
# Steps, in order:
#   1. stay/become disabled unless the settings enable early flake detection
#      (returns early in that case, touching nothing else);
#   2. tag the session as having early flake detection enabled;
#   3. store the per-duration max attempts thresholds;
#   4. compute @total_limit as faulty_session_threshold percent of the
#      session's total tests count (DEFAULT_TOTAL_TESTS_COUNT when that count is zero);
#   5. fetch the set of known tests; an empty set disables the strategy and
#      marks the session as faulty;
#   6. log and report the size of the fetched set via telemetry.
def configure(library_settings, test_session)
# a strategy that was created disabled never consults the settings
@enabled &&= library_settings.early_flake_detection_enabled?

return unless @enabled

# mark early flake detection enabled for test session
test_session.set_tag(Ext::Test::TAG_EARLY_FLAKE_ENABLED, "true")

# parse the max attempts thresholds for each test duration
@max_attempts_thresholds = library_settings.slow_test_retries
Datadog.logger.debug do
"Slow test retries thresholds: #{@max_attempts_thresholds.entries}"
end

# calculate what would be the total limit of new tests to retry
percentage_limit = library_settings.faulty_session_threshold
# to_i turns a nil total_tests_count into 0, which then falls back to the default below
tests_count = test_session.total_tests_count.to_i
if tests_count.zero?
Datadog.logger.debug do
"Total tests count is zero, using default value for the total number of tests: [#{DEFAULT_TOTAL_TESTS_COUNT}]"
end

tests_count = DEFAULT_TOTAL_TESTS_COUNT
end
# percentage of the tests count, rounded up to a whole number of tests
@total_limit = (tests_count * percentage_limit / 100.0).ceil
Datadog.logger.debug do
"Retry new tests total limit is [#{@total_limit}] (#{percentage_limit}%) of #{tests_count}"
end

# fetch a set of known unique tests
@unique_tests_set = @unique_tests_client.fetch_unique_tests(test_session)
if @unique_tests_set.empty?
# with no known tests every test would look new, so the feature is switched off
@enabled = false
mark_test_session_faulty(test_session)

Datadog.logger.warn(
"Disabling early flake detection because there are no known tests (possible reason: no test runs in default branch)"
)
end

# report how many unique tests were found
# NOTE(review): this log and the telemetry below are also emitted when the set
# was empty and the strategy has just been disabled; the "enabled" session tag
# set earlier is not removed in that case — confirm this is intended.
Datadog.logger.debug do
"Found [#{@unique_tests_set.size}] known unique tests"
end
Utils::Telemetry.distribution(
Ext::Telemetry::METRIC_EFD_UNIQUE_TESTS_RESPONSE_TESTS,
@unique_tests_set.size.to_f
)
end

# Logs that the new test is going to be retried, counts it against the total
# budget and returns a Driver::RetryNew for the span, passing along the
# max attempts thresholds stored by configure.
def build_driver(test_span)
  Datadog.logger.debug { "#{test_span.name} is new, will be retried" }

  @retried_count += 1

  thresholds = @max_attempts_thresholds
  Driver::RetryNew.new(test_span, max_attempts_thresholds: thresholds)
end

private

# Tags the given test session with the "faulty" early flake detection abort
# reason. Does nothing (and returns nil) when there is no session.
def mark_test_session_faulty(test_session)
  return if test_session.nil?

  test_session.set_tag(Ext::Test::TAG_EARLY_FLAKE_ABORT_REASON, Ext::Test::EARLY_FLAKE_FAULTY)
end

# Tells whether the given test is absent from the set of known unique tests.
#
# The test is identified by the id that Utils::TestRun.datadog_test_id builds
# from the span's name and test suite name. Returns true (and logs it) when
# that id is not in @unique_tests_set, false otherwise.
def is_new_test?(test_span)
  test_id = Utils::TestRun.datadog_test_id(test_span.name, test_span.test_suite_name)

  # The set lives in @unique_tests_set (assigned in initialize and configure).
  # The previously referenced @retry_new_tests_unique_tests_set is never
  # assigned in this class, so the lookup raised NoMethodError on nil.
  result = !@unique_tests_set.include?(test_id)

  if result
    Datadog.logger.debug do
      "#{test_id} is not found in the unique tests set, it is a new test"
    end
  end

  result
end
end
end
end
end
end
4 changes: 2 additions & 2 deletions sig/datadog/ci/test_retries/driver/retry_new.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ module Datadog
module TestRetries
module Driver
class RetryNew < Base
@duration_thresholds: Datadog::CI::Remote::SlowTestRetries
@max_attempts_thresholds: Datadog::CI::Remote::SlowTestRetries

@attempts: Integer
@max_attempts: Integer

def initialize: (Datadog::CI::Test test_span, duration_thresholds: Datadog::CI::Remote::SlowTestRetries) -> void
def initialize: (Datadog::CI::Test test_span, max_attempts_thresholds: Datadog::CI::Remote::SlowTestRetries) -> void

private

Expand Down
15 changes: 15 additions & 0 deletions sig/datadog/ci/test_retries/strategy/base.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
module Datadog
module CI
module TestRetries
module Strategy
# Signatures shared by every retry strategy.
class Base
# Whether the strategy applies to the given test.
def covers?: (Datadog::CI::Test test_span) -> bool

# Applies the remote library settings for the given test session.
def configure: (Datadog::CI::Remote::LibrarySettings library_settings, Datadog::CI::TestSession test_session) -> void

# Builds the retry driver to use for the given test.
def build_driver: (Datadog::CI::Test test_span) -> Datadog::CI::TestRetries::Driver::Base
end
end
end
end
end
10 changes: 10 additions & 0 deletions sig/datadog/ci/test_retries/strategy/no_retry.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module Datadog
module CI
module TestRetries
module Strategy
# Declares no members of its own; the whole interface comes from Base.
class NoRetry < Base
end
end
end
end
end
33 changes: 33 additions & 0 deletions sig/datadog/ci/test_retries/strategy/retry_failed.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
module Datadog
module CI
module TestRetries
module Strategy
# Signatures for the strategy that retries failed tests.
class RetryFailed < Base
# Whether the strategy is currently switched on.
@enabled: bool

# Passed as max_attempts to each driver the strategy builds.
@max_attempts: Integer

# Upper bound for retried_count; reaching it disables the strategy.
@total_limit: Integer

# Number of drivers built so far (starts at 0).
@retried_count: Integer

attr_reader enabled: bool

attr_reader max_attempts: Integer

attr_reader total_limit: Integer

attr_reader retried_count: Integer

def initialize: (enabled: bool, max_attempts: Integer, total_limit: Integer) -> void

# True for a failed test while enabled and below total_limit.
def covers?: (Datadog::CI::Test test_span) -> bool

# Keeps the strategy enabled only if the library settings enable flaky test retries.
def configure: (Datadog::CI::Remote::LibrarySettings library_settings, Datadog::CI::TestSession test_session) -> void

# Increments retried_count and returns a driver for the test.
def build_driver: (Datadog::CI::Test test_span) -> Datadog::CI::TestRetries::Driver::Base
end
end
end
end
end
47 changes: 47 additions & 0 deletions sig/datadog/ci/test_retries/strategy/retry_new.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
module Datadog
module CI
module TestRetries
module Strategy
# Signatures for the strategy that retries new tests (early flake detection).
class RetryNew < Base
# Whether the strategy is currently switched on.
@enabled: bool

# Ids of the known tests; a test whose id is missing here counts as new.
@unique_tests_set: Set[String]

# Upper bound for retried_count; reaching it disables the strategy.
@total_limit: Integer

# Number of drivers built so far (starts at 0).
@retried_count: Integer

# Source of the known tests set, queried in configure.
@unique_tests_client: Datadog::CI::TestRetries::UniqueTestsClient

# NOTE(review): only assigned in configure, so it is nil before configure
# runs or when configure returns early — the non-nil type may be too strict.
@max_attempts_thresholds: Datadog::CI::Remote::SlowTestRetries

# Tests count assumed when the session reports zero total tests.
DEFAULT_TOTAL_TESTS_COUNT: 100

attr_reader enabled: bool

attr_reader max_attempts_thresholds: Datadog::CI::Remote::SlowTestRetries

attr_reader unique_tests_set: Set[String]

attr_reader total_limit: Integer

attr_reader retried_count: Integer

def initialize: (enabled: bool, unique_tests_client: Datadog::CI::TestRetries::UniqueTestsClient) -> void

# True for a non-skipped new test while enabled and below total_limit.
def covers?: (Datadog::CI::Test test_span) -> bool

# Reads thresholds and limits from the library settings and fetches the known tests.
def configure: (Datadog::CI::Remote::LibrarySettings library_settings, Datadog::CI::TestSession test_session) -> void

# Increments retried_count and returns a driver for the test.
def build_driver: (Datadog::CI::Test test_span) -> Datadog::CI::TestRetries::Driver::Base

private

# Tags the session (if any) with the "faulty" abort reason.
def mark_test_session_faulty: (Datadog::CI::TestSession? test_session) -> void

# Whether the test's id is missing from the known tests set.
def is_new_test?: (Datadog::CI::Test test_span) -> bool
end
end
end
end
end
4 changes: 2 additions & 2 deletions spec/datadog/ci/test_retries/driver/retry_new_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

RSpec.describe Datadog::CI::TestRetries::Driver::RetryNew do
let(:max_attempts) { 10 }
let(:duration_thresholds) {
let(:max_attempts_thresholds) {
Datadog::CI::Remote::SlowTestRetries.new({
"5s" => 10,
"10s" => 5,
Expand All @@ -12,7 +12,7 @@
}
let(:test_span) { double(:test_span, set_tag: true) }

subject(:driver) { described_class.new(test_span, duration_thresholds: duration_thresholds) }
subject(:driver) { described_class.new(test_span, max_attempts_thresholds: max_attempts_thresholds) }

describe "#should_retry?" do
subject { driver.should_retry? }
Expand Down

0 comments on commit 74e72fe

Please sign in to comment.