Skip to content

Commit

Permalink
fix the new tests logic for retrying new tests, add faulty session th…
Browse files Browse the repository at this point in the history
…reshold
  • Loading branch information
anmarchenko committed Sep 9, 2024
1 parent cba919e commit 2c5f82d
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 17 deletions.
4 changes: 2 additions & 2 deletions lib/datadog/ci/contrib/rspec/runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ def run_specs(*args)
CI::Ext::Test::TAG_FRAMEWORK => Ext::FRAMEWORK,
CI::Ext::Test::TAG_FRAMEWORK_VERSION => CI::Contrib::RSpec::Integration.version.to_s
},
service: datadog_configuration[:service_name]
service: datadog_configuration[:service_name],
total_tests_count: ::RSpec.world.example_count
)
test_session&.total_tests_count = ::RSpec.world.example_count

test_module = test_visibility_component.start_test_module(Ext::FRAMEWORK)

Expand Down
27 changes: 22 additions & 5 deletions lib/datadog/ci/test_retries/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@ module TestRetries
class Component
FIBER_LOCAL_CURRENT_RETRY_STRATEGY_KEY = :__dd_current_retry_strategy

# there are clearly 2 different concepts mixed here, we should split them into separate components
# (high level strategies?) in the subsequent PR
attr_reader :retry_failed_tests_enabled, :retry_failed_tests_max_attempts,
:retry_failed_tests_total_limit, :retry_failed_tests_count,
:retry_new_tests_enabled, :retry_new_tests_duration_thresholds,
:retry_new_tests_percentage_limit, :retry_new_tests_unique_tests_set
:retry_new_tests_percentage_limit, :retry_new_tests_total_tests_count, :retry_new_tests_count,
:retry_new_tests_unique_tests_set

def initialize(
retry_failed_tests_enabled:,
Expand All @@ -36,6 +39,8 @@ def initialize(

@retry_new_tests_enabled = retry_new_tests_enabled
@retry_new_tests_duration_thresholds = nil
@retry_new_tests_total_tests_count = 0
@retry_new_tests_count = 0
@retry_new_tests_percentage_limit = 0
@retry_new_tests_unique_tests_set = Set.new
@unique_tests_client = unique_tests_client
Expand All @@ -56,11 +61,11 @@ def configure(library_settings, test_session)
@retry_new_tests_duration_thresholds = library_settings.slow_test_retries
@retry_new_tests_percentage_limit = library_settings.faulty_session_threshold
@retry_new_tests_unique_tests_set = @unique_tests_client.fetch_unique_tests(test_session)
@retry_new_tests_total_tests_count = test_session.total_tests_count.to_i

if @retry_new_tests_unique_tests_set.empty?
@retry_new_tests_enabled = false

test_session.set_tag(Ext::Test::TAG_EARLY_FLAKE_ABORT_REASON, Ext::Test::EARLY_FLAKE_FAULTY)
mark_test_session_faulty(test_session)

Datadog.logger.debug("Unique tests set is empty, retrying new tests disabled")
else
Expand All @@ -87,6 +92,7 @@ def build_strategy(test_span)
@mutex.synchronize do
if should_retry_new_test?(test_span)
Datadog.logger.debug("New test retry starts")
@retry_new_tests_count += 1

Strategy::RetryNew.new(test_span, duration_thresholds: @retry_new_tests_duration_thresholds)
elsif should_retry_failed_test?(test_span)
Expand Down Expand Up @@ -133,7 +139,14 @@ def should_retry_failed_test?(test_span)
end

def should_retry_new_test?(test_span)
# TODO: check if EFD is faulty here
if @retry_new_tests_count > @retry_new_tests_total_tests_count * @retry_new_tests_percentage_limit / 100.0
Datadog.logger.debug do
"Retry new tests limit reached: [#{@retry_new_tests_count}] out of [#{@retry_new_tests_total_tests_count}] " \
"with percentage limit [#{@retry_new_tests_percentage_limit}]"
end
@retry_new_tests_enabled = false
mark_test_session_faulty(Datadog::CI.active_test_session)
end

@retry_new_tests_enabled && is_new_test?(test_span)
end
Expand All @@ -145,7 +158,11 @@ def test_visibility_component
def is_new_test?(test_span)
test_id = Utils::TestRun.datadog_test_id(test_span.name, test_span.test_suite_name)

@retry_new_tests_unique_tests_set.include?(test_id)
!@retry_new_tests_unique_tests_set.include?(test_id)
end

def mark_test_session_faulty(test_session)
test_session&.set_tag(Ext::Test::TAG_EARLY_FLAKE_ABORT_REASON, Ext::Test::EARLY_FLAKE_FAULTY)
end
end
end
Expand Down
4 changes: 3 additions & 1 deletion lib/datadog/ci/test_visibility/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ def initialize(
@codeowners = codeowners
end

def start_test_session(service: nil, tags: {})
def start_test_session(service: nil, tags: {}, total_tests_count: 0)
return skip_tracing unless test_suite_level_visibility_enabled

test_session = @context.start_test_session(service: service, tags: tags)
test_session.total_tests_count = total_tests_count

on_test_session_started(test_session)
test_session
end
Expand Down
6 changes: 6 additions & 0 deletions sig/datadog/ci/test_retries/component.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ module Datadog

attr_reader retry_new_tests_unique_tests_set: Set[String]

attr_reader retry_new_tests_total_tests_count: Integer

attr_reader retry_new_tests_count: Integer

@mutex: Thread::Mutex

@unique_tests_client: Datadog::CI::TestRetries::UniqueTestsClient
Expand Down Expand Up @@ -49,6 +53,8 @@ module Datadog
def is_new_test?: (Datadog::CI::Test test) -> bool

def test_visibility_component: () -> Datadog::CI::TestVisibility::Component

def mark_test_session_faulty: (Datadog::CI::TestSession? test_session) -> void
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion sig/datadog/ci/test_visibility/component.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ module Datadog

def trace: (String span_name, ?type: String, ?tags: Hash[untyped, untyped]) ?{ (Datadog::CI::Span span) -> untyped } -> untyped

def start_test_session: (?service: String?, ?tags: Hash[untyped, untyped]) -> Datadog::CI::TestSession
def start_test_session: (?service: String?, ?tags: Hash[untyped, untyped], ?total_tests_count: Integer) -> Datadog::CI::TestSession

def start_test_module: (String test_module_name, ?service: String?, ?tags: Hash[untyped, untyped]) -> Datadog::CI::TestModule

Expand Down
17 changes: 11 additions & 6 deletions spec/datadog/ci/contrib/rspec/instrumentation_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def rspec_session_run(
let(:integration_options) { {service_name: "lspec"} }
end

before do
Datadog.send(:components).test_visibility.start_test_session
end

it "creates span for example" do
spec = with_new_rspec_environment do
RSpec.describe "some test" do
Expand Down Expand Up @@ -147,15 +151,15 @@ def rspec_session_run(
:source_file,
"spec/datadog/ci/contrib/rspec/instrumentation_spec.rb"
)
expect(first_test_span).to have_test_tag(:source_start, "120")
expect(first_test_span).to have_test_tag(:source_start, "124")
expect(first_test_span).to have_test_tag(
:codeowners,
"[\"@DataDog/ruby-guild\", \"@DataDog/ci-app-libraries\"]"
)
end

it "creates spans for several examples" do
expect(Datadog::CI::Ext::Environment).to receive(:tags).once.and_call_original
expect(Datadog::CI::Ext::Environment).to receive(:tags).never

num_examples = 20
with_new_rspec_environment do
Expand Down Expand Up @@ -964,7 +968,7 @@ def rspec_skipped_session_run
let(:integration_name) { :rspec }

let(:early_flake_detection_enabled) { true }
let(:unique_tests_set) { Set.new(["SomeTest at ./spec/datadog/ci/contrib/rspec/instrumentation_spec.rb.nested foo."]) }
let(:unique_tests_set) { Set.new(["SomeTest at ./spec/datadog/ci/contrib/rspec/instrumentation_spec.rb.nested fails."]) }
end

it "retries the new test 10 times" do
Expand Down Expand Up @@ -1090,7 +1094,7 @@ def rspec_skipped_session_run
let(:integration_name) { :rspec }

let(:early_flake_detection_enabled) { true }
let(:unique_tests_set) { Set.new(["SomeTest at ./spec/datadog/ci/contrib/rspec/instrumentation_spec.rb.nested foo."]) }
let(:unique_tests_set) { Set.new(["SomeTest at ./spec/datadog/ci/contrib/rspec/instrumentation_spec.rb.nested flaky."]) }

let(:flaky_test_retries_enabled) { true }
end
Expand Down Expand Up @@ -1130,11 +1134,12 @@ def rspec_skipped_session_run
let(:integration_name) { :rspec }

let(:early_flake_detection_enabled) { true }
# avoid bailing out of EFD
let(:faulty_session_threshold) { 75 }
let(:unique_tests_set) do
Set.new(
[
"SomeTest at ./spec/datadog/ci/contrib/rspec/instrumentation_spec.rb.nested foo.",
"SomeTest at ./spec/datadog/ci/contrib/rspec/instrumentation_spec.rb.nested flaky."
"SomeTest at ./spec/datadog/ci/contrib/rspec/instrumentation_spec.rb.nested x."
]
)
end
Expand Down
4 changes: 2 additions & 2 deletions spec/datadog/ci/test_retries/component_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
end

let(:tracer_span) { Datadog::Tracing::SpanOperation.new("session") }
let(:test_session) { Datadog::CI::TestSession.new(tracer_span) }
let(:test_session) { Datadog::CI::TestSession.new(tracer_span).tap { |test_session| test_session.total_tests_count = 30 } }

subject(:component) do
described_class.new(
Expand Down Expand Up @@ -281,7 +281,7 @@

context "when retry new test strategy is used" do
let(:remote_early_flake_detection_enabled) { true }
let(:unique_tests_set) { Set.new(["mysuite.mytest."]) }
let(:unique_tests_set) { Set.new(["mysuite.mytest2."]) }

it { is_expected.to eq(11) }

Expand Down

0 comments on commit 2c5f82d

Please sign in to comment.