DataDog · anmarchenko · Sep 10, 2024 · Sep 3, 2024 · Sep 4, 2024 · Sep 4, 2024
@@ -5,6 +5,9 @@ on:
 
 jobs:
   add_milestone:
+    permissions:
+      contents: read
+      pull-requests: write
     runs-on: ubuntu-latest
     if: github.event.pull_request.merged == true && github.event.pull_request.milestone == null
     steps:

@@ -27,7 +27,8 @@ class << self
       # ```
       # Datadog::CI.start_test_session(
       #   service: "my-web-site-tests",
-      #   tags: { Datadog::CI::Ext::Test::TAG_FRAMEWORK => "my-test-framework" }
+      #   tags: { Datadog::CI::Ext::Test::TAG_FRAMEWORK => "my-test-framework" },
+      #   total_tests_count: 100
       # )
       #
       # # Somewhere else after test run has ended
@@ -38,15 +39,16 @@ class << self
       #
       # @param [String] service the service name for this session (optional, defaults to DD_SERVICE or repository name)
       # @param [Hash<String,String>] tags extra tags which should be added to the test session.
+      # @param [Integer] total_tests_count the total number of tests in the test session (optional, defaults to 0) - it is used to limit the number of new tests retried within session if early flake detection is enabled
       # @return [Datadog::CI::TestSession] the active, running {Datadog::CI::TestSession}.
       # @return [nil] if test suite level visibility is disabled or CI mode is disabled.
-      def start_test_session(service: Utils::Configuration.fetch_service_name("test"), tags: {})
+      def start_test_session(service: Utils::Configuration.fetch_service_name("test"), tags: {}, total_tests_count: 0)
         Utils::Telemetry.inc(
           Ext::Telemetry::METRIC_MANUAL_API_EVENTS,
           1,
           {Ext::Telemetry::TAG_EVENT_TYPE => Ext::Telemetry::EventType::SESSION}
         )
-        test_visibility.start_test_session(service: service, tags: tags)
+        test_visibility.start_test_session(service: service, tags: tags, total_tests_count: total_tests_count)
       end
 
       # The active, unfinished test session.

@@ -8,6 +8,8 @@ module CI
     module Contrib
       module Minitest
         module Runner
+          DD_ESTIMATED_TESTS_PER_SUITE = 5
+
           def self.included(base)
             base.singleton_class.prepend(ClassMethods)
           end
@@ -18,12 +20,15 @@ def init_plugins(*args)
 
               return unless datadog_configuration[:enabled]
 
+              # minitest does not store the total number of tests, so we can't pass it to the test session
+              # instead, we use the number of test suites * DD_ESTIMATED_TESTS_PER_SUITE as a rough estimate
               test_visibility_component.start_test_session(
                 tags: {
                   CI::Ext::Test::TAG_FRAMEWORK => Ext::FRAMEWORK,
                   CI::Ext::Test::TAG_FRAMEWORK_VERSION => CI::Contrib::Minitest::Integration.version.to_s
                 },
-                service: datadog_configuration[:service_name]
+                service: datadog_configuration[:service_name],
+                total_tests_count: (DD_ESTIMATED_TESTS_PER_SUITE * ::Minitest::Runnable.runnables.size).to_i
               )
               test_visibility_component.start_test_module(Ext::FRAMEWORK)
             end

@@ -51,6 +51,10 @@ def after_teardown
               return super unless test_span
 
               finish_with_result(test_span, result_code)
+
+              # remove failures if test passed at least once on retries
+              self.failures = [] if test_span.any_retry_passed?
+
               if Helpers.parallel?(self.class)
                 finish_with_result(test_span.test_suite, result_code)
               end

@@ -64,16 +64,18 @@ def run(*args)
 
                   result = super
 
-                  # In case when test job is canceled and RSpec is quitting we don't want to report the last test
+                  # When test job is canceled and RSpec is quitting we don't want to report the last test
                   # before RSpec context unwinds. This test might have some unrelated errors that we don't want to
-                  # report.
+                  # see in Datadog.
                   return result if ::RSpec.world.wants_to_quit
 
                   case execution_result.status
                   when :passed
                     test_span&.passed!
                   when :failed
                     test_span&.failed!(exception: execution_result.exception)
+                    # if any of the retries passed, we don't fail the test run
+                    @exception = nil if test_span&.any_retry_passed?
                   else
                     # :pending or nil
                     test_span&.skipped!(
@@ -84,21 +86,20 @@ def run(*args)
                 end
               end
 
-              # after retries are done, we can report the test to RSpec
-              @skip_reporting = false
-
               # this is a special case for ci-queue, we need to finish the test suite span
               ci_queue_test_span&.finish
 
-              # Finish spec with latest retry's result
-              # TODO: when implementing new test retries make sure to clean @exception before calling this method
-              # if test passed at least once
+              # after retries are done, we can finally report the test to RSpec
+              @skip_reporting = false
               finish(reporter)
             end
 
             def finish(reporter)
-              # by default finish test but do not report it to RSpec::Core::Reporter
-              # it is going to be reported once after retries are done
+              # By default finish test but do not report it to RSpec::Core::Reporter
+              # it is going to be reported once after retries are done.
+              #
+              # We need to do this because RSpec breaks when we try to report the same example multiple times with different
+              # results.
               return super unless @skip_reporting
 
               super(::RSpec::Core::NullReporter)

@@ -23,7 +23,8 @@ def run_specs(*args)
                   CI::Ext::Test::TAG_FRAMEWORK => Ext::FRAMEWORK,
                   CI::Ext::Test::TAG_FRAMEWORK_VERSION => CI::Contrib::RSpec::Integration.version.to_s
                 },
-                service: datadog_configuration[:service_name]
+                service: datadog_configuration[:service_name],
+                total_tests_count: ::RSpec.world.example_count
               )
 
               test_module = test_visibility_component.start_test_module(Ext::FRAMEWORK)

@@ -69,6 +69,7 @@ module Telemetry
         TAG_BROWSER_DRIVER = "browser_driver"
         TAG_IS_RUM = "is_rum"
         TAG_IS_RETRY = "is_retry"
+        TAG_IS_NEW = "is_new"
         TAG_LIBRARY = "library"
         TAG_ENDPOINT = "endpoint"
         TAG_ERROR_TYPE = "error_type"
@@ -80,6 +81,7 @@ module Telemetry
         TAG_COVERAGE_ENABLED = "coverage_enabled"
         TAG_ITR_SKIP_ENABLED = "itrskip_enabled"
         TAG_EARLY_FLAKE_DETECTION_ENABLED = "early_flake_detection_enabled"
+        TAG_EARLY_FLAKE_DETECTION_ABORT_REASON = "early_flake_detection_abort_reason"
         TAG_PROVIDER = "provider"
         TAG_AUTO_INJECTED = "auto_injected"
 

@@ -58,8 +58,11 @@ module Test
         # version of the browser, if multiple browsers or multiple versions then this tag is empty
         TAG_BROWSER_VERSION = "test.browser.version"
 
-        # Tags for test retries
+        # Tags for retries
         TAG_IS_RETRY = "test.is_retry" # true if test was retried by datadog-ci library
+        TAG_IS_NEW = "test.is_new" # true if test was marked as new by new test retries (early flake detection)
+        TAG_EARLY_FLAKE_ENABLED = "test.early_flake.enabled" # true if early flake detection is enabled
+        TAG_EARLY_FLAKE_ABORT_REASON = "test.early_flake.abort_reason" # reason why early flake detection was aborted
 
         # internal APM tag to mark a span as a test span
         TAG_SPAN_KIND = "span.kind"
@@ -73,6 +76,8 @@ module Test
         ITR_TEST_SKIP_REASON = "Skipped by Datadog's intelligent test runner"
         ITR_UNSKIPPABLE_OPTION = :datadog_itr_unskippable
 
+        EARLY_FLAKE_FAULTY = "faulty"
+
         # test status as recognized by Datadog
         module Status
           PASS = "pass"

@@ -145,11 +145,18 @@ def parameters
         get_tag(Ext::Test::TAG_PARAMETERS)
       end
 
+      # @internal
+      def any_retry_passed?
+        !!test_suite&.any_test_retry_passed?(test_id)
+      end
+
       private
 
-      def record_test_result(datadog_status)
-        test_id = Utils::TestRun.datadog_test_id(name, test_suite_name, parameters)
+      def test_id
+        @test_id ||= Utils::TestRun.datadog_test_id(name, test_suite_name, parameters)
+      end
 
+      def record_test_result(datadog_status)
         # if this test was already executed in this test suite, mark it as retried
         if test_suite&.test_executed?(test_id)
           set_tag(Ext::Test::TAG_IS_RETRY, "true")

@@ -2,6 +2,7 @@
 
 require_relative "strategy/no_retry"
 require_relative "strategy/retry_failed"
+require_relative "strategy/retry_new"
 
 require_relative "../ext/telemetry"
 require_relative "../utils/telemetry"
@@ -13,10 +14,16 @@ module TestRetries
       # - retrying failed tests - improve success rate of CI pipelines
       # - retrying new tests - detect flaky tests as early as possible to prevent them from being merged
       class Component
+        FIBER_LOCAL_CURRENT_RETRY_STRATEGY_KEY = :__dd_current_retry_strategy
+
+        DEFAULT_TOTAL_TESTS_COUNT = 100
+
+        # there are clearly 2 different concepts mixed here, we should split them into separate components
+        # (high level strategies?) in the subsequent PR
         attr_reader :retry_failed_tests_enabled, :retry_failed_tests_max_attempts,
           :retry_failed_tests_total_limit, :retry_failed_tests_count,
-          :retry_new_tests_enabled, :retry_new_tests_duration_thresholds, :retry_new_tests_percentage_limit,
-          :retry_new_tests_unique_tests_set, :retry_new_tests_fault_reason
+          :retry_new_tests_enabled, :retry_new_tests_duration_thresholds, :retry_new_tests_unique_tests_set,
+          :retry_new_tests_total_limit, :retry_new_tests_count
 
         def initialize(
           retry_failed_tests_enabled:,
@@ -33,12 +40,12 @@ def initialize(
 
           @retry_new_tests_enabled = retry_new_tests_enabled
           @retry_new_tests_duration_thresholds = nil
-          @retry_new_tests_percentage_limit = 0
           @retry_new_tests_unique_tests_set = Set.new
-          # indicates that retrying new tests failed and was disabled
-          @retry_new_tests_fault_reason = nil
-
           @unique_tests_client = unique_tests_client
+          # total maximum number of new tests to retry (will be set based on the total number of tests in the session)
+          @retry_new_tests_total_limit = 0
+          # counter thate stores the current number of new tests retried
+          @retry_new_tests_count = 0
 
           @mutex = Mutex.new
         end
@@ -49,14 +56,30 @@ def configure(library_settings, test_session)
 
           return unless @retry_new_tests_enabled
 
+          # mark early flake detection enabled for test session
+          test_session.set_tag(Ext::Test::TAG_EARLY_FLAKE_ENABLED, "true")
+
           # configure retrying new tests
           @retry_new_tests_duration_thresholds = library_settings.slow_test_retries
-          @retry_new_tests_percentage_limit = library_settings.faulty_session_threshold
           @retry_new_tests_unique_tests_set = @unique_tests_client.fetch_unique_tests(test_session)
 
+          percentage_limit = library_settings.faulty_session_threshold
+          tests_count = test_session.total_tests_count.to_i
+          if tests_count.zero?
+            Datadog.logger.debug do
+              "Total tests count is zero, using default value for the total number of tests: [#{DEFAULT_TOTAL_TESTS_COUNT}]"
+            end
+
+            tests_count = DEFAULT_TOTAL_TESTS_COUNT
+          end
+          @retry_new_tests_total_limit = (tests_count * percentage_limit / 100.0).ceil
+          Datadog.logger.debug do
+            "Retry new tests total limit is [#{@retry_new_tests_total_limit}] (#{percentage_limit}%) of #{tests_count}"
+          end
+
           if @retry_new_tests_unique_tests_set.empty?
             @retry_new_tests_enabled = false
-            @retry_new_tests_fault_reason = "unique tests set is empty"
+            mark_test_session_faulty(test_session)
 
             Datadog.logger.debug("Unique tests set is empty, retrying new tests disabled")
           else
@@ -68,33 +91,25 @@ def configure(library_settings, test_session)
         end
 
         def with_retries(&block)
-          # @type var retry_strategy: Strategy::Base
-          retry_strategy = nil
-
-          test_finished_callback = lambda do |test_span|
-            if retry_strategy.nil?
-              # we always run test at least once and after first pass create a correct retry strategy
-              retry_strategy = build_strategy(test_span)
-            else
-              # after each retry we record the result, strategy will decide if we should retry again
-              retry_strategy&.record_retry(test_span)
-            end
-          end
-
-          test_visibility_component.set_test_finished_callback(test_finished_callback)
+          self.current_retry_strategy = nil
 
           loop do
             yield
 
-            break unless retry_strategy&.should_retry?
+            break unless current_retry_strategy&.should_retry?
           end
         ensure
-          test_visibility_component.remove_test_finished_callback
+          self.current_retry_strategy = nil
         end
 
         def build_strategy(test_span)
           @mutex.synchronize do
-            if should_retry_failed_test?(test_span)
+            if should_retry_new_test?(test_span)
+              Datadog.logger.debug("New test retry starts")
+              @retry_new_tests_count += 1
+
+              Strategy::RetryNew.new(test_span, duration_thresholds: @retry_new_tests_duration_thresholds)
+            elsif should_retry_failed_test?(test_span)
               Datadog.logger.debug("Failed test retry starts")
               @retry_failed_tests_count += 1
 
@@ -105,15 +120,63 @@ def build_strategy(test_span)
           end
         end
 
+        def record_test_finished(test_span)
+          if current_retry_strategy.nil?
+            # we always run test at least once and after the first pass create a correct retry strategy
+            self.current_retry_strategy = build_strategy(test_span)
+          else
+            # after each retry we record the result, strategy will decide if we should retry again
+            current_retry_strategy&.record_retry(test_span)
+          end
+        end
+
+        def record_test_span_duration(tracer_span)
+          current_retry_strategy&.record_duration(tracer_span.duration)
+        end
+
         private
 
+        def current_retry_strategy
+          Thread.current[FIBER_LOCAL_CURRENT_RETRY_STRATEGY_KEY]
+        end
+
+        def current_retry_strategy=(strategy)
+          Thread.current[FIBER_LOCAL_CURRENT_RETRY_STRATEGY_KEY] = strategy
+        end
+
         def should_retry_failed_test?(test_span)
-          @retry_failed_tests_enabled && !!test_span&.failed? && @retry_failed_tests_count < @retry_failed_tests_total_limit
+          if @retry_failed_tests_count >= @retry_failed_tests_total_limit
+            @retry_failed_tests_enabled = false
+          end
+
+          @retry_failed_tests_enabled && !!test_span&.failed?
+        end
+
+        def should_retry_new_test?(test_span)
+          if @retry_new_tests_count >= @retry_new_tests_total_limit
+            Datadog.logger.debug do
+              "Retry new tests limit reached: [#{@retry_new_tests_count}] out of [#{@retry_new_tests_total_limit}]"
+            end
+            @retry_new_tests_enabled = false
+            mark_test_session_faulty(Datadog::CI.active_test_session)
+          end
+
+          @retry_new_tests_enabled && is_new_test?(test_span)
         end
 
         def test_visibility_component
           Datadog.send(:components).test_visibility
         end
+
+        def is_new_test?(test_span)
+          test_id = Utils::TestRun.datadog_test_id(test_span.name, test_span.test_suite_name)
+
+          !@retry_new_tests_unique_tests_set.include?(test_id)
+        end
+
+        def mark_test_session_faulty(test_session)
+          test_session&.set_tag(Ext::Test::TAG_EARLY_FLAKE_ABORT_REASON, Ext::Test::EARLY_FLAKE_FAULTY)
+        end
       end
     end
   end

@@ -12,6 +12,10 @@ def should_retry?
           def record_retry(test_span)
             test_span&.set_tag(Ext::Test::TAG_IS_RETRY, "true")
           end
+
+          # duration in float seconds
+          def record_duration(duration)
+          end
         end
       end
     end