Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SDTEST-292] Retry HTTP requests on 429 and 5xx responses #243

Merged
merged 2 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/datadog/ci/ext/transport.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module Transport
HEADER_CONTENT_ENCODING = "Content-Encoding"
HEADER_EVP_SUBDOMAIN = "X-Datadog-EVP-Subdomain"
HEADER_CONTAINER_ID = "Datadog-Container-ID"
HEADER_RATELIMIT_RESET = "X-RateLimit-Reset"

EVP_PROXY_V2_PATH_PREFIX = "/evp_proxy/v2/"
EVP_PROXY_V4_PATH_PREFIX = "/evp_proxy/v4/"
Expand Down
59 changes: 46 additions & 13 deletions lib/datadog/ci/transport/http.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class HTTP
DEFAULT_TIMEOUT = 30
MAX_RETRIES = 3
INITIAL_BACKOFF = 1
MAX_BACKOFF = 30

def initialize(host:, port:, timeout: DEFAULT_TIMEOUT, ssl: true, compress: false)
@host = host
Expand Down Expand Up @@ -78,21 +79,53 @@ def request(
private

def perform_http_call(path:, payload:, headers:, verb:, retries: MAX_RETRIES, backoff: INITIAL_BACKOFF)
adapter.call(
path: path, payload: payload, headers: headers, verb: verb
)
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError, SocketError, Net::HTTPBadResponse => e
Datadog.logger.debug("Failed to send request with #{e} (#{e.message})")

if retries.positive?
sleep(backoff)
# retry logic as lambda to avoid duplication
retry_request = ->(previous_response, current_backoff) do
if retries.positive? && backoff <= MAX_BACKOFF
sleep(backoff)

perform_http_call(
path: path,
payload: payload,
headers: headers,
verb: verb,
retries: retries - 1,
backoff: current_backoff * 2
)
else
Datadog.logger.error(
"Failed to send request after #{MAX_RETRIES - retries} retries (current backoff value #{backoff})"
)

previous_response
end
end

perform_http_call(
path: path, payload: payload, headers: headers, verb: verb, retries: retries - 1, backoff: backoff * 2
begin
response = adapter.call(
path: path, payload: payload, headers: headers, verb: verb
)
else
Datadog.logger.error("Failed to send request after #{MAX_RETRIES} retries")
ErrorResponse.new(e)
return response if response.ok?

if response.code == 429
backoff = (response.header(Ext::Transport::HEADER_RATELIMIT_RESET) || 1).to_i

Datadog.logger.debug do
"Received rate limit response, retrying in #{backoff} seconds from X-RateLimit-Reset header"
end

retry_request.call(response, backoff)
elsif response.server_error?
Datadog.logger.debug { "Received server error response, retrying in #{backoff} seconds" }

retry_request.call(response, backoff)
else
response
end
rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError, SocketError, Net::HTTPBadResponse => e
Datadog.logger.debug { "Failed to send request with #{e} (#{e.message})" }

retry_request.call(ErrorResponse.new(e), backoff)
end
end

Expand Down
2 changes: 2 additions & 0 deletions sig/datadog/ci/ext/transport.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ module Datadog

HEADER_CONTAINER_ID: "Datadog-Container-ID"

HEADER_RATELIMIT_RESET: "X-RateLimit-Reset"

EVP_PROXY_V2_PATH_PREFIX: "/evp_proxy/v2/"

EVP_PROXY_V4_PATH_PREFIX: "/evp_proxy/v4/"
Expand Down
1 change: 1 addition & 0 deletions sig/datadog/ci/transport/http.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ module Datadog
DEFAULT_TIMEOUT: 30
MAX_RETRIES: 3
INITIAL_BACKOFF: 1
MAX_BACKOFF: 30

def initialize: (host: String, port: Integer, ?ssl: bool, ?timeout: Integer, ?compress: bool) -> void

Expand Down
107 changes: 93 additions & 14 deletions spec/datadog/ci/transport/http_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@
context "when request fails" do
let(:request_options) { {backoff: 0} }

context "when server returns error status code" do
context "when server returns 400" do
let(:net_http_response) { double("Net::HTTP::Response", code: 400, body: "error", "[]": nil) }

before do
Expand All @@ -178,27 +178,106 @@
end
end

context "when succeeds after retries" do
before do
expect(adapter).to receive(:call).and_raise(Errno::ECONNRESET).exactly(described_class::MAX_RETRIES).times
expect(adapter).to receive(:call).and_return(http_response)
context "when server returns 429" do
let(:no_backoff) { "0" }
let(:response_429_no_backoff) do
Datadog::CI::Transport::Adapters::Net::Response.new(
double("Net::HTTP::Response", code: 429, body: "error", "[]": no_backoff)
)
end

it "produces a response" do
is_expected.to be_a_kind_of(Datadog::CI::Transport::Adapters::Net::Response)
context "when backoff increases" do
let(:high_backoff) { "31" }
let(:response_429_high_backoff) do
Datadog::CI::Transport::Adapters::Net::Response.new(
double("Net::HTTP::Response", code: 429, body: "error", "[]": high_backoff)
)
end

before do
expect(adapter).to receive(:call).and_return(response_429_no_backoff).once
expect(adapter).to receive(:call).and_return(response_429_high_backoff).once
end

it "retries until backoff is over 30, afterwards returns failed response" do
is_expected.to be_a_kind_of(Datadog::CI::Transport::Adapters::Net::Response)

expect(response.code).to eq(429)
end
end

context "when the call eventually succeeds" do
before do
expect(adapter).to(
receive(:call).and_return(response_429_no_backoff).exactly(described_class::MAX_RETRIES).times
)
expect(adapter).to receive(:call).and_return(http_response).once
end

it "produces a response" do
is_expected.to be_a_kind_of(Datadog::CI::Transport::Adapters::Net::Response)

expect(response.code).to eq(200)
expect(response.code).to eq(200)
end
end
end

context "when retries are exhausted" do
before do
expect(adapter).to receive(:call).and_raise(Errno::ECONNRESET).exactly(described_class::MAX_RETRIES + 1).times
context "when server returns 503" do
let(:response_503) do
Datadog::CI::Transport::Adapters::Net::Response.new(
double("Net::HTTP::Response", code: 503, body: "error", "[]": nil)
)
end

context "when succeeds after retries" do
before do
expect(adapter).to receive(:call).and_return(response_503).exactly(described_class::MAX_RETRIES).times
expect(adapter).to receive(:call).and_return(http_response)
end

it "produces a response" do
is_expected.to be_a_kind_of(Datadog::CI::Transport::Adapters::Net::Response)

expect(response.code).to eq(200)
end
end

context "when retries are exhausted" do
before do
expect(adapter).to receive(:call).and_return(response_503).exactly(described_class::MAX_RETRIES + 1).times
end

it "returns failed response" do
is_expected.to be_a_kind_of(Datadog::CI::Transport::Adapters::Net::Response)

expect(response.code).to eq(503)
end
end
end

context "when network connection fails" do
context "when succeeds after retries" do
before do
expect(adapter).to receive(:call).and_raise(Errno::ECONNRESET).exactly(described_class::MAX_RETRIES).times
expect(adapter).to receive(:call).and_return(http_response)
end

it "produces a response" do
is_expected.to be_a_kind_of(Datadog::CI::Transport::Adapters::Net::Response)

expect(response.code).to eq(200)
end
end

context "when retries are exhausted" do
before do
expect(adapter).to receive(:call).and_raise(Errno::ECONNRESET).exactly(described_class::MAX_RETRIES + 1).times
end

it "returns ErrorRsponse" do
expect(response.error).to be_kind_of(Errno::ECONNRESET)
expect(response.telemetry_error_type).to eq(Datadog::CI::Ext::Telemetry::ErrorType::NETWORK)
it "returns ErrorRsponse" do
expect(response.error).to be_kind_of(Errno::ECONNRESET)
expect(response.telemetry_error_type).to eq(Datadog::CI::Ext::Telemetry::ErrorType::NETWORK)
end
end
end
end
Expand Down
Loading