From b168a9e9d9880b2e90181825d5021634ea3f5640 Mon Sep 17 00:00:00 2001 From: Omotola Akeredolu Date: Tue, 10 Oct 2023 09:55:20 -0700 Subject: [PATCH 1/2] throttle httpclient requests --- .../pip/IPyPiClient.cs | 7 +++++++ .../pip/SimplePypiClient.cs | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs index 49a5cc2db..d1c9abb1f 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs @@ -54,6 +54,9 @@ public sealed class PyPiClient : IPyPiClient, IDisposable private readonly IEnvironmentVariableService environmentVariableService; private readonly ILogger logger; + // Semaphore to limit the number of concurrent calls to pypi.org + private readonly SemaphoreSlim semaphore; + private bool checkedMaxEntriesVariable; // retries used so far for calls to pypi.org @@ -80,6 +83,7 @@ public PyPiClient(IEnvironmentVariableService environmentVariableService, ILogge FinalCacheSize = 0, }; this.logger = logger; + this.semaphore = new SemaphoreSlim(10, 10); } public static HttpClient HttpClient { get; internal set; } = new HttpClient(HttpClientHandler); @@ -246,11 +250,13 @@ private async Task GetAndCachePyPiResponseAsync(Uri uri) return result; } + await this.semaphore.WaitAsync(); this.logger.LogInformation("Getting Python data from {Uri}", uri); using var request = new HttpRequestMessage(HttpMethod.Get, uri); request.Headers.UserAgent.Add(ProductValue); request.Headers.UserAgent.Add(CommentValue); var response = await HttpClient.SendAsync(request); + this.semaphore.Release(); // The `first - wins` response accepted into the cache. This might be different from the input if another caller wins the race. return await this.cachedResponses.GetOrCreateAsync(uri, cacheEntry => @@ -282,5 +288,6 @@ public void Dispose() this.cacheTelemetry.FinalCacheSize = this.cachedResponses.Count; this.cacheTelemetry.Dispose(); this.cachedResponses.Dispose(); + this.semaphore.Dispose(); } } diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs b/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs index 72b9bb0e7..8696761a7 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs @@ -41,6 +41,9 @@ public sealed class SimplePyPiClient : ISimplePyPiClient, IDisposable // Keep telemetry on how the cache is being used for future refinements private readonly SimplePypiCacheTelemetryRecord cacheTelemetry = new SimplePypiCacheTelemetryRecord(); + // Semaphore to limit the number of concurrent calls to pypi.org + private readonly SemaphoreSlim semaphore; + /// /// A thread safe cache implementation which contains a mapping of URI -> SimpleProject for simplepypi api projects /// and has a limited number of entries which will expire after the cache fills or a specified interval. @@ -62,6 +65,7 @@ public SimplePyPiClient(IEnvironmentVariableService environmentVariableService, { this.environmentVariableService = environmentVariableService; this.logger = logger; + this.semaphore = new SemaphoreSlim(10, 10); } public static HttpClient HttpClient { get; internal set; } = new HttpClient(HttpClientHandler); @@ -265,12 +269,14 @@ private async Task RetryPypiRequestAsync(Uri uri, PipDepend /// Returns the httpresponsemessage. private async Task GetPypiResponseAsync(Uri uri) { + await this.semaphore.WaitAsync(); this.logger.LogInformation("Getting Python data from {Uri}", uri); using var request = new HttpRequestMessage(HttpMethod.Get, uri); request.Headers.UserAgent.Add(ProductValue); request.Headers.UserAgent.Add(CommentValue); request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/vnd.pypi.simple.v1+json")); var response = await HttpClient.SendAsync(request); + this.semaphore.Release(); return response; } @@ -282,5 +288,6 @@ public void Dispose() this.cachedProjectWheelFiles.Dispose(); this.cachedSimplePyPiProjects.Dispose(); HttpClient.Dispose(); + this.semaphore.Dispose(); } } From 099a53afff8656259fbfc1c6a362be11e81c8e72 Mon Sep 17 00:00:00 2001 From: Omotola Akeredolu Date: Fri, 20 Oct 2023 09:33:00 -0700 Subject: [PATCH 2/2] Reduce number of requests to 5 --- src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs | 2 +- .../pip/SimplePypiClient.cs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs index d1c9abb1f..553a7dc58 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs @@ -83,7 +83,7 @@ public PyPiClient(IEnvironmentVariableService environmentVariableService, ILogge FinalCacheSize = 0, }; this.logger = logger; - this.semaphore = new SemaphoreSlim(10, 10); + this.semaphore = new SemaphoreSlim(5); } public static HttpClient HttpClient { get; internal set; } = new HttpClient(HttpClientHandler); diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs b/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs index 8696761a7..8e58f1544 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs @@ -65,7 +65,7 @@ public SimplePyPiClient(IEnvironmentVariableService environmentVariableService, { this.environmentVariableService = environmentVariableService; this.logger = logger; - this.semaphore = new SemaphoreSlim(10, 10); + this.semaphore = new SemaphoreSlim(5); } public static HttpClient HttpClient { get; internal set; } = new HttpClient(HttpClientHandler); @@ -287,7 +287,6 @@ public void Dispose() this.cacheTelemetry.Dispose(); this.cachedProjectWheelFiles.Dispose(); this.cachedSimplePyPiProjects.Dispose(); - HttpClient.Dispose(); this.semaphore.Dispose(); } }