diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs
index 49a5cc2db..553a7dc58 100644
--- a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs
+++ b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs
@@ -54,6 +54,9 @@ public sealed class PyPiClient : IPyPiClient, IDisposable
     private readonly IEnvironmentVariableService environmentVariableService;
     private readonly ILogger logger;
 
+    // Semaphore to limit the number of concurrent calls to pypi.org
+    private readonly SemaphoreSlim semaphore;
+
     private bool checkedMaxEntriesVariable;
 
     // retries used so far for calls to pypi.org
@@ -80,6 +83,7 @@ public PyPiClient(IEnvironmentVariableService environmentVariableService, ILogge
             FinalCacheSize = 0,
         };
         this.logger = logger;
+        this.semaphore = new SemaphoreSlim(5);
     }
 
     public static HttpClient HttpClient { get; internal set; } = new HttpClient(HttpClientHandler);
@@ -246,11 +250,24 @@ private async Task GetAndCachePyPiResponseAsync(Uri uri)
             return result;
         }
 
-        this.logger.LogInformation("Getting Python data from {Uri}", uri);
         using var request = new HttpRequestMessage(HttpMethod.Get, uri);
         request.Headers.UserAgent.Add(ProductValue);
         request.Headers.UserAgent.Add(CommentValue);
-        var response = await HttpClient.SendAsync(request);
+
+        // Hold a concurrency slot only while the request is in flight. Releasing in `finally`
+        // returns the slot even when SendAsync throws (network failure, timeout), so failed
+        // requests cannot exhaust the semaphore and stall every later call.
+        HttpResponseMessage response;
+        await this.semaphore.WaitAsync();
+        try
+        {
+            this.logger.LogInformation("Getting Python data from {Uri}", uri);
+            response = await HttpClient.SendAsync(request);
+        }
+        finally
+        {
+            this.semaphore.Release();
+        }
 
         // The `first - wins` response accepted into the cache. This might be different from the input if another caller wins the race.
         return await this.cachedResponses.GetOrCreateAsync(uri, cacheEntry =>
@@ -282,5 +299,6 @@ public void Dispose()
         this.cacheTelemetry.FinalCacheSize = this.cachedResponses.Count;
         this.cacheTelemetry.Dispose();
         this.cachedResponses.Dispose();
+        this.semaphore.Dispose();
     }
 }
diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs b/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs
index 72b9bb0e7..8e58f1544 100644
--- a/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs
+++ b/src/Microsoft.ComponentDetection.Detectors/pip/SimplePypiClient.cs
@@ -41,6 +41,9 @@ public sealed class SimplePyPiClient : ISimplePyPiClient, IDisposable
     // Keep telemetry on how the cache is being used for future refinements
     private readonly SimplePypiCacheTelemetryRecord cacheTelemetry = new SimplePypiCacheTelemetryRecord();
 
+    // Semaphore to limit the number of concurrent calls to pypi.org
+    private readonly SemaphoreSlim semaphore;
+
     /// A thread safe cache implementation which contains a mapping of URI -> SimpleProject for simplepypi api projects
     /// and has a limited number of entries which will expire after the cache fills or a specified interval.
@@ -62,6 +65,7 @@ public SimplePyPiClient(IEnvironmentVariableService environmentVariableService,
     {
         this.environmentVariableService = environmentVariableService;
         this.logger = logger;
+        this.semaphore = new SemaphoreSlim(5);
     }
 
     public static HttpClient HttpClient { get; internal set; } = new HttpClient(HttpClientHandler);
@@ -265,12 +269,25 @@ private async Task RetryPypiRequestAsync(Uri uri, PipDepend
     /// Returns the httpresponsemessage.
     private async Task GetPypiResponseAsync(Uri uri)
     {
-        this.logger.LogInformation("Getting Python data from {Uri}", uri);
         using var request = new HttpRequestMessage(HttpMethod.Get, uri);
         request.Headers.UserAgent.Add(ProductValue);
         request.Headers.UserAgent.Add(CommentValue);
         request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/vnd.pypi.simple.v1+json"));
-        var response = await HttpClient.SendAsync(request);
+
+        // Hold a concurrency slot only while the request is in flight. Releasing in `finally`
+        // returns the slot even when SendAsync throws (network failure, timeout), so failed
+        // requests cannot exhaust the semaphore and stall every later call.
+        HttpResponseMessage response;
+        await this.semaphore.WaitAsync();
+        try
+        {
+            this.logger.LogInformation("Getting Python data from {Uri}", uri);
+            response = await HttpClient.SendAsync(request);
+        }
+        finally
+        {
+            this.semaphore.Release();
+        }
         return response;
     }
 
@@ -281,6 +298,6 @@ public void Dispose()
         this.cacheTelemetry.Dispose();
         this.cachedProjectWheelFiles.Dispose();
         this.cachedSimplePyPiProjects.Dispose();
-        HttpClient.Dispose();
+        this.semaphore.Dispose();
     }
 }