11using System ;
2- using System . Collections . Concurrent ;
32using System . Collections . Generic ;
43using System . Composition ;
54using System . IO ;
1110using System . Threading . Tasks ;
1211using Microsoft . ComponentDetection . Common . Telemetry . Records ;
1312using Microsoft . ComponentDetection . Contracts ;
13+ using Microsoft . Extensions . Caching . Memory ;
1414using Newtonsoft . Json ;
1515using Polly ;
1616
@@ -31,10 +31,18 @@ public class PyPiClient : IPyPiClient
// Logger supplied through [Import]; used below for info and verbose messages.
3131 [ Import ]
3232 public ILogger Logger { get ; set ; }
3333
// Environment variable accessor supplied through [Import]; read by
// InitializeNonDefaultMemoryCache to look up the "PyPiMaxCacheEntries" override.
34+ [ Import ]
35+ public IEnvironmentVariableService EnvironmentVariableService { get ; set ; }
36+
// Static handler shared by the static HttpClient below; certificate revocation checking is enabled.
3437 private static HttpClientHandler httpClientHandler = new HttpClientHandler ( ) { CheckCertificateRevocationList = true } ;
3538
// Single static HttpClient instance reused for every request made by this class.
3639 internal static HttpClient HttpClient = new HttpClient ( httpClientHandler ) ;
3740
41+ // Values used for cache creation
// CACHEINTERVALSECONDS: sliding expiration (in seconds) applied to each cache entry.
42+ private const long CACHEINTERVALSECONDS = 60 ;
// DEFAULTCACHEENTRIES: SizeLimit of the cache created at field initialization.
43+ private const long DEFAULTCACHEENTRIES = 128 ;
// Set to true by InitializeNonDefaultMemoryCache once the override variable has been checked.
44+ private bool checkedMaxEntriesVariable = false ;
45+
3846 // time to wait before retrying a failed call to pypi.org
3947 private static readonly TimeSpan RETRYDELAY = TimeSpan . FromSeconds ( 1 ) ;
4048
@@ -45,52 +53,83 @@ public class PyPiClient : IPyPiClient
4553 private long retries = 0 ;
4654
4755 /// <summary>
48- /// This cache is used mostly for consistency, to create a unified view of Pypi response.
56+ /// A thread safe cache implementation which contains a mapping of URI -> HttpResponseMessage
57+ /// and has a limited number of entries which will expire after the cache fills or a specified interval.
4958 /// </summary>
50- private readonly ConcurrentDictionary < string , Task < HttpResponseMessage > > cachedResponses = new ConcurrentDictionary < string , Task < HttpResponseMessage > > ( ) ;
// Not readonly: InitializeNonDefaultMemoryCache may replace this instance with one that uses a
// different SizeLimit. Entries are stored with Size = 1, so SizeLimit acts as a maximum entry count.
59+ private MemoryCache cachedResponses = new MemoryCache ( new MemoryCacheOptions { SizeLimit = DEFAULTCACHEENTRIES } ) ;
60+
61+ // Keep telemetry on how the cache is being used for future refinements
// Created in the constructor; NumCacheHits is incremented on each cache hit and
// FinalCacheSize is filled in by the finalizer before the record is disposed.
62+ private PypiCacheTelemetryRecord cacheTelemetry ;
63+
/// <summary>
/// Initializes a new instance of the <see cref="PyPiClient"/> class and
/// starts the cache telemetry record with zeroed counters.
/// </summary>
public PyPiClient()
{
    var telemetry = new PypiCacheTelemetryRecord();
    telemetry.NumCacheHits = 0;
    telemetry.FinalCacheSize = 0;

    cacheTelemetry = telemetry;
}
72+
/// <summary>
/// Finalizer: records the final number of cached entries and disposes the
/// cache telemetry record. This is best-effort and never throws.
/// </summary>
~PyPiClient()
{
    // The finalizer still runs when the constructor threw before the fields were
    // assigned, and an unhandled exception on the finalizer thread terminates the
    // process. Guard against null fields and swallow any failure.
    try
    {
        var telemetry = cacheTelemetry;
        if (telemetry == null)
        {
            return;
        }

        var cache = cachedResponses;
        if (cache != null)
        {
            telemetry.FinalCacheSize = cache.Count;
        }

        telemetry.Dispose();
    }
    catch (Exception)
    {
        // Nothing can be done safely from a finalizer; losing the telemetry
        // record is preferable to crashing the process.
    }
}
5178
/// <summary>
/// Returns a cached response if it exists, otherwise returns the response from the PyPi REST call.
/// Successful responses are automatically added to the cache. Failed responses are returned
/// uncached, so that a retry by the caller issues a new request instead of replaying the failure.
/// </summary>
/// <param name="uri">The REST Uri to call.</param>
/// <returns>The cached response or a new result from PyPi.</returns>
private async Task<HttpResponseMessage> GetAndCachePyPiResponse(string uri)
{
    // On first use, apply the user-specified cache size (if any).
    if (!checkedMaxEntriesVariable)
    {
        InitializeNonDefaultMemoryCache();
    }

    if (cachedResponses.TryGetValue(uri, out HttpResponseMessage result))
    {
        cacheTelemetry.NumCacheHits++;
        Logger.LogVerbose("Retrieved cached Python data from " + uri);
        return result;
    }

    Logger.LogInfo("Getting Python data from " + uri);
    var response = await HttpClient.GetAsync(uri);

    // Do not cache failures: a cached failure would be handed straight back to a caller
    // that retries within the sliding expiration window, so the retry would never reach PyPi.
    if (!response.IsSuccessStatusCode)
    {
        return response;
    }

    // The `first - wins` response accepted into the cache. This might be different from
    // `response` if another caller wins the race.
    var cached = await cachedResponses.GetOrCreateAsync(uri, cacheEntry =>
    {
        // This entry will expire CACHEINTERVALSECONDS seconds after its last use.
        cacheEntry.SlidingExpiration = TimeSpan.FromSeconds(CACHEINTERVALSECONDS);

        // Every entry has size 1, so the cache's SizeLimit is a plain entry count.
        cacheEntry.Size = 1;
        return Task.FromResult(response);
    });

    // If another caller's response was accepted, ours is unreachable from here on; release it.
    if (!ReferenceEquals(cached, response))
    {
        response.Dispose();
    }

    return cached;
}
68110
/// <summary>
/// On the initial caching attempt, see if the user specified an override for
/// PyPiMaxCacheEntries and recreate the cache if needed. Values that are not a
/// non-negative integer are ignored and the existing cache is kept.
/// </summary>
private void InitializeNonDefaultMemoryCache()
{
    var maxEntriesVariable = EnvironmentVariableService.GetEnvironmentVariable("PyPiMaxCacheEntries");
    if (!string.IsNullOrEmpty(maxEntriesVariable))
    {
        // Parse with the invariant culture: the value is machine configuration, not user-facing text.
        var parsed = long.TryParse(
            maxEntriesVariable,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out var maxEntries);

        // MemoryCacheOptions.SizeLimit throws ArgumentOutOfRangeException for negative values.
        // Without this guard the flag below would never be set and every call would throw.
        if (parsed && maxEntries >= 0)
        {
            Logger.LogInfo($"Setting IPyPiClient max cache entries to {maxEntries}");
            cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = maxEntries });
        }
        else
        {
            Logger.LogInfo($"Ignoring invalid PyPiMaxCacheEntries value '{maxEntriesVariable}'; keeping the default cache size");
        }
    }

    // Mark the check as done so the environment variable is only consulted once.
    checkedMaxEntriesVariable = true;
}
85126
86127 public async Task < IList < PipDependencySpecification > > FetchPackageDependencies ( string name , string version , PythonProjectRelease release )
87128 {
88129 var dependencies = new List < PipDependencySpecification > ( ) ;
89130
90131 var uri = release . Url . ToString ( ) ;
91- var response = await GetPypiResponse ( uri ) ;
92-
93- response = await CachePypiResponse ( uri , response ) ;
132+ var response = await GetAndCachePyPiResponse ( uri ) ;
94133
95134 if ( ! response . IsSuccessStatusCode )
96135 {
@@ -169,11 +208,9 @@ public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetRele
169208 return Task . FromResult < HttpResponseMessage > ( null ) ;
170209 }
171210
172- return GetPypiResponse ( requestUri ) ;
211+ return GetAndCachePyPiResponse ( requestUri ) ;
173212 } ) ;
174213
175- request = await CachePypiResponse ( requestUri , request ) ;
176-
177214 if ( request == null )
178215 {
179216 using var r = new PypiMaxRetriesReachedTelemetryRecord { Name = spec . Name , DependencySpecifiers = spec . DependencySpecifiers ? . ToArray ( ) } ;
0 commit comments