Skip to content

Commit ee44b89

Browse files
authored
Migrate IPyPiClient cache to LRU MemoryCache (#80)
* Migrate IPyPiClient cache to LRU MemoryCache
* Update test formatting
* Update Caching.Memory to 3.1.23
* Address PR comments
* StyleCop
1 parent 2d5a418 commit ee44b89

8 files changed

Lines changed: 235 additions & 30 deletions

File tree

Directory.Packages.props

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
<PackageVersion Include="FluentAssertions" Version="6.1.0"/>
1515
<PackageVersion Include="Microsoft.AspNet.WebApi.Client" Version="5.2.7"/>
1616
<PackageVersion Include="Microsoft.CodeAnalysis.FxCopAnalyzers" Version="3.3.0"/>
17+
<PackageVersion Include="Microsoft.Extensions.Caching.Memory" Version="3.1.23" />
1718
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="16.9.4"/>
1819
<PackageVersion Include="Microsoft.SourceLink.GitHub" Version="1.0.0"/>
1920
<PackageVersion Include="DotNet.Glob" Version="2.1.1"/>

src/Microsoft.ComponentDetection.Common/EnvironmentVariableService.cs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,20 @@ namespace Microsoft.ComponentDetection.Common
99
public class EnvironmentVariableService : IEnvironmentVariableService
1010
{
1111
public bool DoesEnvironmentVariableExist(string name)
12+
{
13+
return GetEnvironmentVariable(name) != null;
14+
}
15+
16+
public string GetEnvironmentVariable(string name)
1217
{
1318
// Environment variables are case-insensitive on Windows, and case-sensitive on
1419
// Linux and MacOS.
1520
// https://docs.microsoft.com/en-us/dotnet/api/system.environment.getenvironmentvariable
16-
return Environment.GetEnvironmentVariables().Keys
21+
var caseInsensitiveName = Environment.GetEnvironmentVariables().Keys
1722
.OfType<string>()
18-
.FirstOrDefault(x => string.Compare(x, name, true) == 0) != null;
23+
.FirstOrDefault(x => string.Compare(x, name, true) == 0);
24+
25+
return caseInsensitiveName != null ? Environment.GetEnvironmentVariable(caseInsensitiveName) : null;
1926
}
2027
}
2128
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
using System.Net;
2+
3+
namespace Microsoft.ComponentDetection.Common.Telemetry.Records
4+
{
5+
public class PypiCacheTelemetryRecord : BaseDetectionTelemetryRecord
6+
{
7+
public override string RecordName => "PyPiCache";
8+
9+
/// <summary>
10+
/// Gets or sets total number of PyPi requests that hit the cache instead of PyPi APIs.
11+
/// </summary>
12+
public int NumCacheHits { get; set; }
13+
14+
/// <summary>
15+
/// Gets or sets the number of entries in the PyPi cache when the owning client is finalized.
16+
/// </summary>
17+
public int FinalCacheSize { get; set; }
18+
}
19+
}

src/Microsoft.ComponentDetection.Contracts/IEnvironmentVariableService.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,7 @@ namespace Microsoft.ComponentDetection.Contracts
33
public interface IEnvironmentVariableService
44
{
55
bool DoesEnvironmentVariableExist(string name);
6+
7+
string GetEnvironmentVariable(string name);
68
}
79
}

src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
<PackageReference Include="Polly" />
1010
<PackageReference Include="Semver" />
1111
<PackageReference Include="yamldotnet" />
12+
<PackageReference Include="Microsoft.Extensions.Caching.Memory" />
1213
<PackageReference Include="Newtonsoft.Json" />
1314
<PackageReference Include="System.Composition.AttributedModel" />
1415
<PackageReference Include="System.Composition.Convention" />

src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
using System;
2-
using System.Collections.Concurrent;
32
using System.Collections.Generic;
43
using System.Composition;
54
using System.IO;
@@ -11,6 +10,7 @@
1110
using System.Threading.Tasks;
1211
using Microsoft.ComponentDetection.Common.Telemetry.Records;
1312
using Microsoft.ComponentDetection.Contracts;
13+
using Microsoft.Extensions.Caching.Memory;
1414
using Newtonsoft.Json;
1515
using Polly;
1616

@@ -31,10 +31,18 @@ public class PyPiClient : IPyPiClient
3131
[Import]
3232
public ILogger Logger { get; set; }
3333

34+
[Import]
35+
public IEnvironmentVariableService EnvironmentVariableService { get; set; }
36+
3437
private static HttpClientHandler httpClientHandler = new HttpClientHandler() { CheckCertificateRevocationList = true };
3538

3639
internal static HttpClient HttpClient = new HttpClient(httpClientHandler);
3740

41+
// Values used for cache creation
42+
private const long CACHEINTERVALSECONDS = 60;
43+
private const long DEFAULTCACHEENTRIES = 128;
44+
private bool checkedMaxEntriesVariable = false;
45+
3846
// time to wait before retrying a failed call to pypi.org
3947
private static readonly TimeSpan RETRYDELAY = TimeSpan.FromSeconds(1);
4048

@@ -45,52 +53,83 @@ public class PyPiClient : IPyPiClient
4553
private long retries = 0;
4654

4755
/// <summary>
48-
/// This cache is used mostly for consistency, to create a unified view of Pypi response.
56+
/// A thread-safe cache implementation which contains a mapping of URI -> HttpResponseMessage
57+
/// and has a limited number of entries, each of which will expire after the cache fills or after a specified interval without use.
4958
/// </summary>
50-
private readonly ConcurrentDictionary<string, Task<HttpResponseMessage>> cachedResponses = new ConcurrentDictionary<string, Task<HttpResponseMessage>>();
59+
private MemoryCache cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = DEFAULTCACHEENTRIES });
60+
61+
// Keep telemetry on how the cache is being used for future refinements
62+
private PypiCacheTelemetryRecord cacheTelemetry;
63+
64+
public PyPiClient()
65+
{
66+
cacheTelemetry = new PypiCacheTelemetryRecord()
67+
{
68+
NumCacheHits = 0,
69+
FinalCacheSize = 0,
70+
};
71+
}
72+
73+
~PyPiClient()
74+
{
75+
cacheTelemetry.FinalCacheSize = cachedResponses.Count;
76+
cacheTelemetry.Dispose();
77+
}
5178

5279
/// <summary>
53-
/// Returns a cached response if it exists, otherwise returns the response from Pypi REST call.
54-
/// The response from Pypi is not automatically added to the cache, to allow caller to make that decision.
80+
/// Returns a cached response if it exists, otherwise returns the response from PyPi REST call.
81+
/// The response from PyPi is automatically added to the cache.
5582
/// </summary>
5683
/// <param name="uri">The REST Uri to call.</param>
57-
/// <returns>The cached response or a new result from Pypi.</returns>
58-
private async Task<HttpResponseMessage> GetPypiResponse(string uri)
84+
/// <returns>The cached response or a new result from PyPi.</returns>
85+
private async Task<HttpResponseMessage> GetAndCachePyPiResponse(string uri)
5986
{
60-
if (cachedResponses.TryGetValue(uri, out var value))
87+
if (!checkedMaxEntriesVariable)
88+
{
89+
InitializeNonDefaultMemoryCache();
90+
}
91+
92+
if (cachedResponses.TryGetValue(uri, out HttpResponseMessage result))
6193
{
62-
return await value;
94+
cacheTelemetry.NumCacheHits++;
95+
Logger.LogVerbose("Retrieved cached Python data from " + uri);
96+
return result;
6397
}
6498

6599
Logger.LogInfo("Getting Python data from " + uri);
66-
return await HttpClient.GetAsync(uri);
100+
var response = await HttpClient.GetAsync(uri);
101+
102+
// Return the `first-wins` response accepted into the cache. This might differ from the response fetched above if another caller wins the race.
103+
return await cachedResponses.GetOrCreateAsync(uri, cacheEntry =>
104+
{
105+
cacheEntry.SlidingExpiration = TimeSpan.FromSeconds(CACHEINTERVALSECONDS); // This entry will expire after CACHEINTERVALSECONDS seconds from last use
106+
cacheEntry.Size = 1; // Specify a size of 1 so a set number of entries can always be in the cache
107+
return Task.FromResult(response);
108+
});
67109
}
68110

69111
/// <summary>
70-
/// Used to update the consistency cache, decision has to be made by the caller to allow for retries!.
112+
/// On the initial caching attempt, see if the user specified an override for
113+
/// PyPiMaxCacheEntries and recreate the cache if needed.
71114
/// </summary>
72-
/// <param name="uri">The REST Uri to call.</param>
73-
/// <param name="message">The proposed response by the caller to store for this Uri.</param>
74-
/// <returns>The `first-wins` response accepted into the cache.
75-
/// This might be different from the input if another caller wins the race!.</returns>
76-
private async Task<HttpResponseMessage> CachePypiResponse(string uri, HttpResponseMessage message)
115+
private void InitializeNonDefaultMemoryCache()
77116
{
78-
if (!cachedResponses.TryAdd(uri, Task.FromResult(message)))
117+
var maxEntriesVariable = EnvironmentVariableService.GetEnvironmentVariable("PyPiMaxCacheEntries");
118+
if (!string.IsNullOrEmpty(maxEntriesVariable) && long.TryParse(maxEntriesVariable, out var maxEntries))
79119
{
80-
return await cachedResponses[uri];
120+
Logger.LogInfo($"Setting IPyPiClient max cache entries to {maxEntries}");
121+
cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = maxEntries });
81122
}
82123

83-
return message;
124+
checkedMaxEntriesVariable = true;
84125
}
85126

86127
public async Task<IList<PipDependencySpecification>> FetchPackageDependencies(string name, string version, PythonProjectRelease release)
87128
{
88129
var dependencies = new List<PipDependencySpecification>();
89130

90131
var uri = release.Url.ToString();
91-
var response = await GetPypiResponse(uri);
92-
93-
response = await CachePypiResponse(uri, response);
132+
var response = await GetAndCachePyPiResponse(uri);
94133

95134
if (!response.IsSuccessStatusCode)
96135
{
@@ -169,11 +208,9 @@ public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetRele
169208
return Task.FromResult<HttpResponseMessage>(null);
170209
}
171210

172-
return GetPypiResponse(requestUri);
211+
return GetAndCachePyPiResponse(requestUri);
173212
});
174213

175-
request = await CachePypiResponse(requestUri, request);
176-
177214
if (request == null)
178215
{
179216
using var r = new PypiMaxRetriesReachedTelemetryRecord { Name = spec.Name, DependencySpecifiers = spec.DependencySpecifiers?.ToArray() };

test/Microsoft.ComponentDetection.Common.Tests/BaseDetectionTelemetryRecordTests.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ public void SerializableProperties()
5959
typeof(string),
6060
typeof(string[]),
6161
typeof(bool),
62+
typeof(int),
6263
typeof(int?),
6364
typeof(TimeSpan?),
6465
typeof(HttpStatusCode),

0 commit comments

Comments
 (0)