Skip to content

Commit

Permalink
Cargo package existence and package version existence logic update. (m…
Browse files Browse the repository at this point in the history
…icrosoft#345)

* Start the process of updating the logic for CargoProjectManager to use the github repo for the cargo index vs their API.
Still to do: the contents of the index file aren't valid JSON.

* Update cargo to use the suggested solution from crates.io for crawling.
The crates.io API always returns 200 even if a package/version doesn't exist, so the previous check didn't work as expected anyway.

* Switch JsonParsingOption.Unknown to None. And switched the nullable jsonParsingOption to use None instead of it being nullable.
  • Loading branch information
jpinz authored Aug 22, 2022
1 parent dc63339 commit 52f146e
Show file tree
Hide file tree
Showing 7 changed files with 258 additions and 12 deletions.
20 changes: 20 additions & 0 deletions src/Shared/Model/Enums/JsonParsingOption.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright (c) Microsoft Corporation. Licensed under the MIT License.

namespace Microsoft.CST.OpenSource.Model.Enums;

/// <summary>
/// Special cases used for JSON parsing.
/// Passed to <c>GetJsonCache</c> / <c>CheckJsonCacheForPackage</c> to select how a response body is turned into a JSON document.
/// </summary>
public enum JsonParsingOption
{
/// <summary>
/// No specific option; the response body is parsed as-is.
/// </summary>
None = 0,

/// <summary>
/// The response body is neither a JSON array nor comma separated: its entries are separated by line breaks only.
/// Currently used only for Cargo, whose index files aren't valid JSON as served.
/// </summary>
/// <remarks>
/// <c>GetJsonCache</c> handles this option by replacing each line break with a comma and wrapping
/// the result in <c>[</c> and <c>]</c> before parsing it with trailing commas allowed.
/// </remarks>
/// <example>https://raw.githubusercontent.com/rust-lang/crates.io-index/master/ra/nd/rand</example>
NotInArrayNotCsv,
}
28 changes: 24 additions & 4 deletions src/Shared/PackageManagers/BaseProjectManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ namespace Microsoft.CST.OpenSource.PackageManagers
using Helpers;
using Microsoft.Extensions.Caching.Memory;
using Microsoft.CST.OpenSource.Model;
using Model.Enums;
using System;
using System.Collections.Generic;
using System.Linq;
Expand Down Expand Up @@ -189,14 +190,15 @@ internal static async Task<bool> CheckHttpCacheForPackage(HttpClient client, str
/// <param name="client">The <see cref="HttpClient"/> to make the request on.</param>
/// <param name="url">The URL to check.</param>
/// <param name="useCache">If cache should be used.</param>
/// <param name="jsonParsingOption">Any special json parsing rules.</param>
/// <returns>true if the package exists.</returns>
internal static async Task<bool> CheckJsonCacheForPackage(HttpClient client, string url, bool useCache = true)
internal static async Task<bool> CheckJsonCacheForPackage(HttpClient client, string url, bool useCache = true, JsonParsingOption jsonParsingOption = JsonParsingOption.None)
{
Logger.Trace("CheckJsonCacheForPackage {0}", url);
try
{
// GetJsonCache throws an exception if it has trouble finding the package.
_ = await GetJsonCache(client, url, useCache);
_ = await GetJsonCache(client, url, useCache, jsonParsingOption);
return true;
}
catch (Exception e)
Expand All @@ -217,8 +219,9 @@ internal static async Task<bool> CheckJsonCacheForPackage(HttpClient client, str
/// <param name="client">The <see cref="HttpClient"/> to make the request on.</param>
/// <param name="uri">URI to load.</param>
/// <param name="useCache">If cache should be used. If false will make a direct WebClient request.</param>
/// <param name="jsonParsingOption">Any special json parsing rules.</param>
/// <returns>Content, as a JsonDocument, possibly from cache.</returns>
public static async Task<JsonDocument> GetJsonCache(HttpClient client, string uri, bool useCache = true)
public static async Task<JsonDocument> GetJsonCache(HttpClient client, string uri, bool useCache = true, JsonParsingOption jsonParsingOption = JsonParsingOption.None)
{
Logger.Trace("GetJsonCache({0}, {1})", uri, useCache);
if (useCache)
Expand All @@ -235,8 +238,25 @@ public static async Task<JsonDocument> GetJsonCache(HttpClient client, string ur
HttpResponseMessage result = await client.GetAsync(uri);
result.EnsureSuccessStatusCode(); // Don't cache error codes.
long contentLength = result.Content.Headers.ContentLength ?? 8192;
JsonDocument doc = await JsonDocument.ParseAsync(await result.Content.ReadAsStreamAsync());
JsonDocument doc;

switch (jsonParsingOption)
{
case JsonParsingOption.NotInArrayNotCsv:
string data = await result.Content.ReadAsStringAsync();
data = Regex.Replace(data, @"\r\n?|\n", ",");
data = $"[{data}]";

doc = JsonDocument.Parse(data, new JsonDocumentOptions()
{
AllowTrailingCommas = true,
});
break;
default:
doc = await JsonDocument.ParseAsync(await result.Content.ReadAsStreamAsync());
break;
}

if (useCache)
{
lock (DataCache)
Expand Down
53 changes: 46 additions & 7 deletions src/Shared/PackageManagers/CargoProjectManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ namespace Microsoft.CST.OpenSource.PackageManagers
{
using Extensions;
using Helpers;
using Model.Enums;
using PackageUrl;
using System;
using System.Collections.Generic;
Expand All @@ -13,7 +14,7 @@ namespace Microsoft.CST.OpenSource.PackageManagers
using System.Text.Json;
using System.Threading.Tasks;

internal class CargoProjectManager : BaseProjectManager
public class CargoProjectManager : BaseProjectManager
{
/// <summary>
/// The type of the project manager from the package-url type specifications.
Expand All @@ -28,6 +29,9 @@ internal class CargoProjectManager : BaseProjectManager

[System.Diagnostics.CodeAnalysis.SuppressMessage("Style", "IDE0044:Add readonly modifier", Justification = "Modified through reflection.")]
public static string ENV_CARGO_ENDPOINT_STATIC = "https://static.crates.io";

[System.Diagnostics.CodeAnalysis.SuppressMessage("Style", "IDE0044:Add readonly modifier", Justification = "Modified through reflection.")]
public static string ENV_CARGO_INDEX_ENDPOINT = "https://raw.githubusercontent.com/rust-lang/crates.io-index/master";

public CargoProjectManager(IHttpClientFactory httpClientFactory, string destinationDirectory) : base(httpClientFactory, destinationDirectory)
{
Expand Down Expand Up @@ -104,7 +108,8 @@ public override async Task<bool> PackageExistsAsync(PackageURL purl, bool useCac
}
string packageName = purl.Name;
HttpClient httpClient = CreateHttpClient();
return await CheckJsonCacheForPackage(httpClient, $"{ENV_CARGO_ENDPOINT}/api/v1/crates/{packageName}", useCache);
// NOTE: The file isn't valid json, so use the custom rule.
return await CheckJsonCacheForPackage(httpClient, $"{ENV_CARGO_INDEX_ENDPOINT}/{CreatePath(packageName)}", useCache: useCache, jsonParsingOption: JsonParsingOption.NotInArrayNotCsv);
}

/// <inheritdoc />
Expand All @@ -120,11 +125,12 @@ public override async Task<IEnumerable<string>> EnumerateVersionsAsync(PackageUR
{
string? packageName = purl.Name;
HttpClient httpClient = CreateHttpClient();
JsonDocument doc = await GetJsonCache(httpClient, $"{ENV_CARGO_ENDPOINT}/api/v1/crates/{packageName}");
// NOTE: The file isn't valid json, so use the custom rule.
JsonDocument doc = await GetJsonCache(httpClient, $"{ENV_CARGO_INDEX_ENDPOINT}/{CreatePath(packageName)}", jsonParsingOption: JsonParsingOption.NotInArrayNotCsv);
List<string> versionList = new();
foreach (JsonElement versionObject in doc.RootElement.GetProperty("versions").EnumerateArray())
foreach (JsonElement versionObject in doc.RootElement.EnumerateArray())
{
if (versionObject.TryGetProperty("num", out JsonElement version))
if (versionObject.TryGetProperty("vers", out JsonElement version))
{
Logger.Debug("Identified {0} version {1}.", packageName, version.ToString());
if (version.ToString() is string s)
Expand Down Expand Up @@ -162,8 +168,41 @@ public override async Task<IEnumerable<string>> EnumerateVersionsAsync(PackageUR
public override Uri GetPackageAbsoluteUri(PackageURL purl)
{
    // Builds the crate's page URL under ENV_CARGO_ENDPOINT: "<endpoint>/crates/<name>",
    // with "/<version>" appended only when the purl carries a non-blank version.
    // A null purl is tolerated; name and version are then simply null.
    string? packageName = purl?.Name;
    string? packageVersion = purl?.Version;

    string url = $"{ENV_CARGO_ENDPOINT}/crates/{packageName}";
    if (packageVersion.IsNotBlank())
    {
        url += $"/{packageVersion}";
    }

    return new Uri(url);
}

/// <summary>
/// Helper method to create the path for the crates.io index for this package name.
/// </summary>
/// <remarks>
/// File and directory names in the crates.io index are lowercase, so the crate name is
/// lowercased (invariant culture) before the path is built. The layout depends on the name length:
/// 1 and 2 character names live under <c>1/</c> and <c>2/</c>, 3 character names under
/// <c>3/{first character}/</c>, and longer names under <c>{first two}/{next two}/</c>.
/// </remarks>
/// <param name="crateName">The name of this package.</param>
/// <returns>
/// The relative path to the package's index file, or an empty string when
/// <paramref name="crateName"/> is empty.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="crateName"/> is null.</exception>
/// <example>
/// rand -> ra/nd/rand<br/>
/// a -> 1/a<br/>
/// go -> 2/go<br/>
/// who -> 3/w/who<br/>
/// spotify-retro -> sp/ot/spotify-retro<br/>
/// Inflector -> in/fl/inflector
/// </example>
public static string CreatePath(string crateName)
{
    if (crateName is null)
    {
        throw new ArgumentNullException(nameof(crateName));
    }

    // The index is served from a case-sensitive host and stores every file in lowercase,
    // so a mixed-case crate name must be normalized or the lookup would miss.
    string name = crateName.ToLowerInvariant();

    return name.Length switch
    {
        0 => string.Empty,
        1 => $"1/{name}",
        2 => $"2/{name}",
        3 => $"3/{name[0]}/{name}",
        _ => $"{name[..2]}/{name[2..4]}/{name}",
    };
}
}
}
87 changes: 87 additions & 0 deletions src/oss-tests/ProjectManagerTests/CargoProjectManagerTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright (c) Microsoft Corporation. Licensed under the MIT License.

namespace Microsoft.CST.OpenSource.Tests.ProjectManagerTests
{
using Moq;
using oss;
using PackageManagers;
using PackageUrl;
using RichardSzalay.MockHttp;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Threading.Tasks;
using VisualStudio.TestTools.UnitTesting;

/// <summary>
/// Tests for <see cref="CargoProjectManager"/> that run against a mocked HTTP layer,
/// so no request ever reaches the real crates.io index.
/// </summary>
[TestClass]
public class CargoProjectManagerTests
{
    // Maps each index URL the tests may request to the canned response body served for it.
    private readonly IDictionary<string, string> _packages = new Dictionary<string, string>()
    {
        { "https://raw.githubusercontent.com/rust-lang/crates.io-index/master/ra/nd/rand", Resources.cargo_rand },
    }.ToImmutableDictionary();

    private readonly CargoProjectManager _projectManager;
    private readonly IHttpClientFactory _httpFactory;

    /// <summary>
    /// Registers every entry of <see cref="_packages"/> with a mock HTTP handler and builds the
    /// project manager under test on top of a factory that hands out a client using that handler.
    /// </summary>
    public CargoProjectManagerTests()
    {
        Mock<IHttpClientFactory> mockFactory = new();

        MockHttpMessageHandler mockHttp = new();

        foreach ((string url, string json) in _packages)
        {
            MockHttpFetchResponse(HttpStatusCode.OK, url, json, mockHttp);
        }

        mockFactory.Setup(_ => _.CreateClient(It.IsAny<string>())).Returns(mockHttp.ToHttpClient());
        _httpFactory = mockFactory.Object;

        _projectManager = new CargoProjectManager(_httpFactory, ".");
    }

    /// <summary>
    /// Enumerating versions returns the expected number of versions, with the latest one first.
    /// </summary>
    [DataTestMethod]
    [DataRow("pkg:cargo/rand@0.8.5", 68, "0.8.5")]
    public async Task EnumerateVersionsSucceeds(string purlString, int count, string latestVersion)
    {
        PackageURL purl = new(purlString);
        List<string> versions = (await _projectManager.EnumerateVersionsAsync(purl, useCache: false)).ToList();

        Assert.AreEqual(count, versions.Count);
        Assert.AreEqual(latestVersion, versions.First());
    }

    /// <summary>
    /// A version that is present in the mocked index is reported as existing.
    /// </summary>
    // NOTE(review): the version in this purl was reconstructed after the literal was garbled;
    // confirm it is present in the TestData\Cargo\rand fixture.
    [DataTestMethod]
    [DataRow("pkg:cargo/rand@0.7.3")]
    public async Task PackageVersionExistsAsyncSucceeds(string purlString)
    {
        PackageURL purl = new(purlString);

        Assert.IsTrue(await _projectManager.PackageVersionExistsAsync(purl, useCache: false));
    }

    /// <summary>
    /// A version that is absent from the mocked index is reported as not existing.
    /// </summary>
    // NOTE(review): the version in this purl was reconstructed after the literal was garbled;
    // confirm it is absent from the TestData\Cargo\rand fixture.
    [DataTestMethod]
    [DataRow("pkg:cargo/rand@0.7.4")]
    public async Task PackageVersionDoesntExistsAsyncSucceeds(string purlString)
    {
        PackageURL purl = new(purlString);

        Assert.IsFalse(await _projectManager.PackageVersionExistsAsync(purl, useCache: false));
    }

    /// <summary>
    /// Makes <paramref name="httpMock"/> answer GET requests for <paramref name="url"/> with the
    /// given status code and <paramref name="content"/>, served as <c>application/json</c>.
    /// </summary>
    private static void MockHttpFetchResponse(
        HttpStatusCode statusCode,
        string url,
        string content,
        MockHttpMessageHandler httpMock)
    {
        httpMock
            .When(HttpMethod.Get, url)
            .Respond(statusCode, "application/json", content);
    }
}
}
11 changes: 10 additions & 1 deletion src/oss-tests/Properties/Resources.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions src/oss-tests/Properties/Resources.resx
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,7 @@
<data name="requests.json" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>..\TestData\PyPI\requests.json;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
<data name="cargo.rand" type="System.Resources.ResXFileRef, System.Windows.Forms">
<value>..\TestData\Cargo\rand;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</data>
</root>
Loading

0 comments on commit 52f146e

Please sign in to comment.