Skip to content

Commit

Permalink
Refactor ExtractArchive into a helper class, and a few other changes. (
Browse files Browse the repository at this point in the history
…microsoft#324)

* Refactor the ExtractArchive logic to a helper instead of being in BaseProjectManager.

* Rename PackageManagerFactory to ProjectManagerFactory, and remove the static constructors.

* Encode the mutated names in mutations to make sure there's no HTTP encoding issues.

* Update PackageDownloader exception message.

* Change PackageMetadata.User to be a record instead of class to help with json parsing.

* Make BaseProjectManager abstract as we never want just a BaseProjectManager.

* Add 'Type' as a property in every project manager that maps the manager to the type defined in the package-url spec. (Doesn't get used in this PR though).

* Add includePrerelease to BaseProjectManager.EnumerateVersions to filter out pre-release/beta versions if they aren't wanted. Defaults to true, so there are no changes from what is currently in production.

* Make BaseProjectManager.GetPackageMetadata return a nullable PackageMetadata object.

* Fix URLProjectManager.EnumerateVersions to remove the need for disabling CS1998.

* Fix issues with the download not going to the specified path if it wasn't being extracted. Previously, the default oss-download would always download the artifact to "." for every package manager except Cargo and GitHub.
Also fixed pypi's download to include the file extension, which it wasn't doing.

* Add a unit test to make sure no regressions occur with the URL encoding of mutated names.

* Remove public const string Type from BaseProjectManager as it never gets used anyway.
It is just implemented on the individual BaseProjectManager implementations.

* Create a unit test to test ArchiveHelper.ExtractArchiveAsync.

* Add a non-static abstract string ManagerType to BaseProjectManager to get the package-url spec type name in a non-static context.
  • Loading branch information
jpinz authored Apr 6, 2022
1 parent 0abadd5 commit 96ac5f1
Show file tree
Hide file tree
Showing 26 changed files with 453 additions and 206 deletions.
2 changes: 1 addition & 1 deletion src/Shared.CLI/Helpers/PackageDownloader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public PackageDownloader(PackageURL purl, IHttpClientFactory? httpClientFactory,
if (packageManager == null)
{
// Cannot continue without a package manager.
throw new ArgumentException("Invalid Package URL type: {0}", purl.Type);
throw new ArgumentException($"Invalid Package URL type: {purl.Type}", nameof(purl.Type));
}
PackageVersions = new List<PackageURL>();
if (purl.Version == null || purl.Version.Equals("*"))
Expand Down
74 changes: 74 additions & 0 deletions src/Shared/Helpers/ArchiveHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright (c) Microsoft Corporation. Licensed under the MIT License.

namespace Microsoft.CST.OpenSource.Helpers;

using RecursiveExtractor;
using System;
using System.IO;
using System.Text;
using System.Threading.Tasks;

/// <summary>
/// Static helper for extracting archive streams into a directory on disk.
/// </summary>
public static class ArchiveHelper
{
    /// <summary>
    /// Logger for this helper class.
    /// </summary>
    private static readonly NLog.Logger Logger = NLog.LogManager.GetCurrentClassLogger();

    /// <summary>
    /// Extracts an archive (given by the <paramref name="content"/> stream) into a directory named
    /// <paramref name="directoryName"/>, using RecursiveExtractor.
    /// </summary>
    /// <param name="topLevelDirectory">The top level directory content should be extracted to. It is created if it does not exist.</param>
    /// <param name="directoryName">
    /// Directory to extract content into (within <paramref name="topLevelDirectory"/>).
    /// Characters returned by <see cref="Path.GetInvalidPathChars"/> are replaced with '-'.
    /// </param>
    /// <param name="content">Stream of the contents to extract (should be an archive file).</param>
    /// <param name="cached">
    /// When false, a "-{ticks}" suffix is appended to the directory name until the target path does not
    /// already exist as a file or directory. When true, the target path is used as-is even if it exists.
    /// </param>
    /// <returns>
    /// The path that the archive was extracted to. The path is returned even when the extractor reports
    /// a failure; in that case a warning is logged.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is null.</exception>
    public static async Task<string> ExtractArchiveAsync(
        string topLevelDirectory,
        string directoryName,
        Stream content,
        bool cached = false)
    {
        if (content is null)
        {
            throw new ArgumentNullException(nameof(content));
        }

        // Stream.Length throws NotSupportedException on non-seekable streams (for example an
        // unbuffered network stream), so only read it when seeking is supported. -1 means unknown.
        long contentLength = content.CanSeek ? content.Length : -1;
        Logger.Trace("ExtractArchive({0}, <stream> len={1})", directoryName, contentLength);

        Directory.CreateDirectory(topLevelDirectory);

        StringBuilder dirBuilder = new(directoryName);

        foreach (char c in Path.GetInvalidPathChars())
        {
            dirBuilder.Replace(c, '-'); // ignore: lgtm [cs/string-concatenation-in-loop]
        }

        string fullTargetPath = Path.Combine(topLevelDirectory, dirBuilder.ToString());

        if (!cached)
        {
            // Avoid clobbering an existing file or directory: keep appending a tick-based suffix
            // until the combined path is unused.
            while (Directory.Exists(fullTargetPath) || File.Exists(fullTargetPath))
            {
                dirBuilder.Append('-').Append(DateTime.Now.Ticks);
                fullTargetPath = Path.Combine(topLevelDirectory, dirBuilder.ToString());
            }
        }

        Extractor extractor = new();
        ExtractorOptions extractorOptions = new()
        {
            ExtractSelfOnFail = true,
            Parallel = true
            // MaxExtractedBytes = 1000 * 1000 * 10; // 10 MB maximum package size
        };

        ExtractionStatusCode result = await extractor.ExtractToDirectoryAsync(
            topLevelDirectory,
            dirBuilder.ToString(),
            content,
            extractorOptions);

        if (result == ExtractionStatusCode.Ok)
        {
            Logger.Debug("Archive extracted to {0}", fullTargetPath);
        }
        else
        {
            Logger.Warn("Error extracting archive {0} ({1})", fullTargetPath, result);
        }

        return fullTargetPath;
    }
}
12 changes: 6 additions & 6 deletions src/Shared/Model/PackageMetadata.cs
Original file line number Diff line number Diff line change
Expand Up @@ -161,22 +161,22 @@ public override string ToString()
}
}

public class User
public record User
{
[JsonProperty(PropertyName = "active_flag", NullValueHandling = NullValueHandling.Ignore)]
public bool? Active { get; set; }
public bool? Active { get; init; }

[JsonProperty(PropertyName = "email", NullValueHandling = NullValueHandling.Ignore)]
public string? Email { get; set; }
public string? Email { get; init; }

[JsonProperty(PropertyName = "Id", NullValueHandling = NullValueHandling.Ignore)]
public int? Id { get; set; }
public int? Id { get; init; }

[JsonProperty(PropertyName = "name", NullValueHandling = NullValueHandling.Ignore)]
public string? Name { get; set; }
public string? Name { get; init; }

[JsonProperty(PropertyName = "url", NullValueHandling = NullValueHandling.Ignore)]
public string? Url { get; set; }
public string? Url { get; init; }
}

public class Version
Expand Down
82 changes: 19 additions & 63 deletions src/Shared/PackageManagers/BaseProjectManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,39 @@

namespace Microsoft.CST.OpenSource.PackageManagers
{
using Microsoft.CST.RecursiveExtractor;
using Microsoft.Extensions.Caching.Memory;
using Microsoft.CST.OpenSource.Model;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Utilities;
using Version = SemanticVersioning.Version;
using PackageUrl;

public class BaseProjectManager
public abstract class BaseProjectManager
{
/// <summary>
/// The type of the project manager from the package-url type specifications.
/// </summary>
/// <remarks>This differs from the Type property defined in other ProjectManagers as this one isn't static.</remarks>
/// <seealso href="https://www.github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst"/>
public abstract string ManagerType { get; }

/// <summary>
/// Initializes a new instance of the <see cref="BaseProjectManager"/> class.
/// </summary>
public BaseProjectManager(IHttpClientFactory httpClientFactory, string destinationDirectory)
public BaseProjectManager(IHttpClientFactory httpClientFactory, string destinationDirectory = ".")
{
Options = new Dictionary<string, object>();
TopLevelExtractionDirectory = destinationDirectory;
HttpClientFactory = httpClientFactory;
}

public BaseProjectManager(string destinationDirectory) : this(new DefaultHttpClientFactory(), destinationDirectory)
public BaseProjectManager(string destinationDirectory = ".") : this(new DefaultHttpClientFactory(), destinationDirectory)
{
}

Expand All @@ -42,7 +46,7 @@ public BaseProjectManager(string destinationDirectory) : this(new DefaultHttpCli
/// <summary>
/// The location (directory) to extract files to.
/// </summary>
public string TopLevelExtractionDirectory { get; set; } = ".";
public string TopLevelExtractionDirectory { get; init; }

/// <summary>
/// The <see cref="IHttpClientFactory"/> for the manager.
Expand Down Expand Up @@ -279,63 +283,13 @@ public virtual Task<IEnumerable<string>> DownloadVersion(PackageURL purl, bool d
/// <remarks>The latest version is always first, then it is sorted by SemVer in descending order.</remarks>
/// <param name="purl">Package URL specifying the package. Version is ignored.</param>
/// <param name="useCache">If the cache should be used when looking for the versions.</param>
/// <param name="includePrerelease">If pre-release versions should be included.</param>
/// <returns> A list of package version numbers.</returns>
public virtual Task<IEnumerable<string>> EnumerateVersions(PackageURL purl, bool useCache = true)
public virtual Task<IEnumerable<string>> EnumerateVersions(PackageURL purl, bool useCache = true, bool includePrerelease = true)
{
throw new NotImplementedException("BaseProjectManager does not implement EnumerateVersions.");
}

/// <summary>
/// Extracts an archive (given by 'bytes') into a directory named 'directoryName',
/// recursively, using RecursiveExtractor.
/// </summary>
/// <param name="directoryName">directory to extract content into (within TopLevelExtractionDirectory)</param>
/// <param name="bytes">bytes to extract (should be an archive file)</param>
/// <param name="cached">If the archive has been cached.</param>
/// <returns></returns>
public async Task<string> ExtractArchive(string directoryName, byte[] bytes, bool cached = false)
{
Logger.Trace("ExtractArchive({0}, <bytes> len={1})", directoryName, bytes.Length);

Directory.CreateDirectory(TopLevelExtractionDirectory);

StringBuilder dirBuilder = new(directoryName);

foreach (char c in Path.GetInvalidPathChars())
{
dirBuilder.Replace(c, '-'); // ignore: lgtm [cs/string-concatenation-in-loop]
}

string fullTargetPath = Path.Combine(TopLevelExtractionDirectory, dirBuilder.ToString());

if (!cached)
{
while (Directory.Exists(fullTargetPath) || File.Exists(fullTargetPath))
{
dirBuilder.Append("-" + DateTime.Now.Ticks);
fullTargetPath = Path.Combine(TopLevelExtractionDirectory, dirBuilder.ToString());
}
}
Extractor extractor = new();
ExtractorOptions extractorOptions = new()
{
ExtractSelfOnFail = true,
Parallel = true
// MaxExtractedBytes = 1000 * 1000 * 10; // 10 MB maximum package size
};
ExtractionStatusCode result = await extractor.ExtractToDirectoryAsync(TopLevelExtractionDirectory, dirBuilder.ToString(), new MemoryStream(bytes), extractorOptions);
if (result == ExtractionStatusCode.Ok)
{
Logger.Debug("Archive extracted to {0}", fullTargetPath);
}
else
{
Logger.Warn("Error extracting archive {0} ({1})", fullTargetPath, result);
}

return fullTargetPath;
}

/// <summary>
/// Gets the latest version from the package metadata.
/// </summary>
Expand Down Expand Up @@ -383,9 +337,10 @@ public virtual async Task<bool> PackageExists(PackageURL purl, bool useCache = t
/// This method should return text reflecting metadata for the given package. There is no
/// assumed format.
/// </summary>
/// <param name="purl">PackageURL to search.</param>
/// <param name="purl">The <see cref="PackageURL"/> to get the metadata for.</param>
/// <param name="useCache">If the metadata should be retrieved from the cache, if it is available.</param>
/// <returns>a string containing metadata.</returns>
/// <remarks>If no version specified, defaults to latest version.</remarks>
/// <returns>A string representing the <see cref="PackageURL"/>'s metadata, or null if it wasn't found.</returns>
public virtual Task<string?> GetMetadata(PackageURL purl, bool useCache = true)
{
throw new NotImplementedException($"{GetType().Name} does not implement GetMetadata.");
Expand All @@ -406,8 +361,9 @@ public virtual async Task<bool> PackageExists(PackageURL purl, bool useCache = t
/// </summary>
/// <param name="purl">The <see cref="PackageURL"/> to get the normalized metadata for.</param>
/// <param name="useCache">If the <see cref="PackageMetadata"/> should be retrieved from the cache, if it is available.</param>
/// <returns>A <see cref="GetPackageMetadata"/> object representing this <see cref="PackageURL"/>.</returns>
public virtual Task<PackageMetadata> GetPackageMetadata(PackageURL purl, bool useCache = true)
/// <remarks>If no version specified, defaults to latest version.</remarks>
/// <returns>A <see cref="PackageMetadata"/> object representing this <see cref="PackageURL"/>.</returns>
public virtual Task<PackageMetadata?> GetPackageMetadata(PackageURL purl, bool useCache = true)
{
string typeName = GetType().Name;
throw new NotImplementedException($"{typeName} does not implement GetPackageMetadata.");
Expand Down
19 changes: 14 additions & 5 deletions src/Shared/PackageManagers/CPANProjectManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Microsoft.CST.OpenSource.PackageManagers
{
using AngleSharp.Html.Parser;
using Helpers;
using PackageUrl;
using System;
using System.Collections.Generic;
Expand All @@ -14,6 +15,14 @@ namespace Microsoft.CST.OpenSource.PackageManagers

internal class CPANProjectManager : BaseProjectManager
{
/// <summary>
/// The type of the project manager from the package-url type specifications.
/// </summary>
/// <seealso href="https://www.github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst"/>
public const string Type = "cpan";

public override string ManagerType => Type;

[System.Diagnostics.CodeAnalysis.SuppressMessage("Style", "IDE0044:Add readonly modifier", Justification = "Modified through reflection.")]
public static string ENV_CPAN_BINARY_ENDPOINT = "https://cpan.metacpan.org";

Expand Down Expand Up @@ -120,21 +129,21 @@ public override async Task<IEnumerable<string>> DownloadVersion(PackageURL purl,
}
if (doExtract)
{
downloadedPaths.Add(await ExtractArchive(targetName, await result.Content.ReadAsByteArrayAsync(), cached));
downloadedPaths.Add(await ArchiveHelper.ExtractArchiveAsync(TopLevelExtractionDirectory, targetName, await result.Content.ReadAsStreamAsync(), cached));
}
else
{
targetName += Path.GetExtension(binaryUrl) ?? "";
await File.WriteAllBytesAsync(targetName, await result.Content.ReadAsByteArrayAsync());
downloadedPaths.Add(targetName);
extractionPath += Path.GetExtension(binaryUrl) ?? "";
await File.WriteAllBytesAsync(extractionPath, await result.Content.ReadAsByteArrayAsync());
downloadedPaths.Add(extractionPath);
}
break;
}
return downloadedPaths;
}

/// <inheritdoc />
public override async Task<IEnumerable<string>> EnumerateVersions(PackageURL purl, bool useCache = true)
public override async Task<IEnumerable<string>> EnumerateVersions(PackageURL purl, bool useCache = true, bool includePrerelease = true)
{
Logger.Trace("EnumerateVersions {0}", purl?.ToString());
if (purl == null || purl.Name is null)
Expand Down
28 changes: 19 additions & 9 deletions src/Shared/PackageManagers/CRANProjectManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Microsoft.CST.OpenSource.PackageManagers
{
using AngleSharp.Html.Parser;
using Helpers;
using PackageUrl;
using System;
using System.Collections.Generic;
Expand All @@ -13,6 +14,14 @@ namespace Microsoft.CST.OpenSource.PackageManagers

internal class CRANProjectManager : BaseProjectManager
{
/// <summary>
/// The type of the project manager from the package-url type specifications.
/// </summary>
/// <seealso href="https://www.github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst"/>
public const string Type = "cran";

public override string ManagerType => Type;

[System.Diagnostics.CodeAnalysis.SuppressMessage("Style", "IDE0044:Add readonly modifier", Justification = "Modified through reflection.")]
public static string ENV_CRAN_ENDPOINT = "https://cran.r-project.org";

Expand Down Expand Up @@ -62,13 +71,13 @@ public override async Task<IEnumerable<string>> DownloadVersion(PackageURL purl,
}
if (doExtract)
{
downloadedPaths.Add(await ExtractArchive(targetName, await result.Content.ReadAsByteArrayAsync(), cached));
downloadedPaths.Add(await ArchiveHelper.ExtractArchiveAsync(TopLevelExtractionDirectory, targetName, await result.Content.ReadAsStreamAsync(), cached));
}
else
{
targetName += Path.GetExtension(url) ?? "";
await File.WriteAllBytesAsync(targetName, await result.Content.ReadAsByteArrayAsync());
downloadedPaths.Add(targetName);
extractionPath += Path.GetExtension(url) ?? "";
await File.WriteAllBytesAsync(extractionPath, await result.Content.ReadAsByteArrayAsync());
downloadedPaths.Add(extractionPath);
}
}
catch (Exception ex)
Expand All @@ -89,15 +98,16 @@ public override async Task<IEnumerable<string>> DownloadVersion(PackageURL purl,
Logger.Debug("Downloading {0}...", purl);

string targetName = $"cran-{packageName}@{packageVersion}";
string extractionPath = Path.Combine(TopLevelExtractionDirectory, targetName);
if (doExtract)
{
downloadedPaths.Add(await ExtractArchive(targetName, await result.Content.ReadAsByteArrayAsync(), cached));
downloadedPaths.Add(await ArchiveHelper.ExtractArchiveAsync(TopLevelExtractionDirectory, targetName, await result.Content.ReadAsStreamAsync(), cached));
}
else
{
targetName += Path.GetExtension(url) ?? "";
await File.WriteAllBytesAsync(targetName, await result.Content.ReadAsByteArrayAsync());
downloadedPaths.Add(targetName);
extractionPath += Path.GetExtension(url) ?? "";
await File.WriteAllBytesAsync(extractionPath, await result.Content.ReadAsByteArrayAsync());
downloadedPaths.Add(extractionPath);
}
}
catch (Exception ex)
Expand All @@ -108,7 +118,7 @@ public override async Task<IEnumerable<string>> DownloadVersion(PackageURL purl,
}

/// <inheritdoc />
public override async Task<IEnumerable<string>> EnumerateVersions(PackageURL purl, bool useCache = true)
public override async Task<IEnumerable<string>> EnumerateVersions(PackageURL purl, bool useCache = true, bool includePrerelease = true)
{
Logger.Trace("EnumerateVersions {0}", purl?.ToString());
if (purl == null || purl.Name is null)
Expand Down
Loading

0 comments on commit 96ac5f1

Please sign in to comment.