From a96c134b8368fbf04b56400faa91aea2892bcf74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Sharma?= Date: Mon, 21 Oct 2019 18:09:26 -0700 Subject: [PATCH] Refactor Azure Search (#393) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Part of https://github.com/loic-sharma/BaGet/issues/362 See https://github.com/loic-sharma/baget.io/pull/3 ⚠️ This change removes the Azure Search import tool as it has been moved here: https://github.com/loic-sharma/baget.io. If you'd like to use Azure Search, the new steps are: ```ps1 git clone https://github.com/loic-sharma/baget.io cd baget.io/BaGet # Update the appsettings.json file with your Azure Search configurations dotnet run -- azure-search create dotnet run -- azure-search rebuild ``` The commands from baget.io will be ported to this project soon. --- BaGet.sln | 9 +- .../IServiceCollectionExtensions.cs | 4 +- .../Search/AzureSearchBatchIndexer.cs | 88 ++++++++++ src/BaGet.Azure/Search/AzureSearchIndexer.cs | 40 +++++ src/BaGet.Azure/Search/AzureSearchService.cs | 8 - src/BaGet.Azure/Search/BatchIndexer.cs | 152 ------------------ src/BaGet.Azure/Search/IndexActionBuilder.cs | 117 ++++++++++++++ .../Search/NewPackageRegistration.cs | 35 ++++ src/BaGet.Azure/Search/PackageDocument.cs | 17 +- .../Indexing/PackageIndexingService.cs | 4 +- src/BaGet.Core/Search/ISearchIndexer.cs | 16 ++ src/BaGet.Core/Search/ISearchService.cs | 8 - src/BaGet.Core/Search/NullSearchIndexer.cs | 16 ++ src/BaGet.Core/Search/NullSearchService.cs | 5 - .../BaGet.Tools.AzureSearchImporter.csproj | 30 ---- .../Entities/IndexerContext.cs | 30 ---- .../Entities/IndexerContextFactory.cs | 19 --- .../Entities/PackageId.cs | 11 -- .../Importer.cs | 70 -------- .../Initializer.cs | 106 ------------ .../20180415185938_Initial.Designer.cs | 44 ----- .../Migrations/20180415185938_Initial.cs | 43 ----- .../Migrations/IndexerContextModelSnapshot.cs | 43 ----- .../Program.cs | 77 --------- .../appsettings.json | 12 -- 
.../IServiceCollectionExtensions.cs | 20 +++ .../Services/PackageIndexingServiceTests.cs | 4 +- 27 files changed, 353 insertions(+), 675 deletions(-) create mode 100644 src/BaGet.Azure/Search/AzureSearchBatchIndexer.cs create mode 100644 src/BaGet.Azure/Search/AzureSearchIndexer.cs delete mode 100644 src/BaGet.Azure/Search/BatchIndexer.cs create mode 100644 src/BaGet.Azure/Search/IndexActionBuilder.cs create mode 100644 src/BaGet.Azure/Search/NewPackageRegistration.cs create mode 100644 src/BaGet.Core/Search/ISearchIndexer.cs create mode 100644 src/BaGet.Core/Search/NullSearchIndexer.cs delete mode 100644 src/BaGet.Tools.AzureSearchImporter/BaGet.Tools.AzureSearchImporter.csproj delete mode 100644 src/BaGet.Tools.AzureSearchImporter/Entities/IndexerContext.cs delete mode 100644 src/BaGet.Tools.AzureSearchImporter/Entities/IndexerContextFactory.cs delete mode 100644 src/BaGet.Tools.AzureSearchImporter/Entities/PackageId.cs delete mode 100644 src/BaGet.Tools.AzureSearchImporter/Importer.cs delete mode 100644 src/BaGet.Tools.AzureSearchImporter/Initializer.cs delete mode 100644 src/BaGet.Tools.AzureSearchImporter/Migrations/20180415185938_Initial.Designer.cs delete mode 100644 src/BaGet.Tools.AzureSearchImporter/Migrations/20180415185938_Initial.cs delete mode 100644 src/BaGet.Tools.AzureSearchImporter/Migrations/IndexerContextModelSnapshot.cs delete mode 100644 src/BaGet.Tools.AzureSearchImporter/Program.cs delete mode 100644 src/BaGet.Tools.AzureSearchImporter/appsettings.json diff --git a/BaGet.sln b/BaGet.sln index 18ee44bd..766d8fbb 100644 --- a/BaGet.sln +++ b/BaGet.sln @@ -9,8 +9,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Core", "src\BaGet.Cor EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Azure", "src\BaGet.Azure\BaGet.Azure.csproj", "{716C970D-9614-4265-AC92-57E8B227B98E}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Tools.AzureSearchImporter", 
"src\BaGet.Tools.AzureSearchImporter\BaGet.Tools.AzureSearchImporter.csproj", "{B232DAFE-5CE8-441F-ACC7-2BB54BCD094F}" -EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Core.Tests", "tests\BaGet.Core.Tests\BaGet.Core.Tests.csproj", "{89AB1AE2-6CAA-4809-8B74-D78CBE00B049}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Tests", "tests\BaGet.Tests\BaGet.Tests.csproj", "{892A7A82-4283-4315-B7E5-6D5B70543000}" @@ -44,7 +42,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Gcp", "src\BaGet.Gcp\ EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "samples", "samples", "{DDEC0736-8169-4834-815E-B78E7CE612A4}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BaGet.Protocol.Samples.Tests", "samples\BaGet.Protocol.Samples.Tests.csproj", "{16B0D424-BB2F-4C0C-90B0-4F7955326ADF}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Protocol.Samples.Tests", "samples\BaGet.Protocol.Samples.Tests.csproj", "{16B0D424-BB2F-4C0C-90B0-4F7955326ADF}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -64,10 +62,6 @@ Global {716C970D-9614-4265-AC92-57E8B227B98E}.Debug|Any CPU.Build.0 = Debug|Any CPU {716C970D-9614-4265-AC92-57E8B227B98E}.Release|Any CPU.ActiveCfg = Release|Any CPU {716C970D-9614-4265-AC92-57E8B227B98E}.Release|Any CPU.Build.0 = Release|Any CPU - {B232DAFE-5CE8-441F-ACC7-2BB54BCD094F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {B232DAFE-5CE8-441F-ACC7-2BB54BCD094F}.Debug|Any CPU.Build.0 = Debug|Any CPU - {B232DAFE-5CE8-441F-ACC7-2BB54BCD094F}.Release|Any CPU.ActiveCfg = Release|Any CPU - {B232DAFE-5CE8-441F-ACC7-2BB54BCD094F}.Release|Any CPU.Build.0 = Release|Any CPU {89AB1AE2-6CAA-4809-8B74-D78CBE00B049}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {89AB1AE2-6CAA-4809-8B74-D78CBE00B049}.Debug|Any CPU.Build.0 = Debug|Any CPU {89AB1AE2-6CAA-4809-8B74-D78CBE00B049}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -124,7 +118,6 @@ Global 
{284366CB-C68F-473E-908A-50A382616AE0} = {26A0B557-53FB-4B9A-94C4-BCCF1BDCB0CC} {FFFACD28-C300-4046-BCFE-4A7899E88EA3} = {26A0B557-53FB-4B9A-94C4-BCCF1BDCB0CC} {716C970D-9614-4265-AC92-57E8B227B98E} = {26A0B557-53FB-4B9A-94C4-BCCF1BDCB0CC} - {B232DAFE-5CE8-441F-ACC7-2BB54BCD094F} = {26A0B557-53FB-4B9A-94C4-BCCF1BDCB0CC} {89AB1AE2-6CAA-4809-8B74-D78CBE00B049} = {C237857D-AD8E-4C52-974F-6A8155BB0C18} {892A7A82-4283-4315-B7E5-6D5B70543000} = {C237857D-AD8E-4C52-974F-6A8155BB0C18} {A2D23427-9278-4D52-B31F-759212252832} = {26A0B557-53FB-4B9A-94C4-BCCF1BDCB0CC} diff --git a/src/BaGet.Azure/Extensions/IServiceCollectionExtensions.cs b/src/BaGet.Azure/Extensions/IServiceCollectionExtensions.cs index 5fbab629..707a488e 100644 --- a/src/BaGet.Azure/Extensions/IServiceCollectionExtensions.cs +++ b/src/BaGet.Azure/Extensions/IServiceCollectionExtensions.cs @@ -74,8 +74,10 @@ public static IServiceCollection AddAzureTableSearch(this IServiceCollection ser public static IServiceCollection AddAzureSearch(this IServiceCollection services) { - services.AddTransient(); + services.AddTransient(); services.AddTransient(); + services.AddTransient(); + services.AddTransient(); services.AddSingleton(provider => { diff --git a/src/BaGet.Azure/Search/AzureSearchBatchIndexer.cs b/src/BaGet.Azure/Search/AzureSearchBatchIndexer.cs new file mode 100644 index 00000000..9b996e55 --- /dev/null +++ b/src/BaGet.Azure/Search/AzureSearchBatchIndexer.cs @@ -0,0 +1,88 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Azure.Search; +using Microsoft.Azure.Search.Models; +using Microsoft.Extensions.Logging; +using Microsoft.Rest.Azure; + +namespace BaGet.Azure.Search +{ + public class AzureSearchBatchIndexer + { + /// + /// Azure Search accepts batches of up to 1000 documents. 
+ /// + public const int MaxBatchSize = 1000; + + private readonly ISearchIndexClient _indexClient; + private readonly ILogger _logger; + + public AzureSearchBatchIndexer( + SearchServiceClient searchClient, + ILogger logger) + { + if (searchClient == null) throw new ArgumentNullException(nameof(searchClient)); + + _indexClient = searchClient.Indexes.GetClient(PackageDocument.IndexName); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + } + + public async Task IndexAsync( + IReadOnlyList> batch, + CancellationToken cancellationToken) + { + if (batch.Count > MaxBatchSize) + { + throw new ArgumentException( + $"Batch cannot have more than {MaxBatchSize} elements", + nameof(batch)); + } + + IList indexingResults = null; + Exception innerException = null; + + try + { + await _indexClient.Documents.IndexAsync( + IndexBatch.New(batch), + cancellationToken: cancellationToken); + + _logger.LogInformation("Pushed batch of {DocumentCount} documents", batch.Count); + + } + catch (IndexBatchException ex) + { + _logger.LogError(ex, "An exception was thrown when pushing batch of documents"); + indexingResults = ex.IndexingResults; + innerException = ex; + } + catch (CloudException ex) when (ex.Response.StatusCode == HttpStatusCode.RequestEntityTooLarge && batch.Count > 1) + { + var halfCount = batch.Count / 2; + var halfA = batch.Take(halfCount).ToList(); + var halfB = batch.Skip(halfCount).ToList(); + + _logger.LogWarning( + 0, + ex, + "The request body for a batch of {BatchSize} was too large. 
Splitting into two batches of size " + + "{HalfA} and {HalfB}.", + batch.Count, + halfA.Count, + halfB.Count); + + await IndexAsync(halfA, cancellationToken); + await IndexAsync(halfB, cancellationToken); + } + + if (indexingResults != null && indexingResults.Any(result => !result.Succeeded)) + { + throw new InvalidOperationException("Failed to pushed batch of documents documents"); + } + } + } +} diff --git a/src/BaGet.Azure/Search/AzureSearchIndexer.cs b/src/BaGet.Azure/Search/AzureSearchIndexer.cs new file mode 100644 index 00000000..c8f80c68 --- /dev/null +++ b/src/BaGet.Azure/Search/AzureSearchIndexer.cs @@ -0,0 +1,40 @@ +using System; +using System.Threading; +using System.Threading.Tasks; +using BaGet.Core; +using Microsoft.Extensions.Logging; + +namespace BaGet.Azure.Search +{ + public class AzureSearchIndexer : ISearchIndexer + { + private readonly IPackageService _packages; + private readonly IndexActionBuilder _actionBuilder; + private readonly AzureSearchBatchIndexer _batchIndexer; + private readonly ILogger _logger; + + public AzureSearchIndexer( + IPackageService packages, + IndexActionBuilder actionBuilder, + AzureSearchBatchIndexer batchIndexer, + ILogger logger) + { + _packages = packages ?? throw new ArgumentNullException(nameof(packages)); + _actionBuilder = actionBuilder ?? throw new ArgumentNullException(nameof(actionBuilder)); + _batchIndexer = batchIndexer ?? throw new ArgumentNullException(nameof(batchIndexer)); + _logger = logger ?? 
throw new ArgumentNullException(nameof(logger)); + } + + public async Task IndexAsync(Package package, CancellationToken cancellationToken = default) + { + var packages = await _packages.FindAsync(package.Id, includeUnlisted: false); + + var actions = _actionBuilder.UpdatePackage( + new PackageRegistration( + package.Id, + packages)); + + await _batchIndexer.IndexAsync(actions, cancellationToken); + } + } +} diff --git a/src/BaGet.Azure/Search/AzureSearchService.cs b/src/BaGet.Azure/Search/AzureSearchService.cs index fe3cc801..37ffdbfc 100644 --- a/src/BaGet.Azure/Search/AzureSearchService.cs +++ b/src/BaGet.Azure/Search/AzureSearchService.cs @@ -16,28 +16,20 @@ namespace BaGet.Azure.Search public class AzureSearchService : ISearchService { - private readonly BatchIndexer _indexer; private readonly SearchIndexClient _searchClient; private readonly IUrlGenerator _url; private readonly IFrameworkCompatibilityService _frameworks; public AzureSearchService( - BatchIndexer indexer, SearchIndexClient searchClient, IUrlGenerator url, IFrameworkCompatibilityService frameworks) { - _indexer = indexer ?? throw new ArgumentNullException(nameof(indexer)); _searchClient = searchClient ?? throw new ArgumentNullException(nameof(searchClient)); _url = url ?? throw new ArgumentNullException(nameof(url)); _frameworks = frameworks ?? 
throw new ArgumentNullException(nameof(frameworks)); } - public async Task IndexAsync(Package package, CancellationToken cancellationToken) - { - await _indexer.IndexAsync(package.Id); - } - public async Task SearchAsync( string query = null, int skip = 0, diff --git a/src/BaGet.Azure/Search/BatchIndexer.cs b/src/BaGet.Azure/Search/BatchIndexer.cs deleted file mode 100644 index bdda1f89..00000000 --- a/src/BaGet.Azure/Search/BatchIndexer.cs +++ /dev/null @@ -1,152 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using BaGet.Core; -using Microsoft.Azure.Search; -using Microsoft.Azure.Search.Models; -using Microsoft.Extensions.Logging; - -namespace BaGet.Azure.Search -{ - public class BatchIndexer - { - /// - /// Each package creates up to 4 documents, Azure Search accepts batches of up to 1000 documents. - /// - public const int MaxBatchSize = 1000 / 4; - - private readonly IPackageService _packageService; - private readonly ISearchIndexClient _indexClient; - private readonly ILogger _logger; - - public BatchIndexer( - IPackageService packageService, - SearchServiceClient searchClient, - ILogger logger) - { - if (searchClient == null) throw new ArgumentNullException(nameof(searchClient)); - - _indexClient = searchClient.Indexes.GetClient(PackageDocument.IndexName); - - _packageService = packageService ?? throw new ArgumentNullException(nameof(packageService)); - _logger = logger ?? 
throw new ArgumentNullException(nameof(logger)); - } - - public async Task IndexAsync(params string[] packageIds) - { - if (packageIds == null) throw new ArgumentNullException(nameof(packageIds)); - - var actions = new List>(); - var packageIdSet = new HashSet(packageIds, StringComparer.OrdinalIgnoreCase); - - if (packageIdSet.Count > MaxBatchSize) - { - throw new ArgumentException($"Cannot index more than {MaxBatchSize} packages at once"); - } - - _logger.LogInformation("Indexing {PackageCount} packages...", packageIdSet.Count); - - foreach (var packageId in packageIdSet) - { - foreach (var document in await BuildDocumentsAsync(packageId)) - { - actions.Add(IndexAction.Upload(document)); - } - } - - var batch = IndexBatch.New(actions); - - // TODO: Add retry on IndexBatchException - // See: https://docs.microsoft.com/en-us/azure/search/search-import-data-dotnet#import-data-to-the-index - await _indexClient.Documents.IndexAsync(batch); - - _logger.LogInformation("Indexed {PackageCount} packages", packageIdSet.Count); - } - - private async Task> BuildDocumentsAsync(string packageId) - { - if (packageId == null) throw new ArgumentNullException(nameof(packageId)); - - var packages = await _packageService.FindAsync(packageId, includeUnlisted: false); - - if (packages.Count == 0) - { - _logger.LogError("Could not find package with id {PackageId}", packageId); - - throw new ArgumentException($"Invalid package id {packageId}", nameof(packageId)); - } - - var result = new List(); - for (var i = 0; i < 4; i++) - { - var includePrerelease = (i & 1) != 0; - var includeSemVer2 = (i & 2) != 0; - var searchFilters = (SearchFilters)i; - - IEnumerable filtered = packages; - if (!includePrerelease) - { - filtered = filtered.Where(p => !p.IsPrerelease); - } - - if (!includeSemVer2) - { - filtered = filtered.Where(p => p.SemVerLevel != SemVerLevel.SemVer2); - } - - var versions = filtered.OrderBy(p => p.Version).ToList(); - if (versions.Count == 0) - { - continue; - } - - var latest 
= versions.Last(); - var dependencies = latest - .Dependencies - .Select(d => d.Id?.ToLowerInvariant()) - .Where(d => d != null) - .Distinct() - .ToArray(); - - var document = new PackageDocument(); - var encodedId = EncodeKey(packageId.ToLowerInvariant()); - - document.Key = $"{encodedId}-{searchFilters}"; - document.Id = latest.Id; - document.Version = latest.Version.ToFullString(); - document.Description = latest.Description; - document.Authors = latest.Authors; - document.IconUrl = latest.IconUrlString; - document.LicenseUrl = latest.LicenseUrlString; - document.ProjectUrl = latest.ProjectUrlString; - document.Published = latest.Published; - document.Summary = latest.Summary; - document.Tags = latest.Tags; - document.Title = latest.Title; - document.TotalDownloads = versions.Sum(p => p.Downloads); - document.DownloadsMagnitude = document.TotalDownloads.ToString().Length; - document.Versions = versions.Select(p => p.Version.ToFullString()).ToArray(); - document.VersionDownloads = versions.Select(p => p.Downloads.ToString()).ToArray(); - document.Dependencies = dependencies; - document.PackageTypes = latest.PackageTypes.Select(t => t.Name).ToArray(); - document.Frameworks = latest.TargetFrameworks.Select(f => f.Moniker.ToLowerInvariant()).ToArray(); - document.SearchFilters = searchFilters.ToString(); - - result.Add(document); - } - - return result; - } - - private string EncodeKey(string key) - { - // Keys can only contain letters, digits, underscore(_), dash(-), or equal sign(=). 
- var bytes = Encoding.UTF8.GetBytes(key); - var base64 = Convert.ToBase64String(bytes); - - return base64.Replace('+', '-').Replace('/', '_'); - } - } -} diff --git a/src/BaGet.Azure/Search/IndexActionBuilder.cs b/src/BaGet.Azure/Search/IndexActionBuilder.cs new file mode 100644 index 00000000..b4a6a2b3 --- /dev/null +++ b/src/BaGet.Azure/Search/IndexActionBuilder.cs @@ -0,0 +1,117 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using BaGet.Core; +using Microsoft.Azure.Search.Models; + +namespace BaGet.Azure.Search +{ + public class IndexActionBuilder + { + public virtual IReadOnlyList> AddPackage( + PackageRegistration registration) + { + return AddOrUpdatePackage(registration, isUpdate: false); + } + + public virtual IReadOnlyList> UpdatePackage( + PackageRegistration registration) + { + return AddOrUpdatePackage(registration, isUpdate: true); + } + + private IReadOnlyList> AddOrUpdatePackage( + PackageRegistration registration, + bool isUpdate) + { + var encodedId = EncodePackageId(registration.PackageId.ToLowerInvariant()); + var result = new List>(); + + for (var i = 0; i < 4; i++) + { + var includePrerelease = (i & 1) != 0; + var includeSemVer2 = (i & 2) != 0; + var searchFilters = (SearchFilters)i; + + var documentKey = $"{encodedId}-{searchFilters}"; + var filtered = registration.Packages.Where(p => p.Listed); + + if (!includePrerelease) + { + filtered = filtered.Where(p => !p.IsPrerelease); + } + + if (!includeSemVer2) + { + filtered = filtered.Where(p => p.SemVerLevel != SemVerLevel.SemVer2); + } + + var versions = filtered.OrderBy(p => p.Version).ToList(); + if (versions.Count == 0) + { + if (isUpdate) + { + var action = IndexAction.Delete( + new KeyedDocument + { + Key = documentKey + }); + + result.Add(action); + } + + continue; + } + + var latest = versions.Last(); + var dependencies = latest + .Dependencies + .Select(d => d.Id?.ToLowerInvariant()) + .Where(d => d != null) + .Distinct() + .ToArray(); + + 
var document = new PackageDocument(); + + document.Key = $"{encodedId}-{searchFilters}"; + document.Id = latest.Id; + document.Version = latest.Version.ToFullString(); + document.Description = latest.Description; + document.Authors = latest.Authors; + document.IconUrl = latest.IconUrlString; + document.LicenseUrl = latest.LicenseUrlString; + document.ProjectUrl = latest.ProjectUrlString; + document.Published = latest.Published; + document.Summary = latest.Summary; + document.Tags = latest.Tags; + document.Title = latest.Title; + document.TotalDownloads = versions.Sum(p => p.Downloads); + document.DownloadsMagnitude = document.TotalDownloads.ToString().Length; + document.Versions = versions.Select(p => p.Version.ToFullString()).ToArray(); + document.VersionDownloads = versions.Select(p => p.Downloads.ToString()).ToArray(); + document.Dependencies = dependencies; + document.PackageTypes = latest.PackageTypes.Select(t => t.Name).ToArray(); + document.Frameworks = latest.TargetFrameworks.Select(f => f.Moniker.ToLowerInvariant()).ToArray(); + document.SearchFilters = searchFilters.ToString(); + + result.Add( + isUpdate + ? IndexAction.MergeOrUpload(document) + : IndexAction.Upload(document)); + } + + return result; + } + + private string EncodePackageId(string key) + { + // Keys can only contain letters, digits, underscore(_), dash(-), or equal sign(=). + // TODO: Align with NuGet.org's algorithm. + var bytes = Encoding.UTF8.GetBytes(key); + var base64 = Convert.ToBase64String(bytes); + + return base64.Replace('+', '-').Replace('/', '_'); + } + } +} diff --git a/src/BaGet.Azure/Search/NewPackageRegistration.cs b/src/BaGet.Azure/Search/NewPackageRegistration.cs new file mode 100644 index 00000000..765f5a45 --- /dev/null +++ b/src/BaGet.Azure/Search/NewPackageRegistration.cs @@ -0,0 +1,35 @@ +using System; +using System.Collections.Generic; +using BaGet.Core; + +namespace BaGet.Azure.Search +{ + /// + /// The information on all versions of a package. 
+ /// + public class PackageRegistration + { + /// + /// Create a new registration object. + /// + /// + /// All versions of the package. + public PackageRegistration( + string packageId, + IReadOnlyList packages) + { + PackageId = packageId ?? throw new ArgumentNullException(nameof(packageId)); + Packages = packages ?? throw new ArgumentNullException(nameof(packages)); + } + + /// + /// The package's ID. + /// + public string PackageId { get; } + + /// + /// The information for each version of the package. + /// + public IReadOnlyList Packages { get; } + } +} diff --git a/src/BaGet.Azure/Search/PackageDocument.cs b/src/BaGet.Azure/Search/PackageDocument.cs index aabf0387..ae35ceaf 100644 --- a/src/BaGet.Azure/Search/PackageDocument.cs +++ b/src/BaGet.Azure/Search/PackageDocument.cs @@ -7,13 +7,10 @@ namespace BaGet.Azure.Search { // See: https://docs.microsoft.com/en-us/nuget/api/search-query-service-resource#search-for-packages [SerializePropertyNamesAsCamelCase] - public class PackageDocument + public class PackageDocument : KeyedDocument { public const string IndexName = "packages"; - [Key] - public string Key { get; set; } - [IsSearchable, IsFilterable, IsSortable] public string Id { get; set; } @@ -67,4 +64,16 @@ public class PackageDocument [IsFilterable] public string SearchFilters { get; set; } } + + [SerializePropertyNamesAsCamelCase] + public class KeyedDocument : IKeyedDocument + { + [Key] + public string Key { get; set; } + } + + public interface IKeyedDocument + { + string Key { get; set; } + } } diff --git a/src/BaGet.Core/Indexing/PackageIndexingService.cs b/src/BaGet.Core/Indexing/PackageIndexingService.cs index 749325af..a50661d8 100644 --- a/src/BaGet.Core/Indexing/PackageIndexingService.cs +++ b/src/BaGet.Core/Indexing/PackageIndexingService.cs @@ -12,14 +12,14 @@ public class PackageIndexingService : IPackageIndexingService { private readonly IPackageService _packages; private readonly IPackageStorageService _storage; - private readonly 
ISearchService _search; + private readonly ISearchIndexer _search; private readonly IOptionsSnapshot _options; private readonly ILogger _logger; public PackageIndexingService( IPackageService packages, IPackageStorageService storage, - ISearchService search, + ISearchIndexer search, IOptionsSnapshot options, ILogger logger) { diff --git a/src/BaGet.Core/Search/ISearchIndexer.cs b/src/BaGet.Core/Search/ISearchIndexer.cs new file mode 100644 index 00000000..c0c9026f --- /dev/null +++ b/src/BaGet.Core/Search/ISearchIndexer.cs @@ -0,0 +1,16 @@ +using System.Threading; +using System.Threading.Tasks; + +namespace BaGet.Core +{ + public interface ISearchIndexer + { + /// + /// Add a package to the search index. + /// + /// The package to add. + /// A token to cancel the task. + /// A task that completes once the package has been added. + Task IndexAsync(Package package, CancellationToken cancellationToken = default); + } +} diff --git a/src/BaGet.Core/Search/ISearchService.cs b/src/BaGet.Core/Search/ISearchService.cs index 69cb2ea8..e1304aa7 100644 --- a/src/BaGet.Core/Search/ISearchService.cs +++ b/src/BaGet.Core/Search/ISearchService.cs @@ -11,14 +11,6 @@ namespace BaGet.Core /// public interface ISearchService { - /// - /// Add a package to the search index. - /// - /// The package to add. - /// A token to cancel the task. - /// A task that completes once the package has been added. - Task IndexAsync(Package package, CancellationToken cancellationToken = default); - /// /// Perform a search query. 
/// See: https://docs.microsoft.com/en-us/nuget/api/search-query-service-resource#search-for-packages diff --git a/src/BaGet.Core/Search/NullSearchIndexer.cs b/src/BaGet.Core/Search/NullSearchIndexer.cs new file mode 100644 index 00000000..0dd1c5ce --- /dev/null +++ b/src/BaGet.Core/Search/NullSearchIndexer.cs @@ -0,0 +1,16 @@ +using System.Threading; +using System.Threading.Tasks; + +namespace BaGet.Core +{ + /// + /// A no-op indexer, used when search does not need to index packages. + /// + public class NullSearchIndexer : ISearchIndexer + { + public Task IndexAsync(Package package, CancellationToken cancellationToken = default) + { + return Task.CompletedTask; + } + } +} diff --git a/src/BaGet.Core/Search/NullSearchService.cs b/src/BaGet.Core/Search/NullSearchService.cs index 110ca828..f4838bfb 100644 --- a/src/BaGet.Core/Search/NullSearchService.cs +++ b/src/BaGet.Core/Search/NullSearchService.cs @@ -55,11 +55,6 @@ public Task FindDependentsAsync( return EmptyDependentsResponseTask; } - public Task IndexAsync(Package package, CancellationToken cancellationToken = default) - { - return Task.CompletedTask; - } - public Task SearchAsync( string query = null, int skip = 0, diff --git a/src/BaGet.Tools.AzureSearchImporter/BaGet.Tools.AzureSearchImporter.csproj b/src/BaGet.Tools.AzureSearchImporter/BaGet.Tools.AzureSearchImporter.csproj deleted file mode 100644 index 216b8493..00000000 --- a/src/BaGet.Tools.AzureSearchImporter/BaGet.Tools.AzureSearchImporter.csproj +++ /dev/null @@ -1,30 +0,0 @@ - - - - Exe - netcoreapp2.2 - - false - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/BaGet.Tools.AzureSearchImporter/Entities/IndexerContext.cs b/src/BaGet.Tools.AzureSearchImporter/Entities/IndexerContext.cs deleted file mode 100644 index 732c9f17..00000000 --- a/src/BaGet.Tools.AzureSearchImporter/Entities/IndexerContext.cs +++ /dev/null @@ -1,30 +0,0 @@ -using Microsoft.EntityFrameworkCore; - -namespace BaGet.Tools.AzureSearchImporter.Entities -{ - public 
class IndexerContext : DbContext - { - public IndexerContext(DbContextOptions options) - : base(options) - {} - - public DbSet PackageIds { get; set; } - - protected override void OnModelCreating(ModelBuilder builder) - { - builder.Entity() - .HasKey(p => p.Key); - - builder.Entity() - .Property(p => p.Value) - .HasColumnType("TEXT COLLATE NOCASE"); - - builder.Entity() - .HasIndex(p => p.Value) - .IsUnique(); - - builder.Entity() - .HasIndex(p => p.Done); - } - } -} diff --git a/src/BaGet.Tools.AzureSearchImporter/Entities/IndexerContextFactory.cs b/src/BaGet.Tools.AzureSearchImporter/Entities/IndexerContextFactory.cs deleted file mode 100644 index 6b740940..00000000 --- a/src/BaGet.Tools.AzureSearchImporter/Entities/IndexerContextFactory.cs +++ /dev/null @@ -1,19 +0,0 @@ -using Microsoft.EntityFrameworkCore; -using Microsoft.EntityFrameworkCore.Design; - -namespace BaGet.Tools.AzureSearchImporter.Entities -{ - public class IndexerContextFactory : IDesignTimeDbContextFactory - { - public const string ConnectionString = "Data Source=indexer.db"; - - public IndexerContext CreateDbContext(string[] args) - { - var optionsBuilder = new DbContextOptionsBuilder(); - - optionsBuilder.UseSqlite(ConnectionString); - - return new IndexerContext(optionsBuilder.Options); - } - } -} diff --git a/src/BaGet.Tools.AzureSearchImporter/Entities/PackageId.cs b/src/BaGet.Tools.AzureSearchImporter/Entities/PackageId.cs deleted file mode 100644 index bb0f53ac..00000000 --- a/src/BaGet.Tools.AzureSearchImporter/Entities/PackageId.cs +++ /dev/null @@ -1,11 +0,0 @@ -namespace BaGet.Tools.AzureSearchImporter.Entities -{ - public class PackageId - { - public int Key { get; set; } - - public string Value { get; set; } - - public bool Done { get; set; } - } -} \ No newline at end of file diff --git a/src/BaGet.Tools.AzureSearchImporter/Importer.cs b/src/BaGet.Tools.AzureSearchImporter/Importer.cs deleted file mode 100644 index fff42e03..00000000 --- 
a/src/BaGet.Tools.AzureSearchImporter/Importer.cs +++ /dev/null @@ -1,70 +0,0 @@ -using System; -using System.Linq; -using System.Threading.Tasks; -using BaGet.Azure.Search; -using BaGet.Tools.AzureSearchImporter.Entities; -using Microsoft.EntityFrameworkCore; -using Microsoft.Extensions.Logging; -using MoreLinq; - -namespace BaGet.Tools.AzureSearchImporter -{ - public class Importer - { - private const int ImportBatchSize = 200; - - private readonly IndexerContext _context; - private readonly BatchIndexer _indexer; - private readonly ILogger _logger; - - public Importer(IndexerContext context, BatchIndexer indexer, ILogger logger) - { - _context = context ?? throw new ArgumentException(nameof(context)); - _indexer = indexer ?? throw new ArgumentNullException(nameof(indexer)); - _logger = logger ?? throw new ArgumentNullException(nameof(logger)); - } - - public async Task ImportAsync(int skip = 0) - { - _logger.LogInformation("Starting import with skip {Skip}...", skip); - - var batchCount = 1; - var left = await _context.PackageIds - .Where(p => !p.Done) - .CountAsync(); - - _logger.LogInformation("{PackageIdsLeft} package ids left to import", left); - - while (true) - { - _logger.LogInformation("Importing batch {BatchCount}...", batchCount); - - var batch = await _context.PackageIds - .Where(p => !p.Done) - .OrderBy(p => p.Key) - .Skip(skip) - .Take(ImportBatchSize) - .ToListAsync(); - - if (batch.Count == 0) - { - break; - } - - await _indexer.IndexAsync(batch.Select(p => p.Value).ToArray()); - - foreach (var package in batch) - { - package.Done = true; - } - - await _context.SaveChangesAsync(); - - _logger.LogInformation("Imported batch {BatchCount}", batchCount); - batchCount++; - } - - _logger.LogInformation("Finished importing"); - } - } -} diff --git a/src/BaGet.Tools.AzureSearchImporter/Initializer.cs b/src/BaGet.Tools.AzureSearchImporter/Initializer.cs deleted file mode 100644 index e0fdef4a..00000000 --- 
a/src/BaGet.Tools.AzureSearchImporter/Initializer.cs +++ /dev/null @@ -1,106 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Threading.Tasks; -using BaGet.Azure.Search; -using BaGet.Core; -using BaGet.Tools.AzureSearchImporter.Entities; -using Microsoft.Azure.Search; -using Microsoft.Azure.Search.Models; -using Microsoft.EntityFrameworkCore; -using Microsoft.Extensions.Logging; -using MoreLinq; - -namespace BaGet.Tools.AzureSearchImporter -{ - public class Initializer - { - public const int InitializationBatchSize = 100; - - private readonly IContext _bagetContext; - private readonly IndexerContext _indexerContext; - private readonly SearchServiceClient _searchClient; - private readonly ILogger _logger; - - public Initializer( - IContext bagetContext, - IndexerContext indexerContext, - SearchServiceClient searchClient, - ILogger logger) - { - _bagetContext = bagetContext ?? throw new ArgumentNullException(nameof(bagetContext)); - _indexerContext = indexerContext ?? throw new ArgumentNullException(nameof(indexerContext)); - _searchClient = searchClient ?? throw new ArgumentNullException(nameof(searchClient)); - _logger = logger ?? 
throw new ArgumentNullException(nameof(logger)); - } - - public Task InitializeAsync() - => Task.WhenAll( - InitializeIndex(), - InitializeStateAsync()); - - private async Task InitializeIndex() - { - if (await _searchClient.Indexes.ExistsAsync(PackageDocument.IndexName)) - { - _logger.LogInformation("Search index already exists"); - return; - } - - _logger.LogInformation("Search index does not exist, creating..."); - - await _searchClient.Indexes.CreateAsync(new Index - { - Name = PackageDocument.IndexName, - Fields = FieldBuilder.BuildForType(), - Analyzers = new List - { - ExactMatchCustomAnalyzer.Instance - } - }); - - _logger.LogInformation("Created search index"); - } - - private async Task InitializeStateAsync() - { - if (await _indexerContext.PackageIds.AnyAsync()) - { - _logger.LogInformation("Indexer state is already initialized"); - return; - } - - _logger.LogInformation("Unitialized state. Finding packages to track in indexer state..."); - - var packageIds = await _bagetContext.Packages - .Select(p => p.Id) - .Distinct() - .ToListAsync(); - - _logger.LogInformation("Found {PackageIdCount} package ids to track in indexer state", packageIds.Count); - - var batchCount = 1; - - foreach (var batch in packageIds.Batch(InitializationBatchSize)) - { - foreach (var packageId in batch) - { - _indexerContext.PackageIds.Add(new PackageId - { - Value = packageId, - Done = false, - }); - } - - _logger.LogInformation("Saving package id batch {BatchCount} to indexer state...", batchCount); - - await _indexerContext.SaveChangesAsync(); - batchCount++; - } - - _logger.LogInformation( - "Finished adding {PackageIdCount} package ids to indexer state", - packageIds.Count); - } - } -} diff --git a/src/BaGet.Tools.AzureSearchImporter/Migrations/20180415185938_Initial.Designer.cs b/src/BaGet.Tools.AzureSearchImporter/Migrations/20180415185938_Initial.Designer.cs deleted file mode 100644 index 03e58faf..00000000 --- 
a/src/BaGet.Tools.AzureSearchImporter/Migrations/20180415185938_Initial.Designer.cs +++ /dev/null @@ -1,44 +0,0 @@ -// -using BaGet.Tools.AzureSearchImporter.Entities; -using Microsoft.EntityFrameworkCore; -using Microsoft.EntityFrameworkCore.Infrastructure; -using Microsoft.EntityFrameworkCore.Metadata; -using Microsoft.EntityFrameworkCore.Migrations; -using Microsoft.EntityFrameworkCore.Storage; -using System; - -namespace BaGet.Tools.AzureSearchImporter.Migrations -{ - [DbContext(typeof(IndexerContext))] - [Migration("20180415185938_Initial")] - partial class Initial - { - protected override void BuildTargetModel(ModelBuilder modelBuilder) - { -#pragma warning disable 612, 618 - modelBuilder - .HasAnnotation("ProductVersion", "2.0.1-rtm-125"); - - modelBuilder.Entity("BaGet.Tools.AzureSearchImporter.Entities.PackageId", b => - { - b.Property("Key") - .ValueGeneratedOnAdd(); - - b.Property("Done"); - - b.Property("Value") - .HasColumnType("TEXT COLLATE NOCASE"); - - b.HasKey("Key"); - - b.HasIndex("Done"); - - b.HasIndex("Value") - .IsUnique(); - - b.ToTable("PackageIds"); - }); -#pragma warning restore 612, 618 - } - } -} diff --git a/src/BaGet.Tools.AzureSearchImporter/Migrations/20180415185938_Initial.cs b/src/BaGet.Tools.AzureSearchImporter/Migrations/20180415185938_Initial.cs deleted file mode 100644 index ea6e5d3f..00000000 --- a/src/BaGet.Tools.AzureSearchImporter/Migrations/20180415185938_Initial.cs +++ /dev/null @@ -1,43 +0,0 @@ -using Microsoft.EntityFrameworkCore.Migrations; -using System; -using System.Collections.Generic; - -namespace BaGet.Tools.AzureSearchImporter.Migrations -{ - public partial class Initial : Migration - { - protected override void Up(MigrationBuilder migrationBuilder) - { - migrationBuilder.CreateTable( - name: "PackageIds", - columns: table => new - { - Key = table.Column(nullable: false) - .Annotation("Sqlite:Autoincrement", true), - Done = table.Column(nullable: false), - Value = table.Column(type: "TEXT COLLATE NOCASE", 
nullable: true) - }, - constraints: table => - { - table.PrimaryKey("PK_PackageIds", x => x.Key); - }); - - migrationBuilder.CreateIndex( - name: "IX_PackageIds_Done", - table: "PackageIds", - column: "Done"); - - migrationBuilder.CreateIndex( - name: "IX_PackageIds_Value", - table: "PackageIds", - column: "Value", - unique: true); - } - - protected override void Down(MigrationBuilder migrationBuilder) - { - migrationBuilder.DropTable( - name: "PackageIds"); - } - } -} diff --git a/src/BaGet.Tools.AzureSearchImporter/Migrations/IndexerContextModelSnapshot.cs b/src/BaGet.Tools.AzureSearchImporter/Migrations/IndexerContextModelSnapshot.cs deleted file mode 100644 index 278d0e5a..00000000 --- a/src/BaGet.Tools.AzureSearchImporter/Migrations/IndexerContextModelSnapshot.cs +++ /dev/null @@ -1,43 +0,0 @@ -// -using BaGet.Tools.AzureSearchImporter.Entities; -using Microsoft.EntityFrameworkCore; -using Microsoft.EntityFrameworkCore.Infrastructure; -using Microsoft.EntityFrameworkCore.Metadata; -using Microsoft.EntityFrameworkCore.Migrations; -using Microsoft.EntityFrameworkCore.Storage; -using System; - -namespace BaGet.Tools.AzureSearchImporter.Migrations -{ - [DbContext(typeof(IndexerContext))] - partial class IndexerContextModelSnapshot : ModelSnapshot - { - protected override void BuildModel(ModelBuilder modelBuilder) - { -#pragma warning disable 612, 618 - modelBuilder - .HasAnnotation("ProductVersion", "2.0.1-rtm-125"); - - modelBuilder.Entity("BaGet.Tools.AzureSearchImporter.Entities.PackageId", b => - { - b.Property("Key") - .ValueGeneratedOnAdd(); - - b.Property("Done"); - - b.Property("Value") - .HasColumnType("TEXT COLLATE NOCASE"); - - b.HasKey("Key"); - - b.HasIndex("Done"); - - b.HasIndex("Value") - .IsUnique(); - - b.ToTable("PackageIds"); - }); -#pragma warning restore 612, 618 - } - } -} diff --git a/src/BaGet.Tools.AzureSearchImporter/Program.cs b/src/BaGet.Tools.AzureSearchImporter/Program.cs deleted file mode 100644 index 5595a36a..00000000 --- 
a/src/BaGet.Tools.AzureSearchImporter/Program.cs +++ /dev/null @@ -1,77 +0,0 @@ -using System; -using System.Threading.Tasks; -using BaGet.Extensions; -using BaGet.Tools.AzureSearchImporter.Entities; -using Microsoft.EntityFrameworkCore; -using Microsoft.Extensions.Configuration; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging; - -namespace BaGet.Tools.AzureSearchImporter -{ - public class Program - { - public static void Main(string[] args) - => MainAsync(args) - .GetAwaiter() - .GetResult(); - - private async static Task MainAsync(string[] args) - { - // Parse the skip from arguments. - var skip = 0; - - if (args.Length > 0) - { - int.TryParse(args[args.Length - 1], out skip); - } - - // Prepare the job. - var provider = GetServiceProvider(GetConfiguration()); - var scopeFactory = provider.GetRequiredService(); - var initializer = provider.GetRequiredService(); - var importer = provider.GetRequiredService(); - - using (var scope = scopeFactory.CreateScope()) - { - scope.ServiceProvider - .GetRequiredService() - .Database - .Migrate(); - } - - // Initialize the state and start importing packages to the search index. 
- await initializer.InitializeAsync(); - await importer.ImportAsync(skip); - } - - private static IConfiguration GetConfiguration() - => new ConfigurationBuilder() - .SetBasePath(Environment.CurrentDirectory) - .AddJsonFile("appsettings.json") - .Build(); - - private static IServiceProvider GetServiceProvider(IConfiguration configuration) - { - var services = new ServiceCollection(); - - services.ConfigureBaGet(configuration, httpServices: false); - - services.AddLogging(logging => - { - logging.AddFilter(DbLoggerCategory.Database.Command.Name, LogLevel.Warning); - logging.AddConsole(); - }); - - services.AddDbContext((provider, options) => - { - options.UseSqlite(IndexerContextFactory.ConnectionString); - }); - - services.AddTransient(); - services.AddTransient(); - - return services.BuildServiceProvider(); - } - } -} diff --git a/src/BaGet.Tools.AzureSearchImporter/appsettings.json b/src/BaGet.Tools.AzureSearchImporter/appsettings.json deleted file mode 100644 index 62c67758..00000000 --- a/src/BaGet.Tools.AzureSearchImporter/appsettings.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "Database": { - "Type": "Sqlite", - "ConnectionString": "Data Source=..\\BaGet\\baget.db" - }, - - "Search": { - "Type": "Azure", - "AccountName": "", - "ApiKey": "" - } -} \ No newline at end of file diff --git a/src/BaGet/Extensions/IServiceCollectionExtensions.cs b/src/BaGet/Extensions/IServiceCollectionExtensions.cs index 6ddbbbd9..a0d4941d 100644 --- a/src/BaGet/Extensions/IServiceCollectionExtensions.cs +++ b/src/BaGet/Extensions/IServiceCollectionExtensions.cs @@ -263,8 +263,28 @@ public static IServiceCollection AddSearchProviders(this IServiceCollection serv } }); + services.AddTransient(provider => + { + var searchOptions = provider.GetRequiredService>(); + + switch (searchOptions.Value.Type) + { + case SearchType.Null: + case SearchType.Database: + return provider.GetRequiredService(); + + case SearchType.Azure: + return provider.GetRequiredService(); + + default: + throw new 
InvalidOperationException( + $"Unsupported search service: {searchOptions.Value.Type}"); + } + }); + services.AddTransient(); services.AddSingleton(); + services.AddSingleton(); services.AddAzureSearch(); services.AddAzureTableSearch(); diff --git a/tests/BaGet.Core.Tests/Services/PackageIndexingServiceTests.cs b/tests/BaGet.Core.Tests/Services/PackageIndexingServiceTests.cs index cf3d901b..903a8302 100644 --- a/tests/BaGet.Core.Tests/Services/PackageIndexingServiceTests.cs +++ b/tests/BaGet.Core.Tests/Services/PackageIndexingServiceTests.cs @@ -10,14 +10,14 @@ public class PackageIndexingServiceTests { private readonly Mock _packages; private readonly Mock _storage; - private readonly Mock _search; + private readonly Mock _search; private readonly PackageIndexingService _target; public PackageIndexingServiceTests() { _packages = new Mock(); _storage = new Mock(); - _search = new Mock(); + _search = new Mock(); _target = new PackageIndexingService( _packages.Object,