Skip to content

Commit

Permalink
Refactor Azure Search (loic-sharma#393)
Browse files Browse the repository at this point in the history
Part of loic-sharma#362
See loic-sharma/baget.io#3

⚠️ This change removes the Azure Search import tool as it has been moved here: https://github.com/loic-sharma/baget.io. If you'd like to use Azure Search, the new steps are:

```ps1
git clone https://github.com/loic-sharma/baget.io
cd baget.io/BaGet
# Update the appsettings.json file with your Azure Search configurations
dotnet run -- azure-search create
dotnet run -- azure-search rebuild
```

The commands from baget.io will be ported to this project soon.
  • Loading branch information
loic-sharma authored Oct 22, 2019
1 parent c4546e0 commit a96c134
Show file tree
Hide file tree
Showing 27 changed files with 353 additions and 675 deletions.
9 changes: 1 addition & 8 deletions BaGet.sln
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Core", "src\BaGet.Cor
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Azure", "src\BaGet.Azure\BaGet.Azure.csproj", "{716C970D-9614-4265-AC92-57E8B227B98E}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Tools.AzureSearchImporter", "src\BaGet.Tools.AzureSearchImporter\BaGet.Tools.AzureSearchImporter.csproj", "{B232DAFE-5CE8-441F-ACC7-2BB54BCD094F}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Core.Tests", "tests\BaGet.Core.Tests\BaGet.Core.Tests.csproj", "{89AB1AE2-6CAA-4809-8B74-D78CBE00B049}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Tests", "tests\BaGet.Tests\BaGet.Tests.csproj", "{892A7A82-4283-4315-B7E5-6D5B70543000}"
Expand Down Expand Up @@ -44,7 +42,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Gcp", "src\BaGet.Gcp\
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "samples", "samples", "{DDEC0736-8169-4834-815E-B78E7CE612A4}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BaGet.Protocol.Samples.Tests", "samples\BaGet.Protocol.Samples.Tests.csproj", "{16B0D424-BB2F-4C0C-90B0-4F7955326ADF}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BaGet.Protocol.Samples.Tests", "samples\BaGet.Protocol.Samples.Tests.csproj", "{16B0D424-BB2F-4C0C-90B0-4F7955326ADF}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand All @@ -64,10 +62,6 @@ Global
{716C970D-9614-4265-AC92-57E8B227B98E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{716C970D-9614-4265-AC92-57E8B227B98E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{716C970D-9614-4265-AC92-57E8B227B98E}.Release|Any CPU.Build.0 = Release|Any CPU
{B232DAFE-5CE8-441F-ACC7-2BB54BCD094F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{B232DAFE-5CE8-441F-ACC7-2BB54BCD094F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B232DAFE-5CE8-441F-ACC7-2BB54BCD094F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B232DAFE-5CE8-441F-ACC7-2BB54BCD094F}.Release|Any CPU.Build.0 = Release|Any CPU
{89AB1AE2-6CAA-4809-8B74-D78CBE00B049}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{89AB1AE2-6CAA-4809-8B74-D78CBE00B049}.Debug|Any CPU.Build.0 = Debug|Any CPU
{89AB1AE2-6CAA-4809-8B74-D78CBE00B049}.Release|Any CPU.ActiveCfg = Release|Any CPU
Expand Down Expand Up @@ -124,7 +118,6 @@ Global
{284366CB-C68F-473E-908A-50A382616AE0} = {26A0B557-53FB-4B9A-94C4-BCCF1BDCB0CC}
{FFFACD28-C300-4046-BCFE-4A7899E88EA3} = {26A0B557-53FB-4B9A-94C4-BCCF1BDCB0CC}
{716C970D-9614-4265-AC92-57E8B227B98E} = {26A0B557-53FB-4B9A-94C4-BCCF1BDCB0CC}
{B232DAFE-5CE8-441F-ACC7-2BB54BCD094F} = {26A0B557-53FB-4B9A-94C4-BCCF1BDCB0CC}
{89AB1AE2-6CAA-4809-8B74-D78CBE00B049} = {C237857D-AD8E-4C52-974F-6A8155BB0C18}
{892A7A82-4283-4315-B7E5-6D5B70543000} = {C237857D-AD8E-4C52-974F-6A8155BB0C18}
{A2D23427-9278-4D52-B31F-759212252832} = {26A0B557-53FB-4B9A-94C4-BCCF1BDCB0CC}
Expand Down
4 changes: 3 additions & 1 deletion src/BaGet.Azure/Extensions/IServiceCollectionExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,10 @@ public static IServiceCollection AddAzureTableSearch(this IServiceCollection ser

public static IServiceCollection AddAzureSearch(this IServiceCollection services)
{
services.AddTransient<BatchIndexer>();
services.AddTransient<AzureSearchBatchIndexer>();
services.AddTransient<AzureSearchService>();
services.AddTransient<AzureSearchIndexer>();
services.AddTransient<IndexActionBuilder>();

services.AddSingleton(provider =>
{
Expand Down
88 changes: 88 additions & 0 deletions src/BaGet.Azure/Search/AzureSearchBatchIndexer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Azure.Search;
using Microsoft.Azure.Search.Models;
using Microsoft.Extensions.Logging;
using Microsoft.Rest.Azure;

namespace BaGet.Azure.Search
{
/// <summary>
/// Pushes batches of index actions to the Azure Search index named by
/// <see cref="PackageDocument.IndexName"/>.
/// </summary>
public class AzureSearchBatchIndexer
{
    /// <summary>
    /// Azure Search accepts batches of up to 1000 documents.
    /// </summary>
    public const int MaxBatchSize = 1000;

    private readonly ISearchIndexClient _indexClient;
    private readonly ILogger<AzureSearchBatchIndexer> _logger;

    /// <summary>
    /// Creates a batch indexer bound to the package index of the given search service.
    /// </summary>
    /// <param name="searchClient">The client used to obtain the index client.</param>
    /// <param name="logger">The logger used to report pushed, split, and failed batches.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="searchClient"/> or <paramref name="logger"/> is null.
    /// </exception>
    public AzureSearchBatchIndexer(
        SearchServiceClient searchClient,
        ILogger<AzureSearchBatchIndexer> logger)
    {
        if (searchClient == null) throw new ArgumentNullException(nameof(searchClient));

        _indexClient = searchClient.Indexes.GetClient(PackageDocument.IndexName);
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Pushes a batch of index actions. If the service rejects the request body as too
    /// large, the batch is split in half and each half is pushed separately.
    /// </summary>
    /// <param name="batch">The actions to push. At most <see cref="MaxBatchSize"/> elements.</param>
    /// <param name="cancellationToken">Cancels the push.</param>
    /// <exception cref="ArgumentNullException"><paramref name="batch"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="batch"/> has more than <see cref="MaxBatchSize"/> elements.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// At least one action in the batch was reported as failed. The originating
    /// <see cref="IndexBatchException"/> is available as the inner exception.
    /// </exception>
    public async Task IndexAsync(
        IReadOnlyList<IndexAction<KeyedDocument>> batch,
        CancellationToken cancellationToken)
    {
        if (batch == null) throw new ArgumentNullException(nameof(batch));

        if (batch.Count > MaxBatchSize)
        {
            throw new ArgumentException(
                $"Batch cannot have more than {MaxBatchSize} elements",
                nameof(batch));
        }

        // Nothing to send; avoid a needless round-trip to the service.
        if (batch.Count == 0)
        {
            return;
        }

        try
        {
            await _indexClient.Documents.IndexAsync(
                IndexBatch.New(batch),
                cancellationToken: cancellationToken);

            _logger.LogInformation("Pushed batch of {DocumentCount} documents", batch.Count);
        }
        catch (IndexBatchException ex)
        {
            _logger.LogError(ex, "An exception was thrown when pushing batch of documents");

            // Only surface a failure when at least one action is reported as failed,
            // and keep the original exception so callers can inspect the per-document results.
            var results = ex.IndexingResults;
            if (results != null && results.Any(result => !result.Succeeded))
            {
                throw new InvalidOperationException("Failed to push batch of documents", ex);
            }
        }
        catch (CloudException ex) when (ex.Response?.StatusCode == HttpStatusCode.RequestEntityTooLarge && batch.Count > 1)
        {
            // The filter requires more than one element, so a single-action batch that is
            // still too large is not handled here and propagates to the caller.
            var halfCount = batch.Count / 2;
            var halfA = batch.Take(halfCount).ToList();
            var halfB = batch.Skip(halfCount).ToList();

            _logger.LogWarning(
                0,
                ex,
                "The request body for a batch of {BatchSize} was too large. Splitting into two batches of size " +
                "{HalfA} and {HalfB}.",
                batch.Count,
                halfA.Count,
                halfB.Count);

            await IndexAsync(halfA, cancellationToken);
            await IndexAsync(halfB, cancellationToken);
        }
    }
}
}
40 changes: 40 additions & 0 deletions src/BaGet.Azure/Search/AzureSearchIndexer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using BaGet.Core;
using Microsoft.Extensions.Logging;

namespace BaGet.Azure.Search
{
/// <summary>
/// An <see cref="ISearchIndexer"/> that builds index actions for a package's ID
/// and pushes them through an <see cref="AzureSearchBatchIndexer"/>.
/// </summary>
public class AzureSearchIndexer : ISearchIndexer
{
    private readonly IPackageService _packages;
    private readonly IndexActionBuilder _actionBuilder;
    private readonly AzureSearchBatchIndexer _batchIndexer;
    private readonly ILogger<AzureSearchIndexer> _logger;

    /// <summary>
    /// Creates the indexer.
    /// </summary>
    /// <param name="packages">Used to look up the versions of a package ID.</param>
    /// <param name="actionBuilder">Builds the index actions for a package registration.</param>
    /// <param name="batchIndexer">Pushes the built actions to the search index.</param>
    /// <param name="logger">The logger for this indexer.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public AzureSearchIndexer(
        IPackageService packages,
        IndexActionBuilder actionBuilder,
        AzureSearchBatchIndexer batchIndexer,
        ILogger<AzureSearchIndexer> logger)
    {
        _packages = packages ?? throw new ArgumentNullException(nameof(packages));
        _actionBuilder = actionBuilder ?? throw new ArgumentNullException(nameof(actionBuilder));
        _batchIndexer = batchIndexer ?? throw new ArgumentNullException(nameof(batchIndexer));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Indexes the given package by rebuilding the actions for its whole package ID.
    /// </summary>
    /// <param name="package">The package whose ID should be indexed.</param>
    /// <param name="cancellationToken">Cancels the push to the search index.</param>
    /// <exception cref="ArgumentNullException"><paramref name="package"/> is null.</exception>
    public async Task IndexAsync(Package package, CancellationToken cancellationToken = default)
    {
        if (package == null) throw new ArgumentNullException(nameof(package));

        // The actions are built from every version found for the ID (unlisted ones excluded),
        // not just from the single package that was passed in.
        var packages = await _packages.FindAsync(package.Id, includeUnlisted: false);
        var registration = new PackageRegistration(package.Id, packages);
        var actions = _actionBuilder.UpdatePackage(registration);

        await _batchIndexer.IndexAsync(actions, cancellationToken);
    }
}
}
8 changes: 0 additions & 8 deletions src/BaGet.Azure/Search/AzureSearchService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,20 @@ namespace BaGet.Azure.Search

public class AzureSearchService : ISearchService
{
private readonly BatchIndexer _indexer;
private readonly SearchIndexClient _searchClient;
private readonly IUrlGenerator _url;
private readonly IFrameworkCompatibilityService _frameworks;

public AzureSearchService(
BatchIndexer indexer,
SearchIndexClient searchClient,
IUrlGenerator url,
IFrameworkCompatibilityService frameworks)
{
_indexer = indexer ?? throw new ArgumentNullException(nameof(indexer));
_searchClient = searchClient ?? throw new ArgumentNullException(nameof(searchClient));
_url = url ?? throw new ArgumentNullException(nameof(url));
_frameworks = frameworks ?? throw new ArgumentNullException(nameof(frameworks));
}

public async Task IndexAsync(Package package, CancellationToken cancellationToken)
{
await _indexer.IndexAsync(package.Id);
}

public async Task<SearchResponse> SearchAsync(
string query = null,
int skip = 0,
Expand Down
152 changes: 0 additions & 152 deletions src/BaGet.Azure/Search/BatchIndexer.cs

This file was deleted.

Loading

0 comments on commit a96c134

Please sign in to comment.