From 6863c96abfba6e106a8a3c6ceca26d3cb7aad284 Mon Sep 17 00:00:00 2001 From: Oliver Skroblin Date: Fri, 5 Apr 2024 13:24:50 +0000 Subject: [PATCH] NEXT-34030 - ES exact hit --- .../Content/Test/Product/ProductBuilder.php | 7 ++ .../DependencyInjection/Configuration.php | 2 + .../ElasticsearchEntityAggregator.php | 4 +- .../ElasticsearchEntitySearcher.php | 4 +- .../Product/ProductSearchQueryBuilder.php | 4 + .../config/packages/elasticsearch.yaml | 2 + .../config/packages/test/elasticsearch.yaml | 5 + .../Resources/config/services.xml | 2 + .../Test/ElasticsearchTestTestBehaviour.php | 8 +- .../Elasticsearch/Product/SearchCasesTest.php | 109 ++++++++++++++++++ .../ElasticsearchEntityAggregatorTest.php | 8 +- .../ElasticsearchEntitySearcherTest.php | 10 +- .../Product/ProductSearchQueryBuilderTest.php | 56 +++++++++ 13 files changed, 214 insertions(+), 7 deletions(-) create mode 100644 src/Elasticsearch/Resources/config/packages/test/elasticsearch.yaml create mode 100644 tests/integration/Elasticsearch/Product/SearchCasesTest.php diff --git a/src/Core/Content/Test/Product/ProductBuilder.php b/src/Core/Content/Test/Product/ProductBuilder.php index dc85d9fce78..6551d7bd5e8 100644 --- a/src/Core/Content/Test/Product/ProductBuilder.php +++ b/src/Core/Content/Test/Product/ProductBuilder.php @@ -180,6 +180,13 @@ public function __construct( $this->tax($taxKey); } + public function number(string $number): self + { + $this->productNumber = $number; + + return $this; + } + /** * @return array */ diff --git a/src/Elasticsearch/DependencyInjection/Configuration.php b/src/Elasticsearch/DependencyInjection/Configuration.php index 058a94b4501..bf4d0c74bdf 100644 --- a/src/Elasticsearch/DependencyInjection/Configuration.php +++ b/src/Elasticsearch/DependencyInjection/Configuration.php @@ -47,6 +47,7 @@ public function getConfigTreeBuilder(): TreeBuilder ->children() ->scalarNode('timeout')->end() ->integerNode('term_max_length')->end() + ->scalarNode('search_type')->end() ->end() ->end() ->arrayNode('administration') @@ -62,6 +63,7 @@ public function getConfigTreeBuilder(): TreeBuilder ->children() ->scalarNode('timeout')->end() ->integerNode('term_max_length')->end() + ->scalarNode('search_type')->end() ->end() ->end() ->end() diff --git a/src/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntityAggregator.php b/src/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntityAggregator.php index 148ec74216f..d97e589060b 100644 --- a/src/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntityAggregator.php +++ b/src/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntityAggregator.php @@ -29,7 +29,8 @@ public function __construct( private readonly EntityAggregatorInterface $decorated, private readonly AbstractElasticsearchAggregationHydrator $hydrator, private readonly EventDispatcherInterface $eventDispatcher, - private readonly string $timeout = '5s' + private readonly string $timeout, + private readonly string $searchType ) { } @@ -52,6 +53,7 @@ public function aggregate(EntityDefinition $definition, Criteria $criteria, Cont $result = $this->client->search([ 'index' => $this->helper->getIndexName($definition), 'body' => $searchArray, + 'search_type' => $this->searchType, ]); $result = $this->hydrator->hydrate($definition, $criteria, $context, $result); diff --git a/src/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntitySearcher.php b/src/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntitySearcher.php index f5d07cba9e2..0b9fbfae91a 100644 --- a/src/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntitySearcher.php +++ b/src/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntitySearcher.php @@ -36,7 +36,8 @@ public function __construct( private readonly CriteriaParser $criteriaParser, private readonly AbstractElasticsearchSearchHydrator $hydrator, private readonly EventDispatcherInterface $eventDispatcher, - private readonly string $timeout = '5s' + private readonly string $timeout, + private readonly string $searchType ) { } @@ -68,6 +69,7 @@ public function search(EntityDefinition $definition, Criteria $criteria, Context 'index' => $this->helper->getIndexName($definition), 'track_total_hits' => true, 'body' => $search, + 'search_type' => $this->searchType, ]); $result = $this->hydrator->hydrate($definition, $criteria, $context, $result); diff --git a/src/Elasticsearch/Product/ProductSearchQueryBuilder.php b/src/Elasticsearch/Product/ProductSearchQueryBuilder.php index 005fdb94911..d01e1a808f5 100644 --- a/src/Elasticsearch/Product/ProductSearchQueryBuilder.php +++ b/src/Elasticsearch/Product/ProductSearchQueryBuilder.php @@ -55,6 +55,10 @@ public function build(Criteria $criteria, Context $context): BoolQuery $tokens = $this->tokenizer->tokenize((string) $criteria->getTerm()); $tokens = $this->tokenFilter->filter($tokens, $context); + $term = strtolower((string) $criteria->getTerm()); + if (!\in_array($term, $tokens, true)) { + $tokens[] = $term; + } foreach ($tokens as $originalToken) { $tokenBool = new BoolQuery(); diff --git a/src/Elasticsearch/Resources/config/packages/elasticsearch.yaml b/src/Elasticsearch/Resources/config/packages/elasticsearch.yaml index 61c1720032b..04222a5acda 100644 --- a/src/Elasticsearch/Resources/config/packages/elasticsearch.yaml +++ b/src/Elasticsearch/Resources/config/packages/elasticsearch.yaml @@ -8,6 +8,7 @@ elasticsearch: search: timeout: 5s term_max_length: 300 + search_type: "query_then_fetch" administration: hosts: "%env(string:ADMIN_OPENSEARCH_URL)%" enabled: "%env(bool:SHOPWARE_ADMIN_ES_ENABLED)%" @@ -16,6 +17,7 @@ elasticsearch: search: timeout: 5s term_max_length: 300 + search_type: "query_then_fetch" index_settings: number_of_shards: 3 number_of_replicas: 3 diff --git a/src/Elasticsearch/Resources/config/packages/test/elasticsearch.yaml b/src/Elasticsearch/Resources/config/packages/test/elasticsearch.yaml new file mode 100644 index 00000000000..18a89c2f7fe --- /dev/null +++ b/src/Elasticsearch/Resources/config/packages/test/elasticsearch.yaml @@ -0,0 +1,5 @@ +elasticsearch: + index_settings: + number_of_shards: 1 + number_of_replicas: 0 + diff --git a/src/Elasticsearch/Resources/config/services.xml b/src/Elasticsearch/Resources/config/services.xml index 6d2cf5711d8..cf360f82499 100644 --- a/src/Elasticsearch/Resources/config/services.xml +++ b/src/Elasticsearch/Resources/config/services.xml @@ -160,6 +160,7 @@ %elasticsearch.search.timeout% + %elasticsearch.search.search_type% %elasticsearch.search.timeout% + %elasticsearch.search.search_type% getDiContainer()->get(Client::class), $decorated, $this->getDiContainer()->get(AbstractElasticsearchAggregationHydrator::class), - $this->getDiContainer()->get('event_dispatcher') + $this->getDiContainer()->get('event_dispatcher'), + '5s', + 'dfs_query_then_fetch' ); } @@ -89,7 +91,9 @@ protected function createEntitySearcher(): ElasticsearchEntitySearcher $this->getDiContainer()->get(ElasticsearchHelper::class), $this->getDiContainer()->get(CriteriaParser::class), $this->getDiContainer()->get(AbstractElasticsearchSearchHydrator::class), - $this->getDiContainer()->get('event_dispatcher') + $this->getDiContainer()->get('event_dispatcher'), + '5s', + 'dfs_query_then_fetch' ); } diff --git a/tests/integration/Elasticsearch/Product/SearchCasesTest.php b/tests/integration/Elasticsearch/Product/SearchCasesTest.php new file mode 100644 index 00000000000..2505a12f2ce --- /dev/null +++ b/tests/integration/Elasticsearch/Product/SearchCasesTest.php @@ -0,0 +1,109 @@ + $products + */ + #[DataProvider('numbersProvider')] + public function testSearch(array $products, string $term, string $best): void + { + $this->clearElasticsearch(); + + $this->getContainer()->get(Connection::class)->executeStatement('DELETE FROM product'); + + $this->getContainer()->get('product.repository')->create(array_values($products), Context::createDefaultContext()); + + $this->indexElasticSearch(); + + $searcher = $this->createEntitySearcher(); + + $criteria = new Criteria(); + $criteria->addState(Criteria::STATE_ELASTICSEARCH_AWARE); + $criteria->setTerm($term); + + $definition = $this->getContainer()->get(ProductDefinition::class); + + $result = $searcher->search($definition, $criteria, Context::createDefaultContext()); + + $scores = []; + foreach ($result->getData() as $item) { + $scores[self::$ids->getKey((string) $item['id'])] = $item['_score']; + } + + static::assertEquals( + $best, + self::$ids->getKey((string) $result->firstId()), + print_r($scores, true) + ); + } + + public static function numbersProvider(): \Generator + { + self::$ids = $ids = new IdsCollection(); + + $products = [ + 'p1' => self::product($ids, 'p1', 'DE-031668-B', 'HP LaserJet Enterprise M608x Inkl. Stapelfach und Papierfach'), + 'p2' => self::product($ids, 'p2', 'DE-031677-B', 'HP LaserJet Enterprise M608x Inkl. Stapelfach'), + 'p3' => self::product($ids, 'p3', 'DE-031687-B', 'HP LaserJet Enterprise M608x'), + 'p4' => self::product($ids, 'p4', 'DE-13.116-B', 'LG 24MB35PM-B - 1920 x 1080 - FHD'), + 'p5' => self::product($ids, 'p5', 'DE-15.174-N', 'Crucial DDR4 Desktop Speicher - DIMM - DDR4 - 2400 MHz - CL17'), + 'p6' => self::product($ids, 'p6', 'DE-17.028-A', 'Fujitsu Display B24-8 TE - 1920 x 1080 - FHD'), + 'p7' => self::product($ids, 'p7', 'DE-17.028-B', 'Fujitsu Display B24-8 TE - 1920 x 1080 - FHD'), + 'p8' => self::product($ids, 'p8', 'DE-17.346-B', 'LG 24BK550Y-B - 1920 x 1080 - FHD'), + 'p9' => self::product($ids, 'p9', 'DE-17.353-B', 'Eizo FlexScan EV2416W-BK - 1920 x 1200 - WUXGA'), + 'p10' => self::product($ids, 'p10', 'DE-17.447-N', 'SOLID DDR3 Desktop Speicher - DIMM 240-PIN - DDR3 - 1600 MHz - CL 11'), + ]; + + yield 'Exact number match' => [$products, 'DE-031668-B', 'p1']; + } + + protected function getDiContainer(): ContainerInterface + { + return $this->getContainer(); + } + + protected function runWorker(): void + { + } + + /** + * @return array + */ + private static function product(IdsCollection $ids, string $key, string $number, string $name): array + { + return (new ProductBuilder($ids, $key)) + ->number($number) + ->price(100) + ->visibility() + ->name($name) + ->build(); + } +} diff --git a/tests/unit/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntityAggregatorTest.php b/tests/unit/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntityAggregatorTest.php index 484b7442693..911dc9a77a1 100644 --- a/tests/unit/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntityAggregatorTest.php +++ b/tests/unit/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntityAggregatorTest.php @@ -43,7 +43,9 @@ public function testEmptyQueryExceptionIsCatched(): void $client, $this->createMock(EntityAggregatorInterface::class), $this->createMock(AbstractElasticsearchAggregationHydrator::class), - new EventDispatcher() + new EventDispatcher(), + '10s', + 'dfs_query_then_fetch' ); $context = Context::createDefaultContext(); @@ -73,6 +75,7 @@ public function testAggregateWithTimeout(): void 'timeout' => '10s', 'size' => 0, ], + 'search_type' => 'dfs_query_then_fetch', ])->willReturn([]); $helper = $this->createMock(ElasticsearchHelper::class); @@ -86,7 +89,8 @@ public function testAggregateWithTimeout(): void $this->createMock(EntityAggregatorInterface::class), $this->createMock(AbstractElasticsearchAggregationHydrator::class), new EventDispatcher(), - '10s' + '10s', + 'dfs_query_then_fetch' ); $context = Context::createDefaultContext(); diff --git a/tests/unit/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntitySearcherTest.php b/tests/unit/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntitySearcherTest.php index 7338ffd7bad..060c08980e0 100644 --- a/tests/unit/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntitySearcherTest.php +++ b/tests/unit/Elasticsearch/Framework/DataAbstractionLayer/ElasticsearchEntitySearcherTest.php @@ -50,6 +50,8 @@ public function testEmptyQueryExceptionIsCatched(): void $this->createMock(CriteriaParser::class), $this->createMock(AbstractElasticsearchSearchHydrator::class), new EventDispatcher(), + '10s', + 'dfs_query_then_fetch' ); $context = Context::createDefaultContext(); @@ -87,6 +89,8 @@ public function testWithCriteriaLimitOfZero(): void $this->createMock(CriteriaParser::class), $this->createMock(AbstractElasticsearchSearchHydrator::class), new EventDispatcher(), + '5s', + 'dfs_query_then_fetch' ); $context = Context::createDefaultContext(); @@ -118,6 +122,7 @@ public function testSearchWithTimeout(): void 'from' => 0, 'size' => 10, ], + 'search_type' => 'dfs_query_then_fetch', ])->willReturn([]); $helper = $this->createMock(ElasticsearchHelper::class); @@ -132,7 +137,8 @@ public function testSearchWithTimeout(): void $this->createMock(CriteriaParser::class), $this->createMock(AbstractElasticsearchSearchHydrator::class), new EventDispatcher(), - '10s' + '10s', + 'dfs_query_then_fetch' ); $context = Context::createDefaultContext(); @@ -168,6 +174,8 @@ public function testExceptionsGetLogged(): void new CriteriaParser(new EntityDefinitionQueryHelper(), $this->createMock(CustomFieldService::class)), $this->createMock(AbstractElasticsearchSearchHydrator::class), new EventDispatcher(), + '5s', + 'dfs_query_then_fetch' ); $context = Context::createDefaultContext(); diff --git a/tests/unit/Elasticsearch/Product/ProductSearchQueryBuilderTest.php b/tests/unit/Elasticsearch/Product/ProductSearchQueryBuilderTest.php index 59a8e648d82..d219259e887 100644 --- a/tests/unit/Elasticsearch/Product/ProductSearchQueryBuilderTest.php +++ b/tests/unit/Elasticsearch/Product/ProductSearchQueryBuilderTest.php @@ -162,6 +162,18 @@ public static function buildSingleLanguageProvider(): iterable ], ], ], + [ + 'bool' => [ + 'should' => [ + self::match('name.' . Defaults::LANGUAGE_SYSTEM . '.search', 'foo 2023', 5000), + self::matchPhrasePrefix('name.' . Defaults::LANGUAGE_SYSTEM . '.search', 'foo 2023', 1000), + self::match('ean.search', 'foo 2023', 10000), + self::matchPhrasePrefix('ean.search', 'foo 2023', 2000), + self::nested('tags', self::match('tags.name.search', 'foo 2023', 2500)), + self::nested('tags', self::matchPhrasePrefix('tags.name.search', 'foo 2023', 500)), + ], + ], + ], ], ], ]; @@ -195,6 +207,14 @@ public static function buildSingleLanguageProvider(): iterable ], ], ], + [ + 'bool' => [ + 'should' => [ + self::match($prefix . 'evolvesText', 'foo 2023', 2500), + self::matchPhrasePrefix($prefix . 'evolvesText', 'foo 2023', 500), + ], + ], + ], ], ], ]; @@ -311,6 +331,24 @@ public static function buildMultipleLanguageProvider(): iterable ], ], ], + [ + 'bool' => [ + 'should' => [ + self::multiMatch(fields: [ + 'name.' . Defaults::LANGUAGE_SYSTEM . '.search', + 'name.' . self::SECOND_LANGUAGE_ID . '.search', + ], query: 'foo 2023', lenient: true, boost: 5000, fuzziness: 0), + self::multiMatch(fields: [ + 'name.' . Defaults::LANGUAGE_SYSTEM . '.search', + 'name.' . self::SECOND_LANGUAGE_ID . '.search', + ], query: 'foo 2023', boost: 1000, slop: 5, type: 'phrase_prefix'), + self::match('ean.search', 'foo 2023', 10000), + self::matchPhrasePrefix('ean.search', 'foo 2023', 2000), + self::nested('tags', self::match('tags.name.search', 'foo 2023', 2500)), + self::nested('tags', self::matchPhrasePrefix('tags.name.search', 'foo 2023', 500)), + ], + ], + ], ], ], ]; @@ -371,6 +409,24 @@ public static function buildMultipleLanguageProvider(): iterable ], ], ], + [ + 'bool' => [ + 'should' => [ + self::multiMatch(fields: [ + $prefixCfLang1 . 'evolvesText', + $prefixCfLang2 . 'evolvesText', + ], query: 'foo 2023', lenient: true, boost: 2500, fuzziness: 0), + self::multiMatch(fields: [ + $prefixCfLang1 . 'evolvesText', + $prefixCfLang2 . 'evolvesText', + ], query: 'foo 2023', boost: 500, slop: 5, type: 'phrase_prefix'), + self::multiMatch(fields: [ + $prefixCfLang1 . 'evolvesText', + $prefixCfLang2 . 'evolvesText', + ], query: 'foo 2023', boost: 1500, lenient: true, fuzziness: 'auto'), + ], + ], + ], ], ], ];