Merge branch 'main' into gh-oblt/add-asciidoc-freeze-warning

reakaleek · Jan 29, 2025 · 6c134ca · 6c134ca
2 parents 54ee8e7 + 6b76457
commit 6c134ca
Show file tree

Hide file tree

Showing 818 changed files with 2,877 additions and 3,279 deletions.
diff --git a/build-tools-internal/src/main/resources/changelog-schema.json b/build-tools-internal/src/main/resources/changelog-schema.json
@@ -291,6 +291,7 @@
         "JVM option",
         "Java API",
         "Logging",
+        "Logs",
         "Mapping",
         "Packaging",
         "Painless",

diff --git a/catalog-info.yaml b/catalog-info.yaml
@@ -123,7 +123,7 @@ spec:
       pipeline_file: .buildkite/pipelines/lucene-snapshot/build-snapshot.yml
       env:
         ELASTIC_SLACK_NOTIFICATIONS_ENABLED: "true"
-        SLACK_NOTIFICATIONS_CHANNEL: "#lucene"
+        SLACK_NOTIFICATIONS_CHANNEL: "#lucene-ci"
         SLACK_NOTIFICATIONS_ALL_BRANCHES: "true"
       branch_configuration: lucene_snapshot
       default_branch: lucene_snapshot
@@ -167,7 +167,7 @@ spec:
       pipeline_file: .buildkite/pipelines/lucene-snapshot/update-branch.yml
       env:
         ELASTIC_SLACK_NOTIFICATIONS_ENABLED: "true"
-        SLACK_NOTIFICATIONS_CHANNEL: "#lucene"
+        SLACK_NOTIFICATIONS_CHANNEL: "#lucene-ci"
         SLACK_NOTIFICATIONS_ALL_BRANCHES: "true"
       default_branch: lucene_snapshot
       teams:
@@ -210,7 +210,7 @@ spec:
       pipeline_file: .buildkite/pipelines/lucene-snapshot/run-tests.yml
       env:
         ELASTIC_SLACK_NOTIFICATIONS_ENABLED: "true"
-        SLACK_NOTIFICATIONS_CHANNEL: "#lucene"
+        SLACK_NOTIFICATIONS_CHANNEL: "#lucene-ci"
         SLACK_NOTIFICATIONS_ALL_BRANCHES: "true"
       branch_configuration: lucene_snapshot
       default_branch: lucene_snapshot

diff --git a/docs/changelog/120340.yaml b/docs/changelog/120340.yaml
@@ -0,0 +1,5 @@
+pr: 120340
+summary: Add support for `extended_stats`
+area: Transform
+type: enhancement
+issues: []
diff --git a/docs/changelog/121049.yaml b/docs/changelog/121049.yaml
@@ -0,0 +1,19 @@
+pr: 121049
+summary: Conditionally enable logsdb by default for data streams matching with logs-*-*
+  pattern.
+area: Logs
+type: breaking
+issues:
+ - 106489
+breaking:
+  title: Conditionally enable logsdb by default
+  area: Logs
+  details: |-
+    Logsdb will be enabled by default for data streams matching with logs-*-* pattern.
+    If upgrading from 8.x to 9.x and data streams matching with logs-*-* do exist,
+    then Logsdb will not be enabled by default.
+  impact: |-
+    Logsdb reduces the storage footprint in Elasticsearch for logs, but there are side effects
+    to be taken into account that are described in the Logsdb docs:
+    https://www.elastic.co/guide/en/elasticsearch/reference/current/logs-data-stream.html#upgrade-to-logsdb-notes
+  notable: true
diff --git a/docs/changelog/121074.yaml b/docs/changelog/121074.yaml
@@ -0,0 +1,5 @@
+pr: 121074
+summary: Implement a `MetricsAware` interface
+area: ES|QL
+type: enhancement
+issues: []
diff --git a/docs/changelog/121105.yaml b/docs/changelog/121105.yaml
@@ -0,0 +1,5 @@
+pr: 121105
+summary: Mark bbq indices as GA and add rolling upgrade integration tests
+area: Vector Search
+type: feature
+issues: []
diff --git a/docs/reference/aggregations/bucket/time-series-aggregation.asciidoc b/docs/reference/aggregations/bucket/time-series-aggregation.asciidoc
@@ -6,9 +6,14 @@
 
 preview::[]
 
-The time series aggregation queries data created using a time series index. This is typically data such as metrics
+The time series aggregation queries data created using a <<tsds,Time series data stream (TSDS)>>. This is typically data such as metrics
 or other data streams with a time component, and requires creating an index using the time series mode.
 
+[NOTE]
+====
+Refer to the <<differences-from-regular-data-stream, TSDS documentation>> to learn more about the key differences from regular data streams.
+====
+
 //////////////////////////
 
 Creating a time series mapping

diff --git a/docs/reference/api-conventions.asciidoc b/docs/reference/api-conventions.asciidoc
@@ -254,6 +254,16 @@ as they contain data essential to the operation of the system.
 IMPORTANT: Direct access to system indices is deprecated and
 will no longer be allowed in a future major version.
 
+To view system indices within a cluster:
+
+[source,console]
+--------------------------------------------------
+GET _cluster/state/metadata?filter_path=metadata.indices.*.system
+--------------------------------------------------
+
+WARNING: When overwriting current cluster state, system indices should be restored
+as part of their {ref}/snapshot-restore.html#feature-state[feature state].
+
 [discrete]
 [[api-conventions-parameters]]
 === Parameters

diff --git a/docs/reference/cluster/remote-info.asciidoc b/docs/reference/cluster/remote-info.asciidoc
@@ -26,10 +26,18 @@ Returns configured remote cluster information.
 [[cluster-remote-info-api-desc]]
 ==== {api-description-title}
 
-The cluster remote info API allows you to retrieve all of the configured
-remote cluster information. It returns connection and endpoint information keyed
+The cluster remote info API allows you to retrieve information about configured
+remote clusters. It returns connection and endpoint information keyed
 by the configured remote cluster alias.
 
+TIP: This API returns information that reflects the current state on the local cluster.
+The `connected` field does not necessarily reflect whether a remote cluster is
+down or unavailable, only whether there is currently an open connection to it.
+Elasticsearch does not spontaneously try to reconnect to a disconnected remote
+cluster. To trigger a reconnection, attempt a <<modules-cross-cluster-search,{ccs}>>,
+<<esql-cross-clusters,{esql} {ccs}>>, or try the
+<<indices-resolve-cluster-api,resolve cluster>> endpoint.
+
 
 [[cluster-remote-info-api-response-body]]
 ==== {api-response-body-title}
@@ -39,7 +47,10 @@ by the configured remote cluster alias.
     `proxy`.
 
 `connected`::
-	True if there is at least one connection to the remote cluster.
+    True if there is at least one open connection to the remote cluster. When
+    false, it means that the cluster no longer has an open connection to the
+    remote cluster. It does not necessarily mean that the remote cluster is
+    down or unavailable, just that at some point a connection was lost.
 
 `initial_connect_timeout`::
 	The initial connect timeout for remote cluster connections.

diff --git a/docs/reference/data-streams/logs.asciidoc b/docs/reference/data-streams/logs.asciidoc
@@ -237,3 +237,9 @@ The `logsdb` index mode uses the following settings:
 * **`index.mapping.ignore_above`**: `8191`
 
 * **`index.mapping.total_fields.ignore_dynamic_beyond_limit`**: `true`
+
+[discrete]
+[[upgrade-to-logsdb-notes]]
+=== Notes about upgrading to Logsdb
+
+TODO: add notes.
diff --git a/docs/reference/indices/resolve-cluster.asciidoc b/docs/reference/indices/resolve-cluster.asciidoc
@@ -11,9 +11,7 @@ For the most up-to-date API details, refer to {api-es}/group/endpoint-indices[In
 --
 
 Resolves the specified index expressions to return information about
-each cluster, including the local "querying" cluster, if included. If no index expression
-is provided, this endpoint will return information about all the remote
-clusters that are configured on the querying cluster.
+each cluster, including the local "querying" cluster, if included.
 
 This endpoint is useful before doing a <<modules-cross-cluster-search,{ccs}>> in
 order to determine which remote clusters should be included in a search.
@@ -24,10 +22,12 @@ with this endpoint.
 
 For each cluster in scope, information is returned about:
 
-1. whether the querying ("local") cluster is currently connected to it
+1. whether the querying ("local") cluster was able to connect to each remote cluster
+   specified in the index expression. Note that this endpoint actively attempts to
+   contact the remote clusters, unlike the <<cluster-remote-info,remote/info>> endpoint.
 2. whether each remote cluster is configured with `skip_unavailable` as `true` or `false`
 3. whether there are any indices, aliases or data streams on that cluster that match
-   the index expression (if one provided)
+   the index expression
 4. whether the search is likely to have errors returned when you do a {ccs} (including any
    authorization errors if your user does not have permission to query a remote cluster or
    the indices on that cluster)
@@ -42,12 +42,6 @@ Once the proper security permissions are obtained, then you can rely on the `con
 in the response to determine whether the remote cluster is available and ready for querying.
 ====
 
-NOTE: When querying older clusters that do not support the _resolve/cluster endpoint
-without an index expression, the local cluster will send the index expression `dummy*`
-to those remote clusters, so if an errors occur, you may see a reference to that index
-expression even though you didn't request it. If it causes a problem, you can instead
-include an index expression like `*:*` to this endpoint to bypass the issue.
-
 ////
 [source,console]
 --------------------------------
@@ -77,14 +71,6 @@ PUT _cluster/settings
 // TEST[s/35.238.149.\d+:930\d+/\${transport_host}/]
 ////
 
-[source,console]
-----
-GET /_resolve/cluster
-----
-// TEST[continued]
-
-Returns information about all remote clusters configured on the local cluster.
-
 [source,console]
 ----
 GET /_resolve/cluster/my-index-*,cluster*:my-index-*
@@ -140,21 +126,28 @@ ignored when frozen. Defaults to `false`.
 +
 deprecated:[7.16.0]
 
-[TIP]
-====
-The index options above are only allowed when specifying an index expression.
-You will get an error if you specify index options to the _resolve/cluster API
-that takes no index expression.
-====
-
 
 [discrete]
 [[usecases-for-resolve-cluster]]
+=== Test availability of remote clusters
+
+The <<cluster-remote-info,remote/info>> endpoint is commonly used to test whether the "local"
+cluster (the cluster being queried) is connected to its remote clusters, but it does not
+necessarily reflect whether the remote cluster is available or not. The remote cluster may
+be available, while the local cluster is not currently connected to it.
+
+You can use the resolve-cluster API to attempt to reconnect to remote clusters
+(for example with `GET _resolve/cluster/*:*`) and
+the `connected` field in the response will indicate whether it was successful or not.
+If a connection was (re-)established, this will also cause the
+<<cluster-remote-info,remote/info>> endpoint to now indicate a connected status.
+
+
 === Advantages of using this endpoint before a {ccs}
 
 You may want to exclude a cluster or index from a search when:
 
-1. A remote cluster is not currently connected and is configured with `skip_unavailable`=`false`.
+1. A remote cluster could not be connected to and is configured with `skip_unavailable`=`false`.
 Executing a {ccs} under those conditions will cause
 <<cross-cluster-search-failures,the entire search to fail>>.
 
@@ -268,14 +261,7 @@ GET /_resolve/cluster/not-present,clust*:my-index*,oldcluster:*?ignore_unavailab
   },
   "cluster_two": {
     "connected": false,           <3>
-    "skip_unavailable": false,
-    "matching_indices": true,
-    "version": {
-      "number": "8.13.0",
-      "build_flavor": "default",
-      "minimum_wire_compatibility_version": "7.17.0",
-      "minimum_index_compatibility_version": "7.0.0"
-    }
+    "skip_unavailable": false
   },
   "oldcluster": {         <4>
     "connected": true,

diff --git a/docs/reference/mapping/types/dense-vector.asciidoc b/docs/reference/mapping/types/dense-vector.asciidoc
@@ -118,7 +118,7 @@ The three following quantization strategies are supported:
 
 * `int8` - Quantizes each dimension of the vector to 1-byte integers. This reduces the memory footprint by 75% (or 4x) at the cost of some accuracy.
 * `int4` - Quantizes each dimension of the vector to half-byte integers. This reduces the memory footprint by 87% (or 8x) at the cost of accuracy.
-* `bbq` - experimental:[] Better binary quantization which reduces each dimension to a single bit precision. This reduces the memory footprint by 96% (or 32x) at a larger cost of accuracy. Generally, oversampling during query time and reranking can help mitigate the accuracy loss.
+* `bbq` - Better binary quantization which reduces each dimension to a single bit precision. This reduces the memory footprint by 96% (or 32x) at a larger cost of accuracy. Generally, oversampling during query time and reranking can help mitigate the accuracy loss.
 
 
 When using a quantized format, you may want to oversample and rescore the results to improve accuracy. See <<dense-vector-knn-search-rescoring, oversampling and rescoring>> for more information.
@@ -133,7 +133,7 @@ This means disk usage will increase by ~25% for `int8`, ~12.5% for `int4`, and ~
 
 NOTE: `int4` quantization requires an even number of vector dimensions.
 
-NOTE: experimental:[] `bbq` quantization only supports vector dimensions that are greater than 64.
+NOTE: `bbq` quantization only supports vector dimensions that are greater than 64.
 
 Here is an example of how to create a byte-quantized index:
 
@@ -177,7 +177,7 @@ PUT my-byte-quantized-index
 }
 --------------------------------------------------
 
-experimental:[] Here is an example of how to create a binary quantized index:
+Here is an example of how to create a binary quantized index:
 
 [source,console]
 --------------------------------------------------
@@ -325,15 +325,15 @@ by 4x at the cost of some accuracy. See <<dense-vector-quantization, Automatical
 * `int4_hnsw` - This utilizes the https://arxiv.org/abs/1603.09320[HNSW algorithm] in addition to automatically scalar
 quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint
 by 8x at the cost of some accuracy. See <<dense-vector-quantization, Automatically quantize vectors for kNN search>>.
-* experimental:[] `bbq_hnsw` - This utilizes the https://arxiv.org/abs/1603.09320[HNSW algorithm] in addition to automatically binary
+* `bbq_hnsw` - This utilizes the https://arxiv.org/abs/1603.09320[HNSW algorithm] in addition to automatically binary
 quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint
 by 32x at the cost of accuracy. See <<dense-vector-quantization, Automatically quantize vectors for kNN search>>.
 * `flat` - This utilizes a brute-force search algorithm for exact kNN search. This supports all `element_type` values.
 * `int8_flat` - This utilizes a brute-force search algorithm in addition to automatically scalar quantization. Only supports
 `element_type` of `float`.
 * `int4_flat` - This utilizes a brute-force search algorithm in addition to automatically half-byte scalar quantization. Only supports
 `element_type` of `float`.
-* experimental:[] `bbq_flat` - This utilizes a brute-force search algorithm in addition to automatically binary quantization. Only supports
+* `bbq_flat` - This utilizes a brute-force search algorithm in addition to automatically binary quantization. Only supports
 `element_type` of `float`.
 --
 `m`:::

diff --git a/docs/reference/mapping/types/semantic-text.asciidoc b/docs/reference/mapping/types/semantic-text.asciidoc
@@ -130,7 +130,7 @@ You can extract the most relevant fragments from a semantic text field by using
 
 [source,console]
 ------------------------------------------------------------
-PUT test-index
+POST test-index/_search
 {
     "query": {
         "semantic": {
@@ -240,4 +240,4 @@ PUT test-index
 `semantic_text` field types have the following limitations:
 
 * `semantic_text` fields are not currently supported as elements of <<nested,nested fields>>.
-* `semantic_text` fields can't currently be set as part of <<dynamic-templates>>.
+* `semantic_text` fields can't currently be set as part of <<dynamic-templates>>.
diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc
@@ -918,7 +918,7 @@ An array of index names. Wildcards are supported. For example:
 `["it_ops_metrics", "server*"]`.
 +
 --
-NOTE: If any indices are in remote clusters then the {ml} nodes need to have the
+NOTE: If any indices are in remote clusters then the master nodes and the {ml} nodes need to have the
 `remote_cluster_client` role.
 
 --

diff --git a/docs/reference/quickstart/getting-started.asciidoc b/docs/reference/quickstart/getting-started.asciidoc
@@ -293,7 +293,7 @@ POST /books/_doc
 // TEST[continued]
 <1> The new field.
 
-View the mapping for the `books` index with the <<indices-get-mapping, Get mapping API>>. The new field `new_field` has been added to the mapping with a `text` data type.
+View the mapping for the `books` index with the <<indices-get-mapping, Get mapping API>>. The new field `language` has been added to the mapping with a `text` data type.
 
 [source,console]
 ----
@@ -328,7 +328,7 @@ GET /books/_mapping
             }
           }
         },
-        "new_field": {
+        "language": {
           "type": "text",
           "fields": {
             "keyword": {

diff --git a/docs/reference/rest-api/common-parms.asciidoc b/docs/reference/rest-api/common-parms.asciidoc
@@ -808,6 +808,7 @@ currently supported:
 * <<search-aggregations-pipeline-bucket-script-aggregation,Bucket script>>
 * <<search-aggregations-pipeline-bucket-selector-aggregation,Bucket selector>>
 * <<search-aggregations-metrics-cardinality-aggregation,Cardinality>>
+* <<search-aggregations-metrics-extendedstats-aggregation,Extended Stats>>
 * <<search-aggregations-bucket-filter-aggregation,Filter>>
 * <<search-aggregations-metrics-geobounds-aggregation,Geo bounds>>
 * <<search-aggregations-metrics-geocentroid-aggregation,Geo centroid>>

diff --git a/docs/reference/rest-api/info.asciidoc b/docs/reference/rest-api/info.asciidoc
@@ -177,7 +177,7 @@ Example response:
       },
       "logsdb": {
         "available": true,
-        "enabled": false
+        "enabled": true
       }
    },
    "tagline" : "You know, for X"

diff --git a/docs/reference/rest-api/usage.asciidoc b/docs/reference/rest-api/usage.asciidoc
@@ -514,7 +514,7 @@ GET /_xpack/usage
   },
   "logsdb": {
     "available": true,
-    "enabled": false,
+    "enabled": true,
     "indices_count": 0,
     "indices_with_synthetic_source": 0,
     "num_docs": 0,

diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc
@@ -456,10 +456,13 @@ GET movies/_search
   "retriever": {
     "rescorer": { <2>
       "rescore": {
-        "query": { <3>
-          "window_size": 50, <4>
+        "window_size": 50, <3>
+        "query": { <4>
           "rescore_query": {
             "script_score": {
+              "query": {
+                "match_all": {}
+              },
               "script": {
                 "source": "cosineSimilarity(params.queryVector, 'product-vector_final_stage') + 1.0",
                 "params": {
@@ -516,8 +519,8 @@ GET movies/_search
 // TEST[skip:uses ELSER]
 <1> Specifies the number of top documents to return in the final response.
 <2> A `rescorer` retriever applied as the final step.
-<3> The definition of the `query` rescorer.
-<4> Defines the number of documents to rescore from the child retriever.
+<3> Defines the number of documents to rescore from the child retriever.
+<4> The definition of the `query` rescorer.
 <5> Specifies the child retriever definition.
 <6> Defines the number of documents returned by the `rrf` retriever, which limits the available documents to