Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
mattgotteiner committed Sep 26, 2024
1 parent 7a8af7e commit 6c37d36
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 71 deletions.
1 change: 1 addition & 0 deletions demo-python/code/vector-quantization-and-storage/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ The Python notebook creates vectorized data on Azure AI Search and demonstrates
- Built-in scalar quantization that reduces vector index size in memory and on disk.
- Disabling storage of vectors returned in query responses. These vectors are stored separately from vectors used for the queries themselves.
- Smaller data types than `Edm.Single`.
- Truncating dimensions of vectors.

The sample data is a JSON file containing a pre-chunked version of the sample documents about a fictitious company called Contoso Electronics and their policies. They have been embedded using text-embedding-3-large with 3072 dimensions.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python-dotenv
azure-search-documents==11.5.1
azure-search-documents==11.6.0b5
openai==1.14.3
azure-identity
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"+ Use smaller \"narrow\" data types instead of `Edm.Single`. Types such as `Edm.Half` reduce storage overhead.\n",
"+ Disable storing vectors used in the query response. Vectors returned in a query response are stored separately from the vectors used during queries.\n",
"+ Quantizing vectors. Use built-in scalar or binary quantization to quantize embeddings to `Edm.Int8` without any reduction in query performance. Information loss from quantization can be compensated for using the original unquantized embeddings and oversampling.\n",
"+ Truncating dimensions. Use built-in truncation dimension option to reduce vector dimensionality with minimal reduction in query performance.\n",
"\n",
"### Prerequisites\n",
"\n",
Expand Down Expand Up @@ -42,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -58,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -87,7 +88,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -109,7 +110,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -133,7 +134,7 @@
" VectorSearchCompression\n",
")\n",
"\n",
"def create_index(index_name, dimensions, use_scalar_compression=False, use_binary_compression=False, use_float16=False, use_stored=True):\n",
"def create_index(index_name, dimensions, use_scalar_compression=False, use_binary_compression=False, use_float16=False, use_stored=True, truncation_dimension=None):\n",
" if use_float16:\n",
" vector_type = \"Collection(Edm.Half)\"\n",
" else:\n",
Expand All @@ -151,12 +152,12 @@
" if use_scalar_compression:\n",
" compression_name = \"myCompression\"\n",
" compression_configurations = [\n",
" ScalarQuantizationCompression(compression_name=compression_name)\n",
" ScalarQuantizationCompression(compression_name=compression_name, truncation_dimension=truncation_dimension)\n",
" ]\n",
" elif use_binary_compression:\n",
" compression_name = \"myCompression\"\n",
" compression_configurations = [\n",
" BinaryQuantizationCompression(compression_name=compression_name)\n",
" BinaryQuantizationCompression(compression_name=compression_name, truncation_dimension=truncation_dimension)\n",
" ]\n",
" else:\n",
" compression_name = None\n",
Expand Down Expand Up @@ -186,7 +187,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -201,35 +202,53 @@
"# Create indexes to compare storage usage\n",
"# The baseline index does not use any options\n",
"\n",
"baseline_index = f\"{base_index_name}-baseline\"\n",
"scalar_compression_index = f\"{base_index_name}-scalar-compression\"\n",
"binary_compression_index = f\"{base_index_name}-binary-compression\"\n",
"narrow_index = f\"{base_index_name}-narrow\"\n",
"no_stored_index = f\"{base_index_name}-no-stored\"\n",
"all_index_scalar = f\"{base_index_name}-all-options-with-scalar\"\n",
"all_index_binary = f\"{base_index_name}-all-options-with-binary\"\n",
"indexes = {\n",
" \"baseline\": {},\n",
" \"scalar-compression\": {\n",
" \"use_scalar_compression\": True\n",
" },\n",
" \"binary-compression\": {\n",
" \"use_binary_compression\": True\n",
" },\n",
" \"narrow\": {\n",
" \"use_float16\": True\n",
" },\n",
" \"no-stored\": {\n",
" \"use_stored\": False\n",
" },\n",
" \"scalar-compresssion-truncation-dimension\": {\n",
" \"use_scalar_compression\": True,\n",
" \"truncation_dimension\": 1024\n",
" },\n",
" \"binary-compression-truncation-dimension\": {\n",
" \"use_binary_compression\": True,\n",
" \"truncation_dimension\": 1024\n",
" },\n",
" \"all-options-with-scalar\": {\n",
" \"use_scalar_compression\": True,\n",
" \"use_float16\": True,\n",
" \"use_stored\": False,\n",
" \"truncation_dimension\": 1024\n",
" },\n",
" \"all-options-with-binary\": {\n",
" \"use_binary_compression\": True,\n",
" \"use_float16\": True,\n",
" \"use_stored\": False,\n",
" \"truncation_dimension\": 1024\n",
" }\n",
"}\n",
"\n",
"search_index_client = SearchIndexClient(endpoint, credential)\n",
"search_index_client.create_or_update_index(\n",
" create_index(scalar_compression_index, embedding_dimensions, use_scalar_compression=True))\n",
"search_index_client.create_or_update_index(\n",
" create_index(binary_compression_index, embedding_dimensions, use_binary_compression=True))\n",
"search_index_client.create_or_update_index(\n",
" create_index(narrow_index, embedding_dimensions, use_float16=True))\n",
"search_index_client.create_or_update_index(\n",
" create_index(no_stored_index, embedding_dimensions, use_stored=False))\n",
"search_index_client.create_or_update_index(\n",
" create_index(all_index_scalar, embedding_dimensions, use_scalar_compression=True, use_float16=True, use_stored=False))\n",
"search_index_client.create_or_update_index(\n",
" create_index(all_index_binary, embedding_dimensions, use_binary_compression=True, use_float16=True, use_stored=False))\n",
"search_index_client.create_or_update_index(\n",
" create_index(baseline_index, embedding_dimensions))\n",
"for index, options in indexes.items():\n",
" index = create_index(f\"{base_index_name}-{index}\", dimensions=embedding_dimensions, **options)\n",
" search_index_client.create_or_update_index(index)\n",
"\n",
"print(\"Created indexes\")\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -249,27 +268,12 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Uploaded embeddings\n"
]
}
],
"outputs": [],
"source": [
"upload_embeddings(scalar_compression_index)\n",
"upload_embeddings(binary_compression_index)\n",
"upload_embeddings(narrow_index)\n",
"upload_embeddings(no_stored_index)\n",
"upload_embeddings(all_index_scalar)\n",
"upload_embeddings(all_index_binary)\n",
"upload_embeddings(baseline_index)\n",
"\n",
"print(\"Uploaded embeddings\")"
"for index in indexes.keys():\n",
" upload_embeddings(f\"{base_index_name}-{index}\")"
]
},
{
Expand All @@ -283,41 +287,49 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"****************************************\n",
"Index Name: quantize-scalar-compression\n",
"Storage Size: 38.6966MB\n",
"Index Name: teststorage-baseline\n",
"Storage Size: 22.9593MB\n",
"Vector Size: 4.8277MB\n",
"****************************************\n",
"Index Name: teststorage-scalar-compression\n",
"Storage Size: 19.3577MB\n",
"Vector Size: 1.2242MB\n",
"****************************************\n",
"Index Name: quantize-binary-compression\n",
"Storage Size: 36.6071MB\n",
"Vector Size: 0.1733MB\n",
"Index Name: teststorage-scalar-compresssion-truncation-dimension\n",
"Storage Size: 18.5776MB\n",
"Vector Size: 0.4323MB\n",
"****************************************\n",
"Index Name: quantize-narrow\n",
"Storage Size: 36.3026MB\n",
"Vector Size: 2.4254MB\n",
"Index Name: teststorage-binary-compression\n",
"Storage Size: 18.3063MB\n",
"Vector Size: 0.1732MB\n",
"****************************************\n",
"Index Name: quantize-no-stored\n",
"Storage Size: 25.0226MB\n",
"Vector Size: 4.8277MB\n",
"Index Name: teststorage-binary-compression-truncation-dimension\n",
"Storage Size: 18.2062MB\n",
"Vector Size: 0.0731MB\n",
"****************************************\n",
"Index Name: teststorage-narrow\n",
"Storage Size: 18.1547MB\n",
"Vector Size: 2.4254MB\n",
"****************************************\n",
"Index Name: quantize-baseline\n",
"Storage Size: 22.9517MB\n",
"Index Name: teststorage-no-stored\n",
"Storage Size: 12.5178MB\n",
"Vector Size: 4.8277MB\n",
"****************************************\n",
"Index Name: quantize-all-options-with-scalar\n",
"Storage Size: 13.0689MB\n",
"Vector Size: 1.2538MB\n",
"Index Name: teststorage-all-options-with-scalar\n",
"Storage Size: 5.7131MB\n",
"Vector Size: 0.4234MB\n",
"****************************************\n",
"Index Name: quantize-all-options-with-binary\n",
"Storage Size: 5.4571MB\n",
"Vector Size: 0.1732MB\n"
"Index Name: teststorage-all-options-with-binary\n",
"Storage Size: 5.3624MB\n",
"Vector Size: 0.0731MB\n"
]
}
],
Expand All @@ -333,7 +345,7 @@
" response = search_index_client.get_index_statistics(index_name)\n",
" return bytes_to_mb(response[\"storage_size\"]), bytes_to_mb(response[\"vector_index_size\"])\n",
"\n",
"index_sizes = [(find_storage_size_mb(index_name), index_name) for index_name in [scalar_compression_index, binary_compression_index, baseline_index, no_stored_index, narrow_index, all_index_scalar, all_index_binary]]\n",
"index_sizes = [(find_storage_size_mb(index_name), index_name) for index_name in (f\"{base_index_name}-{index}\" for index in indexes.keys())]\n",
"index_sizes.sort(key=lambda item: item[0][0], reverse=True)\n",
"\n",
"for ((storage_size, vector_size), index_name) in index_sizes:\n",
Expand Down

0 comments on commit 6c37d36

Please sign in to comment.