Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
mattgotteiner committed Sep 26, 2024
1 parent 7a8af7e commit 6c37d36
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 71 deletions.
1 change: 1 addition & 0 deletions demo-python/code/vector-quantization-and-storage/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ The Python notebook creates vectorized data on Azure AI Search and demonstrates
- Built-in scalar quantization that reduces vector index size in memory and on disk.
- Disabling storage of vectors returned in query responses. These vectors are stored separately from vectors used for the queries themselves.
- Smaller data types than `Edm.Single`.
- Truncating dimensions of vectors.

The sample data is a JSON file containing a pre-chunked version of the sample documents about a fictitious company called Contoso Electronics and their policies. They have been embedded using text-embedding-3-large with 3072 dimensions.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
python-dotenv
azure-search-documents==11.5.1
azure-search-documents==11.6.0b5
openai==1.14.3
azure-identity
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"+ Use smaller \"narrow\" data types instead of `Edm.Single`. Types such as `Edm.Half` reduce storage overhead.\n",
"+ Disable storing vectors used in the query response. Vectors returned in a query response are stored separately from the vectors used during queries.\n",
"+ Quantizing vectors. Use built-in scalar or binary quantization to quantize embeddings to `Edm.Int8` without any reduction in query performance. Information loss from quantization can be compensated for using the original unquantized embeddings and oversampling.\n",
"+ Truncating dimensions. Use built-in truncation dimension option to reduce vector dimensionality with minimal reduction in query performance.\n",
"\n",
"### Prerequisites\n",
"\n",
Expand Down Expand Up @@ -42,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -58,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -87,7 +88,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -109,7 +110,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -133,7 +134,7 @@
" VectorSearchCompression\n",
")\n",
"\n",
"def create_index(index_name, dimensions, use_scalar_compression=False, use_binary_compression=False, use_float16=False, use_stored=True):\n",
"def create_index(index_name, dimensions, use_scalar_compression=False, use_binary_compression=False, use_float16=False, use_stored=True, truncation_dimension=None):\n",
" if use_float16:\n",
" vector_type = \"Collection(Edm.Half)\"\n",
" else:\n",
Expand All @@ -151,12 +152,12 @@
" if use_scalar_compression:\n",
" compression_name = \"myCompression\"\n",
" compression_configurations = [\n",
" ScalarQuantizationCompression(compression_name=compression_name)\n",
" ScalarQuantizationCompression(compression_name=compression_name, truncation_dimension=truncation_dimension)\n",
" ]\n",
" elif use_binary_compression:\n",
" compression_name = \"myCompression\"\n",
" compression_configurations = [\n",
" BinaryQuantizationCompression(compression_name=compression_name)\n",
" BinaryQuantizationCompression(compression_name=compression_name, truncation_dimension=truncation_dimension)\n",
" ]\n",
" else:\n",
" compression_name = None\n",
Expand Down Expand Up @@ -186,7 +187,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -201,35 +202,53 @@
"# Create indexes to compare storage usage\n",
"# The baseline index does not use any options\n",
"\n",
"baseline_index = f\"{base_index_name}-baseline\"\n",
"scalar_compression_index = f\"{base_index_name}-scalar-compression\"\n",
"binary_compression_index = f\"{base_index_name}-binary-compression\"\n",
"narrow_index = f\"{base_index_name}-narrow\"\n",
"no_stored_index = f\"{base_index_name}-no-stored\"\n",
"all_index_scalar = f\"{base_index_name}-all-options-with-scalar\"\n",
"all_index_binary = f\"{base_index_name}-all-options-with-binary\"\n",
"indexes = {\n",
" \"baseline\": {},\n",
" \"scalar-compression\": {\n",
" \"use_scalar_compression\": True\n",
" },\n",
" \"binary-compression\": {\n",
" \"use_binary_compression\": True\n",
" },\n",
" \"narrow\": {\n",
" \"use_float16\": True\n",
" },\n",
" \"no-stored\": {\n",
" \"use_stored\": False\n",
" },\n",
" \"scalar-compresssion-truncation-dimension\": {\n",
" \"use_scalar_compression\": True,\n",
" \"truncation_dimension\": 1024\n",
" },\n",
" \"binary-compression-truncation-dimension\": {\n",
" \"use_binary_compression\": True,\n",
" \"truncation_dimension\": 1024\n",
" },\n",
" \"all-options-with-scalar\": {\n",
" \"use_scalar_compression\": True,\n",
" \"use_float16\": True,\n",
" \"use_stored\": False,\n",
" \"truncation_dimension\": 1024\n",
" },\n",
" \"all-options-with-binary\": {\n",
" \"use_binary_compression\": True,\n",
" \"use_float16\": True,\n",
" \"use_stored\": False,\n",
" \"truncation_dimension\": 1024\n",
" }\n",
"}\n",
"\n",
"search_index_client = SearchIndexClient(endpoint, credential)\n",
"search_index_client.create_or_update_index(\n",
" create_index(scalar_compression_index, embedding_dimensions, use_scalar_compression=True))\n",
"search_index_client.create_or_update_index(\n",
" create_index(binary_compression_index, embedding_dimensions, use_binary_compression=True))\n",
"search_index_client.create_or_update_index(\n",
" create_index(narrow_index, embedding_dimensions, use_float16=True))\n",
"search_index_client.create_or_update_index(\n",
" create_index(no_stored_index, embedding_dimensions, use_stored=False))\n",
"search_index_client.create_or_update_index(\n",
" create_index(all_index_scalar, embedding_dimensions, use_scalar_compression=True, use_float16=True, use_stored=False))\n",
"search_index_client.create_or_update_index(\n",
" create_index(all_index_binary, embedding_dimensions, use_binary_compression=True, use_float16=True, use_stored=False))\n",
"search_index_client.create_or_update_index(\n",
" create_index(baseline_index, embedding_dimensions))\n",
"for index, options in indexes.items():\n",
" index = create_index(f\"{base_index_name}-{index}\", dimensions=embedding_dimensions, **options)\n",
" search_index_client.create_or_update_index(index)\n",
"\n",
"print(\"Created indexes\")\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -249,27 +268,12 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Uploaded embeddings\n"
]
}
],
"outputs": [],
"source": [
"upload_embeddings(scalar_compression_index)\n",
"upload_embeddings(binary_compression_index)\n",
"upload_embeddings(narrow_index)\n",
"upload_embeddings(no_stored_index)\n",
"upload_embeddings(all_index_scalar)\n",
"upload_embeddings(all_index_binary)\n",
"upload_embeddings(baseline_index)\n",
"\n",
"print(\"Uploaded embeddings\")"
"for index in indexes.keys():\n",
" upload_embeddings(f\"{base_index_name}-{index}\")"
]
},
{
Expand All @@ -283,41 +287,49 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"****************************************\n",
"Index Name: quantize-scalar-compression\n",
"Storage Size: 38.6966MB\n",
"Index Name: teststorage-baseline\n",
"Storage Size: 22.9593MB\n",
"Vector Size: 4.8277MB\n",
"****************************************\n",
"Index Name: teststorage-scalar-compression\n",
"Storage Size: 19.3577MB\n",
"Vector Size: 1.2242MB\n",
"****************************************\n",
"Index Name: quantize-binary-compression\n",
"Storage Size: 36.6071MB\n",
"Vector Size: 0.1733MB\n",
"Index Name: teststorage-scalar-compresssion-truncation-dimension\n",
"Storage Size: 18.5776MB\n",
"Vector Size: 0.4323MB\n",
"****************************************\n",
"Index Name: quantize-narrow\n",
"Storage Size: 36.3026MB\n",
"Vector Size: 2.4254MB\n",
"Index Name: teststorage-binary-compression\n",
"Storage Size: 18.3063MB\n",
"Vector Size: 0.1732MB\n",
"****************************************\n",
"Index Name: quantize-no-stored\n",
"Storage Size: 25.0226MB\n",
"Vector Size: 4.8277MB\n",
"Index Name: teststorage-binary-compression-truncation-dimension\n",
"Storage Size: 18.2062MB\n",
"Vector Size: 0.0731MB\n",
"****************************************\n",
"Index Name: teststorage-narrow\n",
"Storage Size: 18.1547MB\n",
"Vector Size: 2.4254MB\n",
"****************************************\n",
"Index Name: quantize-baseline\n",
"Storage Size: 22.9517MB\n",
"Index Name: teststorage-no-stored\n",
"Storage Size: 12.5178MB\n",
"Vector Size: 4.8277MB\n",
"****************************************\n",
"Index Name: quantize-all-options-with-scalar\n",
"Storage Size: 13.0689MB\n",
"Vector Size: 1.2538MB\n",
"Index Name: teststorage-all-options-with-scalar\n",
"Storage Size: 5.7131MB\n",
"Vector Size: 0.4234MB\n",
"****************************************\n",
"Index Name: quantize-all-options-with-binary\n",
"Storage Size: 5.4571MB\n",
"Vector Size: 0.1732MB\n"
"Index Name: teststorage-all-options-with-binary\n",
"Storage Size: 5.3624MB\n",
"Vector Size: 0.0731MB\n"
]
}
],
Expand All @@ -333,7 +345,7 @@
" response = search_index_client.get_index_statistics(index_name)\n",
" return bytes_to_mb(response[\"storage_size\"]), bytes_to_mb(response[\"vector_index_size\"])\n",
"\n",
"index_sizes = [(find_storage_size_mb(index_name), index_name) for index_name in [scalar_compression_index, binary_compression_index, baseline_index, no_stored_index, narrow_index, all_index_scalar, all_index_binary]]\n",
"index_sizes = [(find_storage_size_mb(index_name), index_name) for index_name in (f\"{base_index_name}-{index}\" for index in indexes.keys())]\n",
"index_sizes.sort(key=lambda item: item[0][0], reverse=True)\n",
"\n",
"for ((storage_size, vector_size), index_name) in index_sizes:\n",
Expand Down

0 comments on commit 6c37d36

Please sign in to comment.