forked from ww-jermaine/Knowlege-Graphs-for-RAG
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
33389b9
commit 3386eeb
Showing
8 changed files
with
4,986 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,360 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "b8e2a3fb-cc4d-4f8d-8f78-97415e849416", | ||
"metadata": {}, | ||
"source": [ | ||
"# Lesson 3: Preparing Text Data for RAG" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "fafb0f88-0b26-4cdf-9b55-7ce191652155", | ||
"metadata": {}, | ||
"source": [ | ||
"<p style=\"background-color:#fd4a6180; padding:15px; margin-left:20px\"> <b>Note:</b> This notebook takes about 30 seconds to be ready to use. Please wait until the \"Kernel starting, please wait...\" message clears from the top of the notebook before running any cells. You may start the video while you wait.</p>\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "9fb43372-7cff-4dc6-8c47-2885aac4c09a", | ||
"metadata": {}, | ||
"source": [ | ||
"### Import packages and set up Neo4j" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "e854e1a7-6c56-48e9-b3f0-999b68940726", | ||
"metadata": { | ||
"height": 149 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from dotenv import load_dotenv\n", | ||
"import os\n", | ||
"\n", | ||
"from langchain_community.graphs import Neo4jGraph\n", | ||
"\n", | ||
"# Warning control\n", | ||
"import warnings\n", | ||
"warnings.filterwarnings(\"ignore\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "45e2bd85-1d15-4e91-9e6d-c1b647d8e97d", | ||
"metadata": { | ||
"height": 217 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Load from environment\n", | ||
"load_dotenv('.env', override=True)\n", | ||
"NEO4J_URI = os.getenv('NEO4J_URI')\n", | ||
"NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')\n", | ||
"NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')\n", | ||
"NEO4J_DATABASE = os.getenv('NEO4J_DATABASE')\n", | ||
"OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')\n", | ||
"\n", | ||
"# Note the code below is unique to this course environment, and not a \n", | ||
"# standard part of Neo4j's integration with OpenAI. Remove if running \n", | ||
"# in your own environment.\n", | ||
"OPENAI_ENDPOINT = os.getenv('OPENAI_BASE_URL') + '/embeddings'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "0c277dd6-23b9-42d0-9d2a-c6aa9948dad9", | ||
"metadata": { | ||
"height": 81 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Connect to the knowledge graph instance using LangChain\n", | ||
"kg = Neo4jGraph(\n", | ||
" url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "b4478433-1ab7-4ad6-ba51-73d54e45d798", | ||
"metadata": {}, | ||
"source": [ | ||
"### Create a vector index " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "a26f80f6-d21a-447f-a5e2-eb940c0a25e4", | ||
"metadata": { | ||
"height": 166 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"kg.query(\"\"\"\n", | ||
" CREATE VECTOR INDEX movie_tagline_embeddings IF NOT EXISTS\n", | ||
" FOR (m:Movie) ON (m.taglineEmbedding) \n", | ||
" OPTIONS { indexConfig: {\n", | ||
" `vector.dimensions`: 1536,\n", | ||
" `vector.similarity_function`: 'cosine'\n", | ||
" }}\"\"\"\n", | ||
")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "9472ede6-18da-4d13-a5de-0dbad618c0f3", | ||
"metadata": { | ||
"height": 81 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"kg.query(\"\"\"\n", | ||
" SHOW VECTOR INDEXES\n", | ||
" \"\"\"\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "569049b5-6ad7-4348-81f8-068bde0f185c", | ||
"metadata": { | ||
"height": 30 | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "ebe87478-723a-49e4-a197-b4cccd7153b0", | ||
"metadata": {}, | ||
"source": [ | ||
"### Populate the vector index\n", | ||
"- Calculate a vector representation for each movie tagline using OpenAI\n", | ||
"- Add the vector to each `Movie` node as its `taglineEmbedding` property" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "5921f75b-706d-414a-b7a0-b81623a71a23", | ||
"metadata": { | ||
"height": 217 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"kg.query(\"\"\"\n", | ||
" MATCH (movie:Movie) WHERE movie.tagline IS NOT NULL\n", | ||
" WITH movie, genai.vector.encode(\n", | ||
" movie.tagline, \n", | ||
" \"OpenAI\", \n", | ||
" {\n", | ||
" token: $openAiApiKey,\n", | ||
" endpoint: $openAiEndpoint\n", | ||
" }) AS vector\n", | ||
" CALL db.create.setNodeVectorProperty(movie, \"taglineEmbedding\", vector)\n", | ||
" \"\"\", \n", | ||
" params={\"openAiApiKey\":OPENAI_API_KEY, \"openAiEndpoint\": OPENAI_ENDPOINT} )" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "73efe0c6-d3e9-4815-9154-04f177101f17", | ||
"metadata": { | ||
"height": 132 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"result = kg.query(\"\"\"\n", | ||
" MATCH (m:Movie) \n", | ||
" WHERE m.tagline IS NOT NULL\n", | ||
" RETURN m.tagline, m.taglineEmbedding\n", | ||
" LIMIT 1\n", | ||
" \"\"\"\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "71f4fe0b-01da-493f-862b-36a60842d648", | ||
"metadata": { | ||
"height": 30 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"result[0]['m.tagline']" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "93c3fe17-7310-4e11-84c3-4955c8ed8f1a", | ||
"metadata": { | ||
"height": 30 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"result[0]['m.taglineEmbedding'][:10]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "221af96b-63ce-4a95-8d23-47f9aa8c3a4e", | ||
"metadata": { | ||
"height": 30 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"len(result[0]['m.taglineEmbedding'])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "03365be1-4aaa-4440-bbc5-0df650fd9013", | ||
"metadata": { | ||
"height": 30 | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "790b238d-9189-4111-962b-1bf3dfcdad05", | ||
"metadata": {}, | ||
"source": [ | ||
"### Similarity search\n", | ||
"- Calculate an embedding for the question\n", | ||
"- Identify matching movies based on the similarity between the question embedding and the `taglineEmbedding` vectors" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "5648d30a-ab89-4fa1-ad61-35bbb43ef102", | ||
"metadata": { | ||
"height": 30 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"question = \"What movies are about love?\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "6b1bc582-fd8e-4a2f-8347-fb62cbfec1c2", | ||
"metadata": { | ||
"height": 353 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"kg.query(\"\"\"\n", | ||
" WITH genai.vector.encode(\n", | ||
" $question, \n", | ||
" \"OpenAI\", \n", | ||
" {\n", | ||
" token: $openAiApiKey,\n", | ||
" endpoint: $openAiEndpoint\n", | ||
" }) AS question_embedding\n", | ||
" CALL db.index.vector.queryNodes(\n", | ||
" 'movie_tagline_embeddings', \n", | ||
" $top_k, \n", | ||
" question_embedding\n", | ||
" ) YIELD node AS movie, score\n", | ||
" RETURN movie.title, movie.tagline, score\n", | ||
" \"\"\", \n", | ||
" params={\"openAiApiKey\":OPENAI_API_KEY,\n", | ||
" \"openAiEndpoint\": OPENAI_ENDPOINT,\n", | ||
" \"question\": question,\n", | ||
" \"top_k\": 5\n", | ||
" })" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "0d3c6eaa-aaea-4a03-b9f8-3c0ead98e83b", | ||
"metadata": {}, | ||
"source": [ | ||
"### Try for yourself: ask your own question!\n", | ||
"- Change the question below and run the graph query to find different movies" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "21b08047-0086-4291-8d62-fb646de330fa", | ||
"metadata": { | ||
"height": 30 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"question = \"What movies are about adventure?\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "7cdc7751-dddf-46b8-a74b-3d7c5544748f", | ||
"metadata": { | ||
"height": 285 | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"kg.query(\"\"\"\n", | ||
" WITH genai.vector.encode(\n", | ||
" $question, \n", | ||
" \"OpenAI\", \n", | ||
" {\n", | ||
" token: $openAiApiKey,\n", | ||
" endpoint: $openAiEndpoint\n", | ||
" }) AS question_embedding\n", | ||
" CALL db.index.vector.queryNodes(\n", | ||
" 'movie_tagline_embeddings', \n", | ||
" $top_k, \n", | ||
" question_embedding\n", | ||
" ) YIELD node AS movie, score\n", | ||
" RETURN movie.title, movie.tagline, score\n", | ||
" \"\"\", \n", | ||
" params={\"openAiApiKey\":OPENAI_API_KEY,\n", | ||
" \"openAiEndpoint\": OPENAI_ENDPOINT,\n", | ||
" \"question\": question,\n", | ||
" \"top_k\": 5\n", | ||
" })" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.