forked from tomasonjo/blogs
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
296 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,296 @@ | ||
{ | ||
"nbformat": 4, | ||
"nbformat_minor": 0, | ||
"metadata": { | ||
"colab": { | ||
"provenance": [], | ||
"authorship_tag": "ABX9TyO7tx7538YWWviLYeotHCRL", | ||
"include_colab_link": true | ||
}, | ||
"kernelspec": { | ||
"name": "python3", | ||
"display_name": "Python 3" | ||
}, | ||
"language_info": { | ||
"name": "python" | ||
} | ||
}, | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "view-in-github", | ||
"colab_type": "text" | ||
}, | ||
"source": [ | ||
"<a href=\"https://colab.research.google.com/github/tomasonjo/blogs/blob/master/llm/llm_graph_transformer_in_depth.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "zK-Jv5o5qPgm", | ||
"outputId": "1f87312b-01de-4c79-a03c-2b681f8cb659" | ||
}, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.6/50.6 kB\u001b[0m \u001b[31m494.8 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m296.6/296.6 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m36.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m208.1/208.1 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.9/49.9 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m408.0/408.0 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m296.9/296.9 kB\u001b[0m \u001b[31m19.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m386.9/386.9 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m31.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.0/78.0 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m325.2/325.2 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.5/49.5 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m144.5/144.5 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.5/54.5 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", | ||
"\u001b[?25h" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"!pip install --quiet neo4j langchain-community langchain-experimental langchain-openai json-repair" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"from langchain_community.graphs import Neo4jGraph\n", | ||
"\n", | ||
"graph = Neo4jGraph(\n", | ||
" url=\"bolt://52.201.218.219:7687\",\n", | ||
" username=\"neo4j\",\n", | ||
" password=\"contact-fifths-chaplain\",\n", | ||
" refresh_schema=False\n", | ||
")\n", | ||
"\n", | ||
"def clean_graph():\n", | ||
" query = \"\"\"\n", | ||
" MATCH (n)\n", | ||
" DETACH DELETE n\n", | ||
" \"\"\"\n", | ||
" graph.query(query)" | ||
], | ||
"metadata": { | ||
"id": "tbUnI_F-s5rP" | ||
}, | ||
"execution_count": 3, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"from langchain_core.documents import Document\n", | ||
"\n", | ||
"text = \"\"\"\n", | ||
"Marie Curie, 7 November 1867 – 4 July 1934, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\n", | ||
"She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.\n", | ||
"Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.\n", | ||
"She was, in 1906, the first woman to become a professor at the University of Paris.\n", | ||
"Also, Robin Williams!\n", | ||
"\"\"\"\n", | ||
"documents = [Document(page_content=text)]" | ||
], | ||
"metadata": { | ||
"id": "ASyrLxerqUQc" | ||
}, | ||
"execution_count": 4, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"from langchain_openai import ChatOpenAI\n", | ||
"import getpass\n", | ||
"import os\n", | ||
"\n", | ||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI api key\")\n", | ||
"\n", | ||
"llm = ChatOpenAI(model='gpt-4o')" | ||
], | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "g0hE3kk1q1t4", | ||
"outputId": "068bd039-663e-46c9-cc60-c2ec64f5c03b" | ||
}, | ||
"execution_count": 5, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"OpenAI api key··········\n" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"from langchain_experimental.graph_transformers import LLMGraphTransformer\n", | ||
"\n", | ||
"no_schema = LLMGraphTransformer(llm=llm)" | ||
], | ||
"metadata": { | ||
"id": "6980jrt6rcA-" | ||
}, | ||
"execution_count": 6, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"data = await no_schema.aconvert_to_graph_documents(documents)\n", | ||
"print(data)\n", | ||
"graph.add_graph_documents(data)" | ||
], | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "CCsfJvlfrshH", | ||
"outputId": "a4cc072c-cd39-4c7b-f628-1ba615cddc09" | ||
}, | ||
"execution_count": 33, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"[GraphDocument(nodes=[Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='Nobel Prize', type='Award', properties={}), Node(id='University Of Paris', type='Organization', properties={}), Node(id='Robin Williams', type='Person', properties={})], relationships=[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Nobel Prize', type='Award', properties={}), type='WON', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Nobel Prize', type='Award', properties={}), type='WON', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='PROFESSOR', properties={}), Relationship(source=Node(id='Pierre Curie', type='Person', properties={}), target=Node(id='Nobel Prize', type='Award', properties={}), type='WON', properties={})], source=Document(metadata={'id': 'de3c93515e135ac0e47ca82a4f9b82d8'}, page_content='\\nMarie Curie, 7 November 1867 – 4 July 1934, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\\nShe was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.\\nHer husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.\\nShe was, in 1906, the first woman to become a professor at the University of Paris.\\nAlso, Robin Williams!\\n'))]\n" | ||
] | ||
} | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"clean_graph()" | ||
], | ||
"metadata": { | ||
"id": "qcZpEwBmtnqf" | ||
}, | ||
"execution_count": 31, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"no_schema_prompt = LLMGraphTransformer(llm=llm, ignore_tool_usage=True)\n", | ||
"data = await no_schema_prompt.aconvert_to_graph_documents(documents)\n", | ||
"graph.add_graph_documents(data)" | ||
], | ||
"metadata": { | ||
"id": "pSDKSDpmvYgt" | ||
}, | ||
"execution_count": 29, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"clean_graph()" | ||
], | ||
"metadata": { | ||
"id": "xyfW25s7xNPe" | ||
}, | ||
"execution_count": 36, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"allowed_nodes = [\"Person\", \"Organization\", \"Location\", \"Award\", \"ResearchField\"]\n", | ||
"nodes_defined = LLMGraphTransformer(llm=llm, allowed_nodes=allowed_nodes)\n", | ||
"data = await nodes_defined.aconvert_to_graph_documents(documents)\n", | ||
"graph.add_graph_documents(data)" | ||
], | ||
"metadata": { | ||
"id": "qyBPVNenUuXy" | ||
}, | ||
"execution_count": 37, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"clean_graph()" | ||
], | ||
"metadata": { | ||
"id": "nAx68NCWVAcQ" | ||
}, | ||
"execution_count": 38, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"allowed_nodes = [\"Person\", \"Organization\", \"Place\", \"Award\", \"ResearchField\"]\n", | ||
"allowed_relationships = [\"SPOUSE\", \"AWARD\", \"FIELD_OF_RESEARCH\", \"WORKS_AT\", \"IN_LOCATION\"]\n", | ||
"rels_defined = LLMGraphTransformer(\n", | ||
" llm=llm,\n", | ||
" allowed_nodes=allowed_nodes,\n", | ||
" allowed_relationships=allowed_relationships\n", | ||
")\n", | ||
"data = await rels_defined.aconvert_to_graph_documents(documents)\n", | ||
"graph.add_graph_documents(data)" | ||
], | ||
"metadata": { | ||
"id": "s6EPH9bHJWuh" | ||
}, | ||
"execution_count": 7, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"clean_graph()" | ||
], | ||
"metadata": { | ||
"id": "VBJKhP__hz3X" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"allowed_nodes = [\"Person\", \"Organization\", \"Location\", \"Award\", \"ResearchField\"]\n", | ||
"allowed_relationships = [\n", | ||
" (\"Person\", \"SPOUSE\", \"Person\"),\n", | ||
" (\"Person\", \"AWARD\", \"Award\"),\n", | ||
" (\"Person\", \"WORKS_AT\", \"Organization\"),\n", | ||
" (\"Organization\", \"IN_LOCATION\", \"Location\"),\n", | ||
" (\"Person\", \"FIELD_OF_RESEARCH\", \"ResearchField\")\n", | ||
"]\n", | ||
"rels_defined = LLMGraphTransformer(\n", | ||
" llm=llm,\n", | ||
" allowed_nodes=allowed_nodes,\n", | ||
" allowed_relationships=allowed_relationships\n", | ||
")\n", | ||
"data = await rels_defined.aconvert_to_graph_documents(documents)" | ||
], | ||
"metadata": { | ||
"id": "Up0RRInFJadP" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
} | ||
] | ||
} |