Skip to content

Commit

Permalink
Created using Colab
Browse files Browse the repository at this point in the history
  • Loading branch information
tomasonjo committed Oct 26, 2024
1 parent 7057945 commit 08c81cf
Showing 1 changed file with 296 additions and 0 deletions.
296 changes: 296 additions & 0 deletions llm/llm_graph_transformer_in_depth.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyO7tx7538YWWviLYeotHCRL",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/tomasonjo/blogs/blob/master/llm/llm_graph_transformer_in_depth.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "zK-Jv5o5qPgm",
"outputId": "1f87312b-01de-4c79-a03c-2b681f8cb659"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.6/50.6 kB\u001b[0m \u001b[31m494.8 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m296.6/296.6 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m36.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m208.1/208.1 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.9/49.9 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m408.0/408.0 kB\u001b[0m \u001b[31m19.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m296.9/296.9 kB\u001b[0m \u001b[31m19.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m386.9/386.9 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m31.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.0/78.0 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m325.2/325.2 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.5/49.5 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m144.5/144.5 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.5/54.5 kB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h"
]
}
],
"source": [
"# Install with the %pip magic (not !pip) so the packages land in the\n",
"# environment of the running kernel rather than whatever pip the shell finds.\n",
"%pip install --quiet neo4j langchain-community langchain-experimental langchain-openai json-repair"
]
},
{
"cell_type": "code",
"source": [
"import getpass\n",
"import os\n",
"\n",
"from langchain_community.graphs import Neo4jGraph\n",
"\n",
"# Connection settings come from the environment so that no credentials are\n",
"# committed with the notebook. The URI and username fall back to the values\n",
"# this notebook was written against; the password is prompted for when unset.\n",
"NEO4J_URI = os.environ.get(\"NEO4J_URI\", \"bolt://52.201.218.219:7687\")\n",
"NEO4J_USERNAME = os.environ.get(\"NEO4J_USERNAME\", \"neo4j\")\n",
"NEO4J_PASSWORD = os.environ.get(\"NEO4J_PASSWORD\") or getpass.getpass(\"Neo4j password: \")\n",
"\n",
"graph = Neo4jGraph(\n",
"    url=NEO4J_URI,\n",
"    username=NEO4J_USERNAME,\n",
"    password=NEO4J_PASSWORD,\n",
"    refresh_schema=False\n",
")\n",
"\n",
"def clean_graph():\n",
"    \"\"\"Delete every node, together with its relationships, from the connected database.\n",
"\n",
"    Destructive: the query matches all nodes, so only run this against a\n",
"    scratch/sandbox database.\n",
"    \"\"\"\n",
"    query = \"\"\"\n",
"    MATCH (n)\n",
"    DETACH DELETE n\n",
"    \"\"\"\n",
"    graph.query(query)"
],
"metadata": {
"id": "tbUnI_F-s5rP"
},
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from langchain_core.documents import Document\n",
"\n",
"# Sample passage that every extraction run in this notebook is given as input.\n",
"text = \"\"\"\n",
"Marie Curie, 7 November 1867 – 4 July 1934, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\n",
"She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.\n",
"Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.\n",
"She was, in 1906, the first woman to become a professor at the University of Paris.\n",
"Also, Robin Williams!\n",
"\"\"\"\n",
"\n",
"# The transformers take a list of documents; wrap the passage as its only element.\n",
"documents = [\n",
"    Document(page_content=text),\n",
"]"
],
"metadata": {
"id": "ASyrLxerqUQc"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import getpass\n",
"import os\n",
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"# Prompt only when no key is configured, so re-running the cell (or running with\n",
"# OPENAI_API_KEY already exported) neither re-prompts nor overwrites the key.\n",
"if not os.environ.get(\"OPENAI_API_KEY\"):\n",
"    os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI api key\")\n",
"\n",
"# Chat model shared by every LLMGraphTransformer created below.\n",
"llm = ChatOpenAI(model='gpt-4o')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "g0hE3kk1q1t4",
"outputId": "068bd039-663e-46c9-cc60-c2ec64f5c03b"
},
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI api key··········\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from langchain_experimental.graph_transformers import LLMGraphTransformer\n",
"\n",
"# Baseline transformer: only the LLM is supplied, with no node or\n",
"# relationship schema to constrain what gets extracted.\n",
"no_schema = LLMGraphTransformer(\n",
"    llm=llm,\n",
")"
],
"metadata": {
"id": "6980jrt6rcA-"
},
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"source": [
"data = await no_schema.aconvert_to_graph_documents(documents)\n",
"print(data)\n",
"graph.add_graph_documents(data)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "CCsfJvlfrshH",
"outputId": "a4cc072c-cd39-4c7b-f628-1ba615cddc09"
},
"execution_count": 33,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[GraphDocument(nodes=[Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='Nobel Prize', type='Award', properties={}), Node(id='University Of Paris', type='Organization', properties={}), Node(id='Robin Williams', type='Person', properties={})], relationships=[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Nobel Prize', type='Award', properties={}), type='WON', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Nobel Prize', type='Award', properties={}), type='WON', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='PROFESSOR', properties={}), Relationship(source=Node(id='Pierre Curie', type='Person', properties={}), target=Node(id='Nobel Prize', type='Award', properties={}), type='WON', properties={})], source=Document(metadata={'id': 'de3c93515e135ac0e47ca82a4f9b82d8'}, page_content='\\nMarie Curie, 7 November 1867 – 4 July 1934, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\\nShe was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.\\nHer husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.\\nShe was, in 1906, the first woman to become a professor at the University of Paris.\\nAlso, Robin Williams!\\n'))]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"clean_graph()"
],
"metadata": {
"id": "qcZpEwBmtnqf"
},
"execution_count": 31,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Schema-free extraction again, this time with ignore_tool_usage=True\n",
"# (see the LLMGraphTransformer docs for what this flag switches off).\n",
"no_schema_prompt = LLMGraphTransformer(\n",
"    llm=llm,\n",
"    ignore_tool_usage=True,\n",
")\n",
"\n",
"# Extract graph documents from the sample text, then write them to Neo4j.\n",
"data = await no_schema_prompt.aconvert_to_graph_documents(documents)\n",
"graph.add_graph_documents(data)"
],
"metadata": {
"id": "pSDKSDpmvYgt"
},
"execution_count": 29,
"outputs": []
},
{
"cell_type": "code",
"source": [
"clean_graph()"
],
"metadata": {
"id": "xyfW25s7xNPe"
},
"execution_count": 36,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Node labels passed to the transformer as its allowed_nodes schema.\n",
"allowed_nodes = [\n",
"    \"Person\",\n",
"    \"Organization\",\n",
"    \"Location\",\n",
"    \"Award\",\n",
"    \"ResearchField\",\n",
"]\n",
"nodes_defined = LLMGraphTransformer(\n",
"    llm=llm,\n",
"    allowed_nodes=allowed_nodes,\n",
")\n",
"\n",
"# Extract graph documents from the sample text, then write them to Neo4j.\n",
"data = await nodes_defined.aconvert_to_graph_documents(documents)\n",
"graph.add_graph_documents(data)"
],
"metadata": {
"id": "qyBPVNenUuXy"
},
"execution_count": 37,
"outputs": []
},
{
"cell_type": "code",
"source": [
"clean_graph()"
],
"metadata": {
"id": "nAx68NCWVAcQ"
},
"execution_count": 38,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Node labels: kept identical to the other schema cells (\"Location\", not\n",
"# \"Place\") so the runs differ only in the relationship schema and the label\n",
"# matches the IN_LOCATION relationship below.\n",
"allowed_nodes = [\"Person\", \"Organization\", \"Location\", \"Award\", \"ResearchField\"]\n",
"# Relationship types given as plain strings, without source/target constraints.\n",
"allowed_relationships = [\"SPOUSE\", \"AWARD\", \"FIELD_OF_RESEARCH\", \"WORKS_AT\", \"IN_LOCATION\"]\n",
"rels_defined = LLMGraphTransformer(\n",
"    llm=llm,\n",
"    allowed_nodes=allowed_nodes,\n",
"    allowed_relationships=allowed_relationships\n",
")\n",
"# Extract graph documents from the sample text, then write them to Neo4j.\n",
"data = await rels_defined.aconvert_to_graph_documents(documents)\n",
"graph.add_graph_documents(data)"
],
"metadata": {
"id": "s6EPH9bHJWuh"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [
"clean_graph()"
],
"metadata": {
"id": "VBJKhP__hz3X"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Node labels passed to the transformer as its allowed_nodes schema.\n",
"allowed_nodes = [\n",
"    \"Person\",\n",
"    \"Organization\",\n",
"    \"Location\",\n",
"    \"Award\",\n",
"    \"ResearchField\",\n",
"]\n",
"\n",
"# Relationship schema as triples; in each one the first and last entries are\n",
"# node labels from allowed_nodes and the middle entry is the relationship type.\n",
"allowed_relationships = [\n",
"    (\"Person\", \"SPOUSE\", \"Person\"),\n",
"    (\"Person\", \"AWARD\", \"Award\"),\n",
"    (\"Person\", \"WORKS_AT\", \"Organization\"),\n",
"    (\"Organization\", \"IN_LOCATION\", \"Location\"),\n",
"    (\"Person\", \"FIELD_OF_RESEARCH\", \"ResearchField\"),\n",
"]\n",
"\n",
"rels_defined = LLMGraphTransformer(\n",
"    llm=llm,\n",
"    allowed_nodes=allowed_nodes,\n",
"    allowed_relationships=allowed_relationships,\n",
")\n",
"\n",
"# Extraction only: this cell does not write the result to the graph.\n",
"data = await rels_defined.aconvert_to_graph_documents(documents)"
],
"metadata": {
"id": "Up0RRInFJadP"
},
"execution_count": null,
"outputs": []
}
]
}

0 comments on commit 08c81cf

Please sign in to comment.