final ans

VomV · Aug 8, 2024 · 790c112 · 790c112
1 parent deb7d7d
commit 790c112
Showing 1 changed file with 113 additions and 24 deletions.
diff --git a/rag_basic.ipynb b/rag_basic.ipynb
@@ -110431,7 +110431,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -110456,7 +110456,8 @@
     "\n",
     "        print(\"Reranking Docs...\")\n",
     "        relevant_docs = reranker.rerank(question, relevant_docs, k=num_final_docs)\n",
-    "        relevant_docs = [doc[\"page_content\"] for doc in relevant_docs]\n",
+    "\n",
+    "        relevant_docs = [doc[\"content\"] for doc in relevant_docs]\n",
     "\n",
     "    relevant_docs = relevant_docs[:num_final_docs]\n",
     "\n",
@@ -110475,7 +110476,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [
     {
@@ -110489,10 +110490,10 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Python(51678) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
-      "Python(51683) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
-      "Python(51685) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
-      "Python(51688) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n"
+      "Python(72818) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
+      "Python(72819) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
+      "Python(72822) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
+      "Python(72823) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n"
      ]
     },
     {
@@ -110506,27 +110507,115 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/Users/vivekr/Documents/vivek/projects/rag_lab/.viv-rag/lib/python3.12/site-packages/colbert/utils/amp.py:15: FutureWarning:\n",
+      "100%|██████████| 1/1 [00:05<00:00,  5.37s/it]\n",
+      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generating Ans...\n",
+      "___========Answer=========____\n",
+      "import re\n",
       "\n",
-      "`torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\n",
+      "def get_text():\n",
+      "    text = input()\n",
+      "    return text\n",
       "\n",
-      "/Users/vivekr/Documents/vivek/projects/rag_lab/.viv-rag/lib/python3.12/site-packages/torch/amp/autocast_mode.py:265: UserWarning:\n",
+      "def get_regex(text):\n",
+      "    regex = re.compile(r'(\\w+)\\b\\w+\\b')\n",
+      "    return regex.findall(text)\n",
       "\n",
-      "User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
+      "def get_pattern(text):\n",
+      "    pattern = re.compile(r'\\d{3}-\\d{3}-\\d{4}')\n",
+      "    return pattern.findall(text)\n",
       "\n",
-      "100%|██████████| 1/1 [00:08<00:00,  8.31s/it]\n"
-     ]
-    },
-    {
-     "ename": "AttributeError",
-     "evalue": "'dict' object has no attribute 'page_content'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[31], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m question \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhow to create a pipeline object?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m ans, relevant_docs \u001b[38;5;241m=\u001b[39m \u001b[43manswer_with_rag\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquestion\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mREADER_LLM\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mKB_VDB\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreranker\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mRERANKER\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m___========Answer=========____\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m      6\u001b[0m \u001b[38;5;28mprint\u001b[39m(ans)\n",
-      "Cell \u001b[0;32mIn[30], line 22\u001b[0m, in \u001b[0;36manswer_with_rag\u001b[0;34m(question, llm, knowledge_index, reranker, num_retrieved_docs, num_final_docs)\u001b[0m\n\u001b[1;32m     20\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReranking Docs...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     21\u001b[0m     relevant_docs \u001b[38;5;241m=\u001b[39m reranker\u001b[38;5;241m.\u001b[39mrerank(question, relevant_docs, k\u001b[38;5;241m=\u001b[39mnum_final_docs)\n\u001b[0;32m---> 22\u001b[0m     relevant_docs \u001b[38;5;241m=\u001b[39m [\u001b[43mdoc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpage_content\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m relevant_docs]\n\u001b[1;32m     24\u001b[0m relevant_docs \u001b[38;5;241m=\u001b[39m relevant_docs[:num_final_docs]\n\u001b[1;32m     26\u001b[0m \u001b[38;5;66;03m#Final prompt\u001b[39;00m\n",
-      "\u001b[0;31mAttributeError\u001b[0m: 'dict' object has no attribute 'page_content'"
+      "if __name__ == '__main__':\n",
+      "    text = get_text()\n",
+      "    print(get_pattern(text))\n",
+      "\n",
+      "___============Relevant Docs===========____\n",
+      "# Allocate a pipeline for object detection\n",
+      ">>> object_detector = pipeline('object-detection')\n",
+      ">>> object_detector(image)\n",
+      "[{'score': 0.9982201457023621,\n",
+      "  'label': 'remote',\n",
+      "  'box': {'xmin': 40, 'ymin': 70, 'xmax': 175, 'ymax': 117}},\n",
+      " {'score': 0.9960021376609802,\n",
+      "  'label': 'remote',\n",
+      "  'box': {'xmin': 333, 'ymin': 72, 'xmax': 368, 'ymax': 187}},\n",
+      " {'score': 0.9954745173454285,\n",
+      "  'label': 'couch',\n",
+      "  'box': {'xmin': 0, 'ymin': 1, 'xmax': 639, 'ymax': 473}},\n",
+      " {'score': 0.9988006353378296,\n",
+      "  'label': 'cat',\n",
+      "  'box': {'xmin': 13, 'ymin': 52, 'xmax': 314, 'ymax': 470}},\n",
+      " {'score': 0.9986783862113953,\n",
+      "  'label': 'cat',\n",
+      "  'box': {'xmin': 345, 'ymin': 23, 'xmax': 640, 'ymax': 368}}]\n",
+      "# Allocate a pipeline for object detection\n",
+      ">>> object_detector = pipeline('object_detection')\n",
+      ">>> object_detector(image)\n",
+      "[{'score': 0.9982201457023621,\n",
+      "  'label': 'remote',\n",
+      "  'box': {'xmin': 40, 'ymin': 70, 'xmax': 175, 'ymax': 117}},\n",
+      " {'score': 0.9960021376609802,\n",
+      "  'label': 'remote',\n",
+      "  'box': {'xmin': 333, 'ymin': 72, 'xmax': 368, 'ymax': 187}},\n",
+      " {'score': 0.9954745173454285,\n",
+      "  'label': 'couch',\n",
+      "  'box': {'xmin': 0, 'ymin': 1, 'xmax': 639, 'ymax': 473}},\n",
+      " {'score': 0.9988006353378296,\n",
+      "  'label': 'cat',\n",
+      "  'box': {'xmin': 13, 'ymin': 52, 'xmax': 314, 'ymax': 470}},\n",
+      " {'score': 0.9986783862113953,\n",
+      "  'label': 'cat',\n",
+      "  'box': {'xmin': 345, 'ymin': 23, 'xmax': 640, 'ymax': 368}}]\n",
+      "Start by creating an instance of [`pipeline`] and specifying a task you want to use it for. In this guide, you'll use the [`pipeline`] for sentiment analysis as an example:\n",
+      "\n",
+      "```py\n",
+      ">>> from transformers import pipeline\n",
+      "\n",
+      ">>> classifier = pipeline(\"sentiment-analysis\")\n",
+      "```\n",
+      "\n",
+      "2. Pass a prompt to the pipeline to generate an image:\n",
+      "\n",
+      "```py\n",
+      "image = pipeline(\n",
+      "\t\"stained glass of darth vader, backlight, centered composition, masterpiece, photorealistic, 8k\"\n",
+      ").images[0]\n",
+      "image\n",
+      "```\n",
+      "\n",
+      "## Add the pipeline to 🤗 Transformers\n",
+      "\n",
+      "If you want to contribute your pipeline to 🤗 Transformers, you will need to add a new module in the `pipelines` submodule\n",
+      "with the code of your pipeline, then add it to the list of tasks defined in `pipelines/__init__.py`.\n",
+      "\n",
+      "Then you will need to add tests. Create a new file `tests/test_pipelines_MY_PIPELINE.py` with examples of the other tests.\n",
+      "\n",
+      "The `run_pipeline_test` function will be very generic and run on small random models on every possible\n",
+      "architecture as defined by `model_mapping` and `tf_model_mapping`.\n",
+      "\n",
+      "This is very important to test future compatibility, meaning if someone adds a new model for\n",
+      "`XXXForQuestionAnswering` then the pipeline test will attempt to run on it. Because the models are random it's\n",
+      "impossible to check for actual values, that's why there is a helper `ANY` that will simply attempt to match the\n",
+      "output of the pipeline TYPE.\n",
+      "\n",
+      "You also *need* to implement 2 (ideally 4) tests.\n",
+      "\n",
+      "- `test_small_model_pt` : Define 1 small model for this pipeline (doesn't matter if the results don't make sense)\n",
+      "  and test the pipeline outputs. The results should be the same as `test_small_model_tf`.\n",
+      "- `test_small_model_tf` : Define 1 small model for this pipeline (doesn't matter if the results don't make sense)\n",
+      "  and test the pipeline outputs. The results should be the same as `test_small_model_pt`.\n",
+      "- `test_large_model_pt` (`optional`): Tests the pipeline on a real pipeline where the results are supposed to\n",
+      "  make sense. These tests are slow and should be marked as such. Here the goal is to showcase the pipeline and to make\n",
+      "  sure there is no drift in future releases.\n",
+      "- `test_large_model_tf` (`optional`): Tests the pipeline on a real pipeline where the results are supposed to\n",
+      "  make sense. These tests are slow and should be marked as such. Here the goal is to showcase the pipeline and to make\n",
+      "  sure there is no drift in future releases.\n"
      ]
     }
    ],