Skip to content

Commit

Permalink
final ans
Browse files Browse the repository at this point in the history
  • Loading branch information
VomV committed Aug 8, 2024
1 parent deb7d7d commit 790c112
Showing 1 changed file with 113 additions and 24 deletions.
137 changes: 113 additions & 24 deletions rag_basic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -110431,7 +110431,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -110456,7 +110456,8 @@
"\n",
" print(\"Reranking Docs...\")\n",
" relevant_docs = reranker.rerank(question, relevant_docs, k=num_final_docs)\n",
" relevant_docs = [doc[\"page_content\"] for doc in relevant_docs]\n",
"\n",
" relevant_docs = [doc[\"content\"] for doc in relevant_docs]\n",
"\n",
" relevant_docs = relevant_docs[:num_final_docs]\n",
"\n",
Expand All @@ -110475,7 +110476,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 37,
"metadata": {},
"outputs": [
{
Expand All @@ -110489,10 +110490,10 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Python(51678) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
"Python(51683) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
"Python(51685) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
"Python(51688) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n"
"Python(72818) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
"Python(72819) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
"Python(72822) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n",
"Python(72823) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.\n"
]
},
{
Expand All @@ -110506,27 +110507,115 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/vivekr/Documents/vivek/projects/rag_lab/.viv-rag/lib/python3.12/site-packages/colbert/utils/amp.py:15: FutureWarning:\n",
"100%|██████████| 1/1 [00:05<00:00, 5.37s/it]\n",
"Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Generating Ans...\n",
"___========Answer=========____\n",
"import re\n",
"\n",
"`torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\n",
"def get_text():\n",
" text = input()\n",
" return text\n",
"\n",
"/Users/vivekr/Documents/vivek/projects/rag_lab/.viv-rag/lib/python3.12/site-packages/torch/amp/autocast_mode.py:265: UserWarning:\n",
"def get_regex(text):\n",
" regex = re.compile(r'(\\w+)\\b\\w+\\b')\n",
" return regex.findall(text)\n",
"\n",
"User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
"def get_pattern(text):\n",
" pattern = re.compile(r'\\d{3}-\\d{3}-\\d{4}')\n",
" return pattern.findall(text)\n",
"\n",
"100%|██████████| 1/1 [00:08<00:00, 8.31s/it]\n"
]
},
{
"ename": "AttributeError",
"evalue": "'dict' object has no attribute 'page_content'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[31], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m question \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhow to create a pipeline object?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m ans, relevant_docs \u001b[38;5;241m=\u001b[39m \u001b[43manswer_with_rag\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquestion\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mREADER_LLM\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mKB_VDB\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreranker\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mRERANKER\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m___========Answer=========____\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(ans)\n",
"Cell \u001b[0;32mIn[30], line 22\u001b[0m, in \u001b[0;36manswer_with_rag\u001b[0;34m(question, llm, knowledge_index, reranker, num_retrieved_docs, num_final_docs)\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReranking Docs...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 21\u001b[0m relevant_docs \u001b[38;5;241m=\u001b[39m reranker\u001b[38;5;241m.\u001b[39mrerank(question, relevant_docs, k\u001b[38;5;241m=\u001b[39mnum_final_docs)\n\u001b[0;32m---> 22\u001b[0m relevant_docs \u001b[38;5;241m=\u001b[39m [\u001b[43mdoc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpage_content\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m relevant_docs]\n\u001b[1;32m 24\u001b[0m relevant_docs \u001b[38;5;241m=\u001b[39m relevant_docs[:num_final_docs]\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m#Final prompt\u001b[39;00m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'dict' object has no attribute 'page_content'"
"if __name__ == '__main__':\n",
" text = get_text()\n",
" print(get_pattern(text))\n",
"\n",
"___============Relevant Docs===========____\n",
"# Allocate a pipeline for object detection\n",
">>> object_detector = pipeline('object-detection')\n",
">>> object_detector(image)\n",
"[{'score': 0.9982201457023621,\n",
" 'label': 'remote',\n",
" 'box': {'xmin': 40, 'ymin': 70, 'xmax': 175, 'ymax': 117}},\n",
" {'score': 0.9960021376609802,\n",
" 'label': 'remote',\n",
" 'box': {'xmin': 333, 'ymin': 72, 'xmax': 368, 'ymax': 187}},\n",
" {'score': 0.9954745173454285,\n",
" 'label': 'couch',\n",
" 'box': {'xmin': 0, 'ymin': 1, 'xmax': 639, 'ymax': 473}},\n",
" {'score': 0.9988006353378296,\n",
" 'label': 'cat',\n",
" 'box': {'xmin': 13, 'ymin': 52, 'xmax': 314, 'ymax': 470}},\n",
" {'score': 0.9986783862113953,\n",
" 'label': 'cat',\n",
" 'box': {'xmin': 345, 'ymin': 23, 'xmax': 640, 'ymax': 368}}]\n",
"# Allocate a pipeline for object detection\n",
">>> object_detector = pipeline('object_detection')\n",
">>> object_detector(image)\n",
"[{'score': 0.9982201457023621,\n",
" 'label': 'remote',\n",
" 'box': {'xmin': 40, 'ymin': 70, 'xmax': 175, 'ymax': 117}},\n",
" {'score': 0.9960021376609802,\n",
" 'label': 'remote',\n",
" 'box': {'xmin': 333, 'ymin': 72, 'xmax': 368, 'ymax': 187}},\n",
" {'score': 0.9954745173454285,\n",
" 'label': 'couch',\n",
" 'box': {'xmin': 0, 'ymin': 1, 'xmax': 639, 'ymax': 473}},\n",
" {'score': 0.9988006353378296,\n",
" 'label': 'cat',\n",
" 'box': {'xmin': 13, 'ymin': 52, 'xmax': 314, 'ymax': 470}},\n",
" {'score': 0.9986783862113953,\n",
" 'label': 'cat',\n",
" 'box': {'xmin': 345, 'ymin': 23, 'xmax': 640, 'ymax': 368}}]\n",
"Start by creating an instance of [`pipeline`] and specifying a task you want to use it for. In this guide, you'll use the [`pipeline`] for sentiment analysis as an example:\n",
"\n",
"```py\n",
">>> from transformers import pipeline\n",
"\n",
">>> classifier = pipeline(\"sentiment-analysis\")\n",
"```\n",
"\n",
"2. Pass a prompt to the pipeline to generate an image:\n",
"\n",
"```py\n",
"image = pipeline(\n",
"\t\"stained glass of darth vader, backlight, centered composition, masterpiece, photorealistic, 8k\"\n",
").images[0]\n",
"image\n",
"```\n",
"\n",
"## Add the pipeline to 🤗 Transformers\n",
"\n",
"If you want to contribute your pipeline to 🤗 Transformers, you will need to add a new module in the `pipelines` submodule\n",
"with the code of your pipeline, then add it to the list of tasks defined in `pipelines/__init__.py`.\n",
"\n",
"Then you will need to add tests. Create a new file `tests/test_pipelines_MY_PIPELINE.py` with examples of the other tests.\n",
"\n",
"The `run_pipeline_test` function will be very generic and run on small random models on every possible\n",
"architecture as defined by `model_mapping` and `tf_model_mapping`.\n",
"\n",
"This is very important to test future compatibility, meaning if someone adds a new model for\n",
"`XXXForQuestionAnswering` then the pipeline test will attempt to run on it. Because the models are random it's\n",
"impossible to check for actual values, that's why there is a helper `ANY` that will simply attempt to match the\n",
"output of the pipeline TYPE.\n",
"\n",
"You also *need* to implement 2 (ideally 4) tests.\n",
"\n",
"- `test_small_model_pt` : Define 1 small model for this pipeline (doesn't matter if the results don't make sense)\n",
" and test the pipeline outputs. The results should be the same as `test_small_model_tf`.\n",
"- `test_small_model_tf` : Define 1 small model for this pipeline (doesn't matter if the results don't make sense)\n",
" and test the pipeline outputs. The results should be the same as `test_small_model_pt`.\n",
"- `test_large_model_pt` (`optional`): Tests the pipeline on a real pipeline where the results are supposed to\n",
" make sense. These tests are slow and should be marked as such. Here the goal is to showcase the pipeline and to make\n",
" sure there is no drift in future releases.\n",
"- `test_large_model_tf` (`optional`): Tests the pipeline on a real pipeline where the results are supposed to\n",
" make sense. These tests are slow and should be marked as such. Here the goal is to showcase the pipeline and to make\n",
" sure there is no drift in future releases.\n"
]
}
],
Expand Down

0 comments on commit 790c112

Please sign in to comment.