modifying freeze.txt and adjusting max_tokens in
Reducing freeze.txt to only the relevant modules needed for the notebook to run in Colab. Also adding a max_tokens option so that the phi2 model gives concise outputs.
faaiz-25 committed Mar 25, 2024
1 parent c3a763a commit cf496f0
Showing 3 changed files with 17 additions and 236 deletions.
14 changes: 7 additions & 7 deletions src/
Original file line number Diff line number Diff line change
@@ -14,7 +14,7 @@
import torch

def initialise_phi2():
def initialise_phi2(max_tokens):
"""initialise phi2 model from HuggingFace and output as a langchain model object
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
Expand All @@ -25,12 +25,12 @@ def initialise_phi2():

#load in phi-2 model - a small model with 2B parameters
model_id = "microsoft/phi-2"
#set max tokens to 1000 as small models such as phi-2 will produce verbose outputs
max_new_tokens = 1000
max_new_tokens = 1000

#create hugging face pipeline for phi2 model using the max_tokens parameter
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=1000)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=max_tokens)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=max_tokens)

#set logging information to info to avoid warnings
Expand Down Expand Up @@ -84,7 +84,7 @@ def initialise_anthropic():

class RagPipeline:
def __init__(self, EMBEDDING_MODEL, PERSIST_DIRECTORY, stuff_documents_prompt=STUFF_DOCUMENTS_PROMPT, inject_metadata_prompt=INJECT_METADATA_PROMPT, hyde_prompt = HYDE_PROMPT, device=None, model_type="anthropic"):
def __init__(self, EMBEDDING_MODEL, PERSIST_DIRECTORY, stuff_documents_prompt=STUFF_DOCUMENTS_PROMPT, inject_metadata_prompt=INJECT_METADATA_PROMPT, hyde_prompt = HYDE_PROMPT, device=None, model_type="anthropic", max_tokens=500):

if device is None:
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
Expand All @@ -93,7 +93,7 @@ def __init__(self, EMBEDDING_MODEL, PERSIST_DIRECTORY, stuff_documents_prompt=ST

#if the user wants to run the phi2 model, use its prompt for the stuff documents chain; otherwise default to the anthropic prompt
if model_type == 'phi2':
self.llm = initialise_phi2()
self.llm = initialise_phi2()
stuff_documents_prompt = PHI2_PROMPT
self.llm = initialise_anthropic()
self.llm = initialise_anthropic()
Expand Down

