diff --git a/1.pdf b/1.pdf new file mode 100644 index 0000000..2be1634 Binary files /dev/null and b/1.pdf differ diff --git a/1.pdf:Zone.Identifier b/1.pdf:Zone.Identifier new file mode 100644 index 0000000..7b52146 --- /dev/null +++ b/1.pdf:Zone.Identifier @@ -0,0 +1,3 @@ +[ZoneTransfer] +ZoneId=3 +HostUrl=https://cdn.discordapp.com/attachments/1105452323918073886/1105866194705186816/2110.09734.pdf diff --git a/= b/= new file mode 100644 index 0000000..e69de29 diff --git a/google-test.py b/google-test.py index 77d93d4..bc967f2 100644 --- a/google-test.py +++ b/google-test.py @@ -1,8 +1,8 @@ import google.generativeai as palm -palm.configure(api_key='') +palm.configure(api_key="") # Create a new conversation -response = palm.chat(messages='how are you') +response = palm.chat(messages="how are you") # Last contains the model's response: print(response.last) diff --git a/load_creds.py b/load_creds.py index 1c49125..7617100 100644 --- a/load_creds.py +++ b/load_creds.py @@ -4,11 +4,12 @@ from google.oauth2.credentials import Credentials from google_auth_oauthlib.flow import InstalledAppFlow -SCOPES = ['https://www.googleapis.com/auth/generative-language.tuning'] +SCOPES = ["https://www.googleapis.com/auth/generative-language.tuning"] + def load_creds(): """Converts `oauth-client-id.json` to a credential object. - + This function caches the generated tokens to minimize the use of the consent screen. """ @@ -16,17 +17,18 @@ def load_creds(): # The file token.json stores the user's access and refresh tokens, and is # created automatically when the authorization flow completes for the first # time. - if os.path.exists('token.json'): - creds = Credentials.from_authorized_user_file('token.json', SCOPES) + if os.path.exists("token.json"): + creds = Credentials.from_authorized_user_file("token.json", SCOPES) # If there are no (valid) credentials available, let the user log in. if not creds or not creds.valid: if creds and creds.expired and creds.refresh_token: creds.refresh(Request()) else: flow = InstalledAppFlow.from_client_secrets_file( - 'oauth-client-id.json', SCOPES) + "oauth-client-id.json", SCOPES + ) creds = flow.run_local_server(port=0) # Save the credentials for the next run - with open('token.json', 'w') as token: + with open("token.json", "w") as token: token.write(creds.to_json()) - return creds \ No newline at end of file + return creds diff --git a/main-google.py b/main-google.py index fb12ded..1a26a08 100644 --- a/main-google.py +++ b/main-google.py @@ -9,7 +9,7 @@ from langchain.vectorstores import Chroma from langchain.embeddings import OpenAIEmbeddings -from langchain.chat_models import AzureChatOpenAI +from langchain.chat_models import AzureChatOpenAI from langchain.memory import ConversationSummaryMemory from langchain.chains import ConversationalRetrievalChain @@ -19,10 +19,11 @@ import google.generativeai as palm from load_creds import load_creds + def run(): dotenv.load_dotenv() - - # creds = load_creds() + + # creds = load_creds() # load a text from web loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/") diff --git a/palm.py b/palm.py new file mode 100644 index 0000000..9b0b2bc --- /dev/null +++ b/palm.py @@ -0,0 +1,32 @@ +import os +from dotenv import load_dotenv, find_dotenv + +import google.generativeai as palm + +load_dotenv(find_dotenv()) + + +api_key = os.environ["GOOGLE_API_KEY"] +palm.configure(api_key=api_key) + + +# choose a model and write details of available model. +models = [ + m for m in palm.list_models() if "generateText" in m.supported_generation_methods +] + +print("there are {} models available".format(len(models))) +model = models[0].name +print(model) + +# generate text +prompt = "Why sky is green?" +text = palm.generate_text( + prompt=prompt, + model=model, + temperature=0.1, + max_output_tokens=64, + stop_sequences=["\n"], +) + +print(text.result) diff --git a/palm_langchain.py b/palm_langchain.py new file mode 100644 index 0000000..4ff47b5 --- /dev/null +++ b/palm_langchain.py @@ -0,0 +1,64 @@ +import os +from dotenv import load_dotenv, find_dotenv + +from langchain.vectorstores import Chroma +from langchain.embeddings import GooglePalmEmbeddings +from langchain.llms import GooglePalm +from langchain.document_loaders import PyPDFLoader +from langchain.chains.question_answering import load_qa_chain +from langchain.text_splitter import CharacterTextSplitter +from langchain.chains import RetrievalQA + + +load_dotenv(find_dotenv()) + +llm = GooglePalm(google_api_key=os.environ["GOOGLE_API_KEY"]) +llm.temperature = 0.1 + +# prompts = ["The opposite of hot is", "the opposite of cold is"] +# llm_results = llm._generate(prompts) + +# print(llm_results.generations[0][0].text) +# print(llm_results.generations[1][0].text) + + +# urls = [ +# "https://www.linkedin.com/pulse/transformers-without-pain-ibrahim-sobh-phd/", +# ] + +# loader = [UnstructuredURLLoader(urls=urls)] +# index = VectorstoreIndexCreator( +# embedding=GooglePalmEmbeddings(), +# text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) +# ).from_loaders(loader) + +# memory = ConversationSummaryMemory( +# llm=llm, memory_key="chat_history", return_messages=True +# ) + +# chain = RetrievalQA.from_chain_type( +# llm=llm, +# chain_type="stuff", +# retriever=index.vectorstore.as_retriever(), +# input_key="question", +# memory=memory +# ) + +# answer = chain.run('What is machine translation?') +# print(answer) + + +pdf_folder_path = "/home/beast/langchain-test/1.pdf" +pdf_loaders = PyPDFLoader(pdf_folder_path) + +text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) +splits = text_splitter.split_documents(pdf_loaders.load()) +vectorstore = Chroma.from_documents(documents=splits, embedding=GooglePalmEmbeddings()) +retriever = vectorstore.as_retriever() + +pdf_chain = RetrievalQA.from_chain_type( + llm=llm, chain_type="stuff", retriever=retriever, input_key="question" +) + +pdf_answer = pdf_chain.run("What are GANs?") +print(pdf_answer) diff --git a/requirements.txt b/requirements.txt index 1b93fca..31873cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ langchain openai -chromadb +chromadb==0.3.29 langchainhub python-dotenv bs4 @@ -11,4 +11,8 @@ google-cloud-aiplatform>=1.35.0 google-api-python-client google-auth-httplib2 google-auth-oauthlib -google-generativeai \ No newline at end of file +google-generativeai +unstructured +# pdf2image +# pdfminer +pypdf \ No newline at end of file diff --git a/script.py b/script.py index 17e30aa..f6941bd 100644 --- a/script.py +++ b/script.py @@ -7,5 +7,5 @@ palm.configure(credentials=creds) print() -print('Available base models:', [m.name for m in palm.list_tuned_models()]) -print('My tuned models:', [m.name for m in palm.list_tuned_models()]) \ No newline at end of file +print("Available base models:", [m.name for m in palm.list_tuned_models()]) +print("My tuned models:", [m.name for m in palm.list_tuned_models()])