Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Palm-langchain #1

Merged
merged 2 commits into from
Nov 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added 1.pdf
Binary file not shown.
3 changes: 3 additions & 0 deletions 1.pdf:Zone.Identifier
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[ZoneTransfer]
ZoneId=3
HostUrl=https://cdn.discordapp.com/attachments/1105452323918073886/1105866194705186816/2110.09734.pdf
Empty file added =
Empty file.
4 changes: 2 additions & 2 deletions google-test.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import google.generativeai as palm

# NOTE(review): the API key is intentionally blank here; supply a real key
# (or load it from the environment) before running this script.
palm.configure(api_key="")

# Create a new conversation with a single user message.
response = palm.chat(messages="how are you")

# `response.last` contains the model's most recent reply.
print(response.last)
16 changes: 9 additions & 7 deletions load_creds.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,31 @@
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow

SCOPES = ['https://www.googleapis.com/auth/generative-language.tuning']
SCOPES = ["https://www.googleapis.com/auth/generative-language.tuning"]


def load_creds():
    """Converts `oauth-client-id.json` to a credential object.

    This function caches the generated tokens to minimize the use of the
    consent screen.

    Returns:
        Valid user credentials with the generative-language tuning scope.
    """
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the
    # first time.
    if os.path.exists("token.json"):
        creds = Credentials.from_authorized_user_file("token.json", SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            # Expired but refreshable: renew silently, no consent screen.
            creds.refresh(Request())
        else:
            # First run (or unrecoverable creds): run the local OAuth flow.
            flow = InstalledAppFlow.from_client_secrets_file(
                "oauth-client-id.json", SCOPES
            )
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run.
        with open("token.json", "w") as token:
            token.write(creds.to_json())
    return creds
7 changes: 4 additions & 3 deletions main-google.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

from langchain.chat_models import AzureChatOpenAI
from langchain.chat_models import AzureChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

Expand All @@ -19,10 +19,11 @@
import google.generativeai as palm
from load_creds import load_creds


def run():
dotenv.load_dotenv()
# creds = load_creds()

# creds = load_creds()

# load a text from web
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
Expand Down
32 changes: 32 additions & 0 deletions palm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os

from dotenv import load_dotenv, find_dotenv
import google.generativeai as palm

# Pull GOOGLE_API_KEY from the nearest .env file into the environment.
load_dotenv(find_dotenv())

palm.configure(api_key=os.environ["GOOGLE_API_KEY"])

# Keep only the models that can serve plain text-generation requests.
text_models = [
    candidate
    for candidate in palm.list_models()
    if "generateText" in candidate.supported_generation_methods
]

print("there are {} models available".format(len(text_models)))

# Use the first text-capable model for the demo completion below.
model_name = text_models[0].name
print(model_name)

completion = palm.generate_text(
    model=model_name,
    prompt="Why sky is green?",
    temperature=0.1,  # near-greedy sampling for a mostly deterministic answer
    max_output_tokens=64,
    stop_sequences=["\n"],  # cut the completion at the first newline
)

print(completion.result)
64 changes: 64 additions & 0 deletions palm_langchain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import os
from dotenv import load_dotenv, find_dotenv

from langchain.vectorstores import Chroma
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
from langchain.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA


load_dotenv(find_dotenv())

# PaLM LLM configured from the environment; near-greedy sampling.
llm = GooglePalm(google_api_key=os.environ["GOOGLE_API_KEY"])
llm.temperature = 0.1

# Earlier experiments (raw prompts, URL indexing, conversational memory)
# are kept below, commented out.
# prompts = ["The opposite of hot is", "the opposite of cold is"]
# llm_results = llm._generate(prompts)

# print(llm_results.generations[0][0].text)
# print(llm_results.generations[1][0].text)


# urls = [
#     "https://www.linkedin.com/pulse/transformers-without-pain-ibrahim-sobh-phd/",
# ]

# loader = [UnstructuredURLLoader(urls=urls)]
# index = VectorstoreIndexCreator(
#     embedding=GooglePalmEmbeddings(),
#     text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# ).from_loaders(loader)

# memory = ConversationSummaryMemory(
#     llm=llm, memory_key="chat_history", return_messages=True
# )

# chain = RetrievalQA.from_chain_type(
#     llm=llm,
#     chain_type="stuff",
#     retriever=index.vectorstore.as_retriever(),
#     input_key="question",
#     memory=memory
# )

# answer = chain.run('What is machine translation?')
# print(answer)


# Load the paper, split it into ~1000-character chunks, and index the chunks
# in an in-memory Chroma store backed by PaLM embeddings.
pdf_path = "/home/beast/langchain-test/1.pdf"  # NOTE(review): hard-coded absolute path
pdf_loader = PyPDFLoader(pdf_path)

splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks = splitter.split_documents(pdf_loader.load())
store = Chroma.from_documents(documents=chunks, embedding=GooglePalmEmbeddings())

# Answer a question over the indexed PDF with a simple "stuff" QA chain.
pdf_chain = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=store.as_retriever(), input_key="question"
)

print(pdf_chain.run("What are GANs?"))
8 changes: 6 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
langchain
openai
chromadb
chromadb==0.3.29
langchainhub
python-dotenv
bs4
Expand All @@ -11,4 +11,8 @@ google-cloud-aiplatform>=1.35.0
google-api-python-client
google-auth-httplib2
google-auth-oauthlib
google-generativeai
unstructured
# pdf2image
# pdfminer
pypdf
4 changes: 2 additions & 2 deletions script.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
palm.configure(credentials=creds)

print()
# BUG FIX: the original printed *tuned* models under the "Available base
# models" label; base models come from palm.list_models().
print("Available base models:", [m.name for m in palm.list_models()])
print("My tuned models:", [m.name for m in palm.list_tuned_models()])