gcloud compatible

0xWTC · Feb 13, 2023 · b1587db · b1587db
1 parent 8fb0d24
commit b1587db
Show file tree

Hide file tree

Showing 4 changed files with 112 additions and 15 deletions.
diff --git a/Procfile b/Procfile
@@ -1 +1 @@
-web: gunicorn app:app
+web: gunicorn main:app
diff --git a/app.yaml b/app.yaml
@@ -0,0 +1,58 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+runtime: python310
+
+instance_class: F4_1G
+
+inbound_services:
+- warmup
+
+handlers:
+  # This configures Google App Engine to serve the files in the app's static
+  # directory.
+- url: /static
+  static_dir: static
+
+- url: /_ah/warmup
+  script: auto  # Python 3 runtimes accept only "auto" for script
+
+- url: /process_pdf
+  script: auto
+  secure: always
+
+- url: /download_pdf
+  script: auto
+  secure: always
+
+- url: /reply
+  script: auto
+  secure: always
+
+  # This handler routes all requests not caught above to your main app. It is
+  # required when static routes are defined, but can be omitted (along with
+  # the entire handlers section) when there are no static files defined.
+- url: /.*
+  script: auto
+
+automatic_scaling:
+  min_idle_instances: automatic
+  max_idle_instances: 1
+  min_pending_latency: 30ms
+  max_pending_latency: 1s
+  max_concurrent_requests: 50
+
+env_variables:
+  OPENAI_API_KEY: 'YOUR-OPENAI-KEY'  # placeholder; never commit a real key
+  CLOUD_STORAGE_BUCKET: researchgpt.appspot.com  # read by main.py via os.environ
diff --git a/app.py → main.py b/app.py → main.py
@@ -7,16 +7,16 @@
 import os
 import requests
 from flask_cors import CORS
-import redis
 from _md5 import md5
-# from google.cloud import storage
+from google.cloud import storage
 
 app = Flask(__name__)
-db=redis.from_url(os.environ['REDISCLOUD_URL'])
+# db=redis.from_url(os.environ['REDISCLOUD_URL'])
 # db = redis.StrictRedis(host='localhost', port=6379, db=0)
+# os.environ['CLOUD_STORAGE_BUCKET'] = 'researchgpt.appspot.com'
+CLOUD_STORAGE_BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
 CORS(app)
 
-
 class Chatbot():
 
     def parse_paper(self, pdf):
@@ -146,21 +146,41 @@ def gpt(self, prompt):
 def index():
     return render_template("index.html")
 
+@app.route('/_ah/warmup')
+def warmup():
+    """Answer App Engine warmup requests with an empty 200 response."""
+    # Handle your warmup logic here, e.g. set up a database connection pool.
+    # Flask cannot turn a bare int into a response (it raises TypeError), so
+    # return a (body, status) tuple rather than `return 200`.
+    return '', 200
+
 @app.route("/process_pdf", methods=['POST'])
 def process_pdf():
+    # Process a PDF sent as the raw request body: parse it, compute embeddings,
+    # and store the resulting DataFrame as "<md5-of-body>.json" in the Cloud
+    # Storage bucket. Responds with {"key": <md5 hex digest>}.
     print("Processing pdf")
-    key = md5(request.data).hexdigest()
-    print(key)
     file = request.data
+
+    # The MD5 of the uploaded bytes serves as the storage key.
+    key = md5(file).hexdigest()
+    print(key)
+    # Create a Cloud Storage client.
+    gcs = storage.Client()
+    name = key+'.json'
+
+    # Get the bucket that the file will be uploaded to.
+    bucket = gcs.get_bucket(CLOUD_STORAGE_BUCKET)
+    # If an object for this content hash is already stored, return its key
+    # without parsing or embedding the PDF again.
+    if bucket.blob(name).exists():
+        print("File already exists")
+        print("Done processing pdf")
+        return {"key": key}
+
     pdf = PdfReader(BytesIO(file))
     chatbot = Chatbot()
     paper_text = chatbot.parse_paper(pdf)
     df = chatbot.paper_df(paper_text)
     df = chatbot.calculate_embeddings(df)
-    if db.get(key) is None:
-        db.set(key, df.to_json())
-    # print(db.set(key, df.to_json()))
-    # print(db.get(key))
+
+    # Create a new blob and upload the processed DataFrame serialized as JSON.
+    blob = bucket.blob(name)
+    blob.upload_from_string(df.to_json(), content_type='application/json')
+    # if db.get(key) is None:
+    #     db.set(key, df.to_json())
     print("Done processing pdf")
     return {"key": key}
 
@@ -170,12 +190,27 @@ def download_pdf():
     url = request.json['url']
     r = requests.get(str(url))
     key = md5(r.content).hexdigest()
+
+    # Create a Cloud Storage client.
+    gcs = storage.Client()
+    name = key+'.json'
+
+    # Get the bucket that the file will be uploaded to.
+    bucket = gcs.get_bucket(CLOUD_STORAGE_BUCKET)
+    # Check if the file already exists
+    if bucket.blob(name).exists():
+        print("File already exists")
+        print("Done processing pdf")
+        return {"key": key}
+
     pdf = PdfReader(BytesIO(r.content))
     paper_text = chatbot.parse_paper(pdf)
     df = chatbot.paper_df(paper_text)
     df = chatbot.calculate_embeddings(df)
-    if db.get(key) is None:
-        db.set(key, df.to_json())
+
+    # Create a new blob and upload the file's content.
+    blob = bucket.blob(name)
+    blob.upload_from_string(df.to_json(), content_type='application/json')
     print("Done processing pdf")
     return {"key": key}
 
@@ -185,7 +220,11 @@ def reply():
     key = request.json['key']
     query = request.json['query']
     query = str(query)
-    df = pd.read_json(BytesIO(db.get(key)))
+    # df = pd.read_json(BytesIO(db.get(key)))
+    gcs = storage.Client()
+    bucket = gcs.get_bucket(CLOUD_STORAGE_BUCKET)
+    blob = bucket.blob(key+'.json')
+    df = pd.read_json(BytesIO(blob.download_as_string()))
     print(df.head(5))
     prompt = chatbot.create_prompt(df, query)
     response = chatbot.gpt(prompt)

diff --git a/requirements.txt b/requirements.txt
@@ -7,6 +7,6 @@ flask-cors
 matplotlib
 scipy
 plotly
-redis==4.4.2
+google-cloud-storage
 gunicorn==20.1.0
 scikit-learn==0.24.1