Skip to content

Commit

Permalink
JSON config with comments, and retry on rate-limited requests
Browse files: browse the repository at this point in the history
  • Loading branch information
fry committed Sep 1, 2024
1 parent 8929765 commit dbe6a44
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 4 deletions.
Binary file removed .DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion app.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from flask import Flask, render_template, jsonify
import pandas as pd
import sqlite3
import json
import pyjson5 as json
import openai
from pdfminer.high_level import extract_text
from flask_cors import CORS
Expand Down
10 changes: 7 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import requests
import json
import pyjson5 as json
import sqlite3
import sys
from sqlite3 import Error
Expand All @@ -18,15 +18,19 @@ def load_config(file_name):
with open(file_name) as f:
return json.load(f)

def get_with_retry(url, config, retries=3, delay=1):
def get_with_retry(url, config, retries=5, delay=10):
# Get the URL with retries and delay
for i in range(retries):
try:
if len(config['proxies']) > 0:
r = requests.get(url, headers=config['headers'], proxies=config['proxies'], timeout=5)
else:
r = requests.get(url, headers=config['headers'], timeout=5)
return BeautifulSoup(r.content, 'html.parser')
if r.status_code != 200:
print(f"request failed with status code: {r.status_code}")
tm.sleep(delay)
else:
return BeautifulSoup(r.content, 'html.parser')
except requests.exceptions.Timeout:
print(f"Timeout occurred for URL: {url}, retrying in {delay}s...")
tm.sleep(delay)
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ pysocks
openai
pdfminer.six
flask_cors
pyjson5

0 comments on commit dbe6a44

Please sign in to comment.