Skip to content

Commit

Permalink
job listing scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
hanniabu committed Jun 20, 2024
1 parent e5110e6 commit 102812b
Show file tree
Hide file tree
Showing 17 changed files with 253 additions and 23 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/update_data_daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ jobs:
- name: Run Python script
run: python _scripts/collect_data.py
env:
GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }}
SHEETS_URL: ${{ secrets.SHEETS_URL }}
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}
ERROR_DISCORD_WEBHOOK: ${{ secrets.ERROR_DISCORD_WEBHOOK }}
JOB_LISTINGS_URL: ${{ secrets.JOB_LISTINGS_URL }}
FOR_HIRE_LISTINGS_URL: ${{ secrets.FOR_HIRE_LISTINGS_URL }}

- name: Commit and push changes
env:
Expand All @@ -38,4 +45,4 @@ jobs:
git config --global user.name "github-actions[bot]"
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add *
git diff --quiet && git diff --staged --quiet || (git commit -m "[BOT] Update data" && git push https://${GITHUB_TOKEN}@github.com/${{ github.repository }}.git HEAD:${{ github.ref }})
git diff --quiet && git diff --staged --quiet || (git commit -m "[BOT] Update data" && git push https://${GITHUB_TOKEN}@github.com/${{ github.repository }}.git HEAD:${{ github.ref }})
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
*~
_scripts/.env
__pycache__
_scripts/__pycache__
**/__pycache__
_site
.sass-cache
.jekyll-cache
.jekyll-metadata
vendor
.DS_Store
*.DS_Store
**/.DS_Store
**/.DS_Store
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
# github-actions
# github-actions


1. Clone project
1. Install dependencies: `pip3 install -r _scripts/requirements.txt`
1. Set `use_test_data = True` in `_scripts/utilities.py`
1. Run scripts with `python3 _scripts/collect_data.py`
1 change: 1 addition & 0 deletions _data/for-hire-listings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
1 change: 1 addition & 0 deletions _data/job-listings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
2 changes: 1 addition & 1 deletion _data/smoothing-pools-processed.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"smooth":{"validators_active":1914,"validators_pending":13,"validators_total":1927},"smoothly":{"validators_active":55,"validators_pending":66,"validators_total":121}}
{"smooth":{"validators_active":1966,"validators_pending":16,"validators_total":1982},"smoothly":{"validators_active":60,"validators_pending":62,"validators_total":122}}
Empty file removed _scripts/.env
Empty file.
Binary file removed _scripts/__pycache__/hardware.cpython-312.pyc
Binary file not shown.
Binary file removed _scripts/__pycache__/smoothing_pools.cpython-312.pyc
Binary file not shown.
Binary file removed _scripts/__pycache__/utilities.cpython-312.pyc
Binary file not shown.
5 changes: 5 additions & 0 deletions _scripts/collect_data.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import utilities
from smoothing_pools import update_smoothing_pool_data
from hardware import check_hardware_availability
from job_listings import update_job_listings
from for_hire_listings import update_for_hire_listings



def run_app():
    # Entry point for the daily data-collection job: run each collector in
    # sequence. Each update_* / check_* function handles (and reports) its own
    # errors, so one failing collector does not stop the others.
    update_smoothing_pool_data()
    check_hardware_availability()
    update_job_listings()
    update_for_hire_listings()



# Executed at import time: this module is run as a script
# (python _scripts/collect_data.py) by the scheduled workflow.
run_app()
Expand Down
86 changes: 86 additions & 0 deletions _scripts/for_hire_listings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import utilities
import requests
import gspread
import json
from datetime import datetime


script_id = "for_hire_listings"



def get_listing_data():
    """Return the raw for-hire listing rows.

    In test mode a static fixture is returned; otherwise the rows are read
    from the "For-Hire Listings" worksheet of the configured Google Sheet.

    Returns:
        list[dict]: one dict per sheet row, keyed by column header.
    """
    if utilities.use_test_data:
        # Fixture rows must carry every key process_listing_data() reads —
        # in particular "Id" and "Work Location", which the original fixture
        # omitted (causing a KeyError on the approved row in test mode).
        # NOTE(review): "Work Location" values mirror the Remote/Hybrid data
        # previously stored under "Location" — confirm against the live sheet.
        sheet_data = [{'Id': 'test4f3f1', 'Timestamp': '2024-05-16T13:54:21.173Z', 'Approved': 'TRUE', 'Name': 'Name', 'Position': 'Position Sought', 'Your Location': '', 'Location': '', 'Work Location': '', 'Type': '', 'About': 'About', 'Resume': 'https://resume.link', 'Cover': 'https://cover.link', 'Email': '[email protected]', 'Transaction': ''}, {'Id': 'test4f3f2', 'Timestamp': '2024-05-16T18:20:44.729Z', 'Approved': '', 'Name': 'Full name/pseudonym', 'Position': 'Position Sought', 'Your Location': 'Your Location', 'Location': 'Remote, Hybrid', 'Work Location': 'Remote, Hybrid', 'Type': 'FullTime, Contract', 'About': 'About', 'Resume': 'https://resume.link', 'Cover': 'https://cover.link', 'Email': '[email protected]', 'Transaction': 'https://etherscan.io'}, {'Id': 'test4f3f3', 'Timestamp': '2024-05-22T00:08:46.573Z', 'Approved': '', 'Name': 'name', 'Position': 'position', 'Your Location': 'location', 'Location': 'Remote', 'Work Location': 'Remote', 'Type': 'FullTime, Contract', 'About': 'About You (300 characters max)', 'Resume': 'https://resume.link', 'Cover': 'https://cover.link', 'Email': '[email protected]', 'Transaction': 'https://etherscan.io'}]
    else:
        credentials = utilities.GOOGLE_CREDENTIALS
        # get the sheet data
        # reference: https://docs.gspread.org/en/v5.7.0/user-guide.html
        gc = gspread.service_account_from_dict(credentials)
        sheet = gc.open_by_key(utilities.SHEETS_URL).worksheet("For-Hire Listings")
        sheet_data = sheet.get_all_records()
    # utilities.log(sheet_data, context=f"{script_id}__get_listing_data")
    return sheet_data

def process_listing_data(raw_data):
    """Merge newly approved for-hire listings with the still-active saved ones.

    Args:
        raw_data: list of sheet-row dicts from get_listing_data().

    Returns:
        list[dict]: newly approved listings (stamped with the current epoch)
        followed by previously saved listings that are still active.

    Side effects:
        Sends a Discord ping when any row is awaiting review, and logs the
        processed result.
    """
    LISTING_TTL_SECONDS = 2592000  # 30 days; saved listings expire after this
    processed_data = []
    current_listings = utilities.read_file("_data/for-hire-listings.json")
    approved_listings = []
    new_listings = 0
    # filter for approved listings and reformat
    for row in raw_data:
        if row["Approved"] == "TRUE":
            approved_listings.append({
                "id": row["Id"],
                "epoch": utilities.current_time,
                "name": row["Name"],
                "position": row["Position"],
                "location": row["Location"],
                "work_location": row["Work Location"],
                "about": row["About"],
                "type": row["Type"],
                "resume": row["Resume"],
                "cover": row["Cover"],
                "email": row["Email"]
            })
        elif row["Approved"] == "":
            # blank "Approved" cell == submitted but not yet reviewed
            new_listings += 1
    # keep only listings we have not saved before (set gives O(1) lookups
    # instead of the original nested scan)
    known_ids = {listing["id"] for listing in current_listings}
    processed_data.extend(
        listing for listing in approved_listings if listing["id"] not in known_ids
    )
    # keep previously saved listings that have not expired yet
    for listing in current_listings:
        if (utilities.current_time - listing["epoch"]) < LISTING_TTL_SECONDS:
            processed_data.append(listing)
    # send discord ping for new (pending-review) listings
    if new_listings > 0:
        plural = "s" if new_listings > 1 else ""
        msg = f"[{new_listings} new for-hire listing{plural}](<{utilities.FOR_HIRE_LISTINGS_URL}>)"
        utilities.sendDiscordMsg(msg)
    utilities.log(processed_data, context=f"{script_id}__process_listing_data")
    return processed_data

def save_listing_data(processed_data):
    """Write the processed for-hire listings to the site's data file."""
    target_path = "_data/for-hire-listings.json"
    utilities.save_to_file(target_path, processed_data, context=f"{script_id}__save_listing_data")


def update_for_hire_listings():
    """Refresh the for-hire listings: fetch, process, and persist.

    Any exception is logged and reported rather than propagated, so a
    failure here does not abort the other collectors.
    """
    try:
        save_listing_data(process_listing_data(get_listing_data()))
    except Exception as error:
        utilities.log(f"{error}: {script_id}")
        utilities.report_error(error, context=f"{script_id}__update_for_hire_listings")


3 changes: 1 addition & 2 deletions _scripts/hardware.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,10 @@ def check_hardware_availability():
save_unavailable_products_data(updated_unavailable_products)
else:
error = f"Bad response"
utilities.log(f"{error}: {metric_id}_{data_source}")
utilities.log(f"{error}: {script_id}")
utilities.report_error(error, context=f"{script_id}__check_hardware_availability")
return


check_hardware_availability()


85 changes: 85 additions & 0 deletions _scripts/job_listings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import utilities
import requests
import gspread
import json
from datetime import datetime


script_id = "job_listings"



def get_listing_data():
    """Return the raw job-listing rows.

    In test mode a static fixture is returned; otherwise the rows come from
    the "Job Listings" worksheet of the configured Google Sheet.

    Returns:
        list[dict]: one dict per sheet row, keyed by column header.
    """
    if not utilities.use_test_data:
        # reference: https://docs.gspread.org/en/v5.7.0/user-guide.html
        client = gspread.service_account_from_dict(utilities.GOOGLE_CREDENTIALS)
        worksheet = client.open_by_key(utilities.SHEETS_URL).worksheet("Job Listings")
        # utilities.log(sheet_data, context=f"{script_id}__get_listing_data")
        return worksheet.get_all_records()
    # canned rows mirroring the sheet's columns
    return [{'Id': 'df3434f3f', 'Timestamp': '2024-05-23T17:13:11.505Z', 'Approved': 'TRUE', 'Name': 'name', 'Position': 'title', 'Description': 'description', 'Location': 'location', 'Location Details': '', 'Type': 'Full-Time', 'Description Link': 'https://description.link', 'Application': 'application link/email', 'Transaction': 'https://etherscan.io', 'Contact': 'listing contact'}, {'Id': 'df3433f3f', 'Timestamp': '2024-05-23T17:13:11.505Z', 'Approved': '', 'Name': 'name', 'Position': 'title', 'Description': 'description', 'Location': 'Remote', 'Location Details': '', 'Type': 'Full-Time', 'Description Link': 'https://long.description', 'Application': 'Application Link/Email *', 'Transaction': 'https://etherscan.io', 'Contact': 'contact'}, {'Id': 'df3423f3f', 'Timestamp': '2024-05-23T17:13:11.505Z', 'Approved': '', 'Name': 'name', 'Position': 'title', 'Description': 'description', 'Location': 'Remote', 'Location Details': '', 'Type': 'Full-Time', 'Description Link': 'https://long.description', 'Application': 'link/email', 'Transaction': 'https://etherscan.io', 'Contact': 'contact'}, {'Id': 'df343f93f', 'Timestamp': '2024-05-23T17:13:11.505Z', 'Approved': '', 'Name': 'fgdfg', 'Position': 'fgdfg', 'Description': 'sfgfsg', 'Location': 'Remote', 'Location Details': 'Location Details', 'Type': 'Full-Time', 'Description Link': 'https://docs.google.com/', 'Application': 'https://docs.google.com/', 'Transaction': 'https://docs.google.com/', 'Contact': 'contatd'}, {'Id': 'df3435f3f', 'Timestamp': '2024-05-23T17:13:11.505Z', 'Approved': '', 'Name': 'name', 'Position': 'title', 'Description': 'Role Description (300 characters m', 'Location': 'Onsite', 'Location Details': 'onsite city, country', 'Type': 'Contract', 'Description Link': 'https://long.description', 'Application': 'link/email', 'Transaction': 'https://etherscan.io', 'Contact': 'contact'}]

def process_listing_data(raw_data):
    """Merge newly approved job listings with the still-active saved ones.

    Args:
        raw_data: list of sheet-row dicts from get_listing_data().

    Returns:
        list[dict]: newly approved listings (stamped with the current epoch)
        followed by previously saved listings that are still active.

    Side effects:
        Sends a Discord ping when any row is awaiting review, and logs the
        processed result.
    """
    LISTING_TTL_SECONDS = 2592000  # 30 days; saved listings expire after this
    processed_data = []
    current_listings = utilities.read_file("_data/job-listings.json")
    approved_listings = []
    new_listings = 0
    # filter for approved listings and reformat
    for row in raw_data:
        if row["Approved"] == "TRUE":
            approved_listings.append({
                "id": row["Id"],
                "epoch": utilities.current_time,
                "name": row["Name"],
                "position": row["Position"],
                "description": row["Description"],
                "location": row["Location"],
                "location_details": row["Location Details"],
                "type": row["Type"],
                "description_link": row["Description Link"],
                "apply": row["Application"]
            })
        elif row["Approved"] == "":
            # blank "Approved" cell == submitted but not yet reviewed
            new_listings += 1
    # keep only listings we have not saved before (set gives O(1) lookups
    # instead of the original nested scan)
    known_ids = {listing["id"] for listing in current_listings}
    processed_data.extend(
        listing for listing in approved_listings if listing["id"] not in known_ids
    )
    # keep previously saved listings that have not expired yet
    for listing in current_listings:
        if (utilities.current_time - listing["epoch"]) < LISTING_TTL_SECONDS:
            processed_data.append(listing)
    # send discord ping for new (pending-review) listings
    if new_listings > 0:
        plural = "s" if new_listings > 1 else ""
        msg = f"[{new_listings} new job listing{plural}](<{utilities.JOB_LISTINGS_URL}>)"
        utilities.sendDiscordMsg(msg)
    utilities.log(processed_data, context=f"{script_id}__process_listing_data")
    return processed_data

def save_listing_data(processed_data):
    """Write the processed job listings to the site's data file."""
    target_path = "_data/job-listings.json"
    utilities.save_to_file(target_path, processed_data, context=f"{script_id}__save_listing_data")


def update_job_listings():
    """Refresh the job listings: fetch, process, and persist.

    Any exception is logged and reported rather than propagated, so a
    failure here does not abort the other collectors.
    """
    try:
        save_listing_data(process_listing_data(get_listing_data()))
    except Exception as error:
        utilities.log(f"{error}: {script_id}")
        utilities.report_error(error, context=f"{script_id}__update_job_listings")


13 changes: 12 additions & 1 deletion _scripts/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
gspread==5.12.4
python-dotenv==1.0.1
PyYAML==6.0.1
Requests==2.31.0
PyYAML==6.0.1
Requests==2.32.2
xmltodict==0.13.0

# google-api-python-client==2.130.0
# google-auth-httplib2==0.2.0
# google-api-core==2.19.0
# googleapis-common-protos==1.63.0
# httplib2==0.22.0
# proto-plus==1.23.0
# pyparsing==3.1.2
# uritemplate==4.1.1
2 changes: 1 addition & 1 deletion _scripts/smoothing_pools.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def update_smoothing_pool_data():
save_smoothing_pool_data(smooth_subs, smoothly_subs)
else:
error = f"Bad response"
utilities.log(f"{error}: {metric_id}_{data_source}")
utilities.log(f"{error}: {script_id}")
utilities.report_error(error, context=f"{script_id}__update_smoothing_pool_data")
return

55 changes: 40 additions & 15 deletions _scripts/utilities.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import sys
sys.dont_write_bytecode = True

import requests
import os
import time
import json
import base64
import yaml
import xmltodict
import pprint
Expand Down Expand Up @@ -29,7 +33,13 @@


load_dotenv()
# GOOGLE_FORM_ERROR_REPORT_URL = os.environ.get("GOOGLE_FORM_ERROR_REPORT_URL")
# reference: https://stackoverflow.com/a/74584151/4462930
GOOGLE_CREDENTIALS = json.loads(base64.b64decode(str(os.environ.get("GOOGLE_CREDENTIALS"))[2:-1]).decode('utf-8'))
SHEETS_URL = os.environ.get("SHEETS_URL")
DISCORD_WEBHOOK = os.environ.get("DISCORD_WEBHOOK")
ERROR_DISCORD_WEBHOOK = os.environ.get("ERROR_DISCORD_WEBHOOK")
JOB_LISTINGS_URL = os.environ.get("JOB_LISTINGS_URL")
FOR_HIRE_LISTINGS_URL = os.environ.get("FOR_HIRE_LISTINGS_URL")



Expand Down Expand Up @@ -108,7 +118,6 @@ def read_file(rel_path, file_type="json", context=""):
rel_path = "/" + rel_path
abs_path = os.path.abspath(__file__ + "/../../") + rel_path
try:
print(abs_path)
with open(abs_path, 'r') as f:
if file_type == "json":
response = json.load(f)
Expand All @@ -134,24 +143,24 @@ def xml2json(xml):
def report_error(error, context=""):
global error_count
error_count += 1
# error_count = error_count + 1
if context == "":
msg = f"{error}"
else:
msg = f"{context}: {error}"
print(msg)
if use_test_data and not submit_error:
return
else:
data = {
# "entry.2112281434": "name", # text
# "entry.1600556346": "option3", # dropdown
# "entry.819260047": ["option2", "option3"], #checkbox multiple
# "entry.1682233942": "option5" # checkbox single
"entry.1558275967": error,
"entry.2071911865": context
}
data = {"content": msg}
attempts = 0
status = 0
try:
# requests.post(GOOGLE_FORM_ERROR_REPORT_URL, data)
print("Error submitted")
except:
while (attempts < 3) and (status < 200 or status >= 300):
attempts += 1
r = requests.post(ERROR_DISCORD_WEBHOOK, json=data)
status = r.status_code
except Exception as error:
error_count += 1
error = f"ERROR: {context} error"
if exit_on_report_error:
raise SystemExit(error)
else:
Expand Down Expand Up @@ -183,4 +192,20 @@ def pprint(data):
pp.pprint(data)


def sendDiscordMsg(msg):
    """Post msg to the Discord webhook, retrying up to 3 times.

    The original wrapped the whole retry loop in one try, so the first
    raised exception (e.g. a connection error) aborted all remaining
    attempts. Here each attempt has its own try, so both non-2xx responses
    and exceptions count as failed attempts and are retried. If the final
    attempt ends in an exception, it is reported via report_error(), and —
    matching the original behavior — the process exits when
    exit_on_report_error is set.

    Args:
        msg: Discord message content (markdown allowed).
    """
    data = {"content": msg}
    attempts = 0
    status = 0
    last_exception = None
    while (attempts < 3) and (status < 200 or status >= 300):
        attempts += 1
        try:
            r = requests.post(DISCORD_WEBHOOK, json=data)
            status = r.status_code
            last_exception = None
        except Exception as exc:
            last_exception = exc
    if last_exception is not None:
        report_error(last_exception, f"sendDiscordMsg: {msg}")
        if exit_on_report_error:
            raise SystemExit(last_exception)
        else:
            print(last_exception)


0 comments on commit 102812b

Please sign in to comment.