Skip to content

Commit

Permalink
job listing scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
hanniabu committed Jun 20, 2024
1 parent e5110e6 commit 102812b
Show file tree
Hide file tree
Showing 17 changed files with 253 additions and 23 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/update_data_daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ jobs:
- name: Run Python script
run: python _scripts/collect_data.py
env:
GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }}
SHEETS_URL: ${{ secrets.SHEETS_URL }}
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}
ERROR_DISCORD_WEBHOOK: ${{ secrets.ERROR_DISCORD_WEBHOOK }}
JOB_LISTINGS_URL: ${{ secrets.JOB_LISTINGS_URL }}
FOR_HIRE_LISTINGS_URL: ${{ secrets.FOR_HIRE_LISTINGS_URL }}

- name: Commit and push changes
env:
Expand All @@ -38,4 +45,4 @@ jobs:
git config --global user.name "github-actions[bot]"
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add *
git diff --quiet && git diff --staged --quiet || (git commit -m "[BOT] Update data" && git push https://${GITHUB_TOKEN}@github.com/${{ github.repository }}.git HEAD:${{ github.ref }})
git diff --quiet && git diff --staged --quiet || (git commit -m "[BOT] Update data" && git push https://${GITHUB_TOKEN}@github.com/${{ github.repository }}.git HEAD:${{ github.ref }})
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
*~
_scripts/.env
__pycache__
_scripts/__pycache__
**/__pycache__
_site
.sass-cache
.jekyll-cache
.jekyll-metadata
vendor
.DS_Store
*.DS_Store
**/.DS_Store
**/.DS_Store
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
# github-actions
# github-actions


1. Clone project
1. Install dependencies: `pip3 install -r _scripts/requirements.txt`
1. Set `use_test_data = True` in `_scripts/utilities.py`
1. Run scripts with `python3 _scripts/collect_data.py`
1 change: 1 addition & 0 deletions _data/for-hire-listings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
1 change: 1 addition & 0 deletions _data/job-listings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
2 changes: 1 addition & 1 deletion _data/smoothing-pools-processed.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"smooth":{"validators_active":1914,"validators_pending":13,"validators_total":1927},"smoothly":{"validators_active":55,"validators_pending":66,"validators_total":121}}
{"smooth":{"validators_active":1966,"validators_pending":16,"validators_total":1982},"smoothly":{"validators_active":60,"validators_pending":62,"validators_total":122}}
Empty file removed _scripts/.env
Empty file.
Binary file removed _scripts/__pycache__/hardware.cpython-312.pyc
Binary file not shown.
Binary file removed _scripts/__pycache__/smoothing_pools.cpython-312.pyc
Binary file not shown.
Binary file removed _scripts/__pycache__/utilities.cpython-312.pyc
Binary file not shown.
5 changes: 5 additions & 0 deletions _scripts/collect_data.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import utilities
from smoothing_pools import update_smoothing_pool_data
from hardware import check_hardware_availability
from job_listings import update_job_listings
from for_hire_listings import update_for_hire_listings



def run_app():
    # Entry point for the daily data-collection job: run each collector in
    # sequence. Each update_* / check_* function handles (and reports) its own
    # errors, so one failing collector does not stop the others.
    update_smoothing_pool_data()
    check_hardware_availability()
    update_job_listings()
    update_for_hire_listings()



# Executed at import time: this module is run as a script
# (python _scripts/collect_data.py) by the scheduled workflow.
run_app()
Expand Down
86 changes: 86 additions & 0 deletions _scripts/for_hire_listings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import utilities
import requests
import gspread
import json
from datetime import datetime


script_id = "for_hire_listings"



def get_listing_data():
    """Return the raw for-hire listing rows.

    In test mode a static fixture is returned; otherwise the rows are read
    from the "For-Hire Listings" worksheet of the configured Google Sheet.

    Returns:
        list[dict]: one dict per sheet row, keyed by column header.
    """
    if utilities.use_test_data:
        # Fixture rows must carry every key process_listing_data() reads —
        # in particular "Id" and "Work Location", which the original fixture
        # omitted (causing a KeyError on the approved row in test mode).
        # NOTE(review): "Work Location" values mirror the Remote/Hybrid data
        # previously stored under "Location" — confirm against the live sheet.
        sheet_data = [{'Id': 'test4f3f1', 'Timestamp': '2024-05-16T13:54:21.173Z', 'Approved': 'TRUE', 'Name': 'Name', 'Position': 'Position Sought', 'Your Location': '', 'Location': '', 'Work Location': '', 'Type': '', 'About': 'About', 'Resume': 'https://resume.link', 'Cover': 'https://cover.link', 'Email': '[email protected]', 'Transaction': ''}, {'Id': 'test4f3f2', 'Timestamp': '2024-05-16T18:20:44.729Z', 'Approved': '', 'Name': 'Full name/pseudonym', 'Position': 'Position Sought', 'Your Location': 'Your Location', 'Location': 'Remote, Hybrid', 'Work Location': 'Remote, Hybrid', 'Type': 'FullTime, Contract', 'About': 'About', 'Resume': 'https://resume.link', 'Cover': 'https://cover.link', 'Email': '[email protected]', 'Transaction': 'https://etherscan.io'}, {'Id': 'test4f3f3', 'Timestamp': '2024-05-22T00:08:46.573Z', 'Approved': '', 'Name': 'name', 'Position': 'position', 'Your Location': 'location', 'Location': 'Remote', 'Work Location': 'Remote', 'Type': 'FullTime, Contract', 'About': 'About You (300 characters max)', 'Resume': 'https://resume.link', 'Cover': 'https://cover.link', 'Email': '[email protected]', 'Transaction': 'https://etherscan.io'}]
    else:
        credentials = utilities.GOOGLE_CREDENTIALS
        # get the sheet data
        # reference: https://docs.gspread.org/en/v5.7.0/user-guide.html
        gc = gspread.service_account_from_dict(credentials)
        sheet = gc.open_by_key(utilities.SHEETS_URL).worksheet("For-Hire Listings")
        sheet_data = sheet.get_all_records()
    # utilities.log(sheet_data, context=f"{script_id}__get_listing_data")
    return sheet_data

def process_listing_data(raw_data):
    """Merge newly approved for-hire listings with the still-active saved ones.

    Args:
        raw_data: list of sheet-row dicts from get_listing_data().

    Returns:
        list[dict]: newly approved listings (stamped with the current epoch)
        followed by previously saved listings that are still active.

    Side effects:
        Sends a Discord ping when any row is awaiting review, and logs the
        processed result.
    """
    LISTING_TTL_SECONDS = 2592000  # 30 days; saved listings expire after this
    processed_data = []
    current_listings = utilities.read_file("_data/for-hire-listings.json")
    approved_listings = []
    new_listings = 0
    # filter for approved listings and reformat
    for row in raw_data:
        if row["Approved"] == "TRUE":
            approved_listings.append({
                "id": row["Id"],
                "epoch": utilities.current_time,
                "name": row["Name"],
                "position": row["Position"],
                "location": row["Location"],
                "work_location": row["Work Location"],
                "about": row["About"],
                "type": row["Type"],
                "resume": row["Resume"],
                "cover": row["Cover"],
                "email": row["Email"]
            })
        elif row["Approved"] == "":
            # blank "Approved" cell == submitted but not yet reviewed
            new_listings += 1
    # keep only listings we have not saved before (set gives O(1) lookups
    # instead of the original nested scan)
    known_ids = {listing["id"] for listing in current_listings}
    processed_data.extend(
        listing for listing in approved_listings if listing["id"] not in known_ids
    )
    # keep previously saved listings that have not expired yet
    for listing in current_listings:
        if (utilities.current_time - listing["epoch"]) < LISTING_TTL_SECONDS:
            processed_data.append(listing)
    # send discord ping for new (pending-review) listings
    if new_listings > 0:
        plural = "s" if new_listings > 1 else ""
        msg = f"[{new_listings} new for-hire listing{plural}](<{utilities.FOR_HIRE_LISTINGS_URL}>)"
        utilities.sendDiscordMsg(msg)
    utilities.log(processed_data, context=f"{script_id}__process_listing_data")
    return processed_data

def save_listing_data(processed_data):
    """Write the processed for-hire listings to the site's data file."""
    target_path = "_data/for-hire-listings.json"
    utilities.save_to_file(target_path, processed_data, context=f"{script_id}__save_listing_data")


def update_for_hire_listings():
    """Refresh the for-hire listings: fetch, process, and persist.

    Any exception is logged and reported rather than propagated, so a
    failure here does not abort the other collectors.
    """
    try:
        save_listing_data(process_listing_data(get_listing_data()))
    except Exception as error:
        utilities.log(f"{error}: {script_id}")
        utilities.report_error(error, context=f"{script_id}__update_for_hire_listings")


3 changes: 1 addition & 2 deletions _scripts/hardware.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,10 @@ def check_hardware_availability():
save_unavailable_products_data(updated_unavailable_products)
else:
error = f"Bad response"
utilities.log(f"{error}: {metric_id}_{data_source}")
utilities.log(f"{error}: {script_id}")
utilities.report_error(error, context=f"{script_id}__check_hardware_availability")
return


check_hardware_availability()


85 changes: 85 additions & 0 deletions _scripts/job_listings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import utilities
import requests
import gspread
import json
from datetime import datetime


script_id = "job_listings"



def get_listing_data():
    """Return the raw job-listing rows.

    In test mode a static fixture is returned; otherwise the rows come from
    the "Job Listings" worksheet of the configured Google Sheet.

    Returns:
        list[dict]: one dict per sheet row, keyed by column header.
    """
    if not utilities.use_test_data:
        # reference: https://docs.gspread.org/en/v5.7.0/user-guide.html
        client = gspread.service_account_from_dict(utilities.GOOGLE_CREDENTIALS)
        worksheet = client.open_by_key(utilities.SHEETS_URL).worksheet("Job Listings")
        # utilities.log(sheet_data, context=f"{script_id}__get_listing_data")
        return worksheet.get_all_records()
    # canned rows mirroring the sheet's columns
    return [{'Id': 'df3434f3f', 'Timestamp': '2024-05-23T17:13:11.505Z', 'Approved': 'TRUE', 'Name': 'name', 'Position': 'title', 'Description': 'description', 'Location': 'location', 'Location Details': '', 'Type': 'Full-Time', 'Description Link': 'https://description.link', 'Application': 'application link/email', 'Transaction': 'https://etherscan.io', 'Contact': 'listing contact'}, {'Id': 'df3433f3f', 'Timestamp': '2024-05-23T17:13:11.505Z', 'Approved': '', 'Name': 'name', 'Position': 'title', 'Description': 'description', 'Location': 'Remote', 'Location Details': '', 'Type': 'Full-Time', 'Description Link': 'https://long.description', 'Application': 'Application Link/Email *', 'Transaction': 'https://etherscan.io', 'Contact': 'contact'}, {'Id': 'df3423f3f', 'Timestamp': '2024-05-23T17:13:11.505Z', 'Approved': '', 'Name': 'name', 'Position': 'title', 'Description': 'description', 'Location': 'Remote', 'Location Details': '', 'Type': 'Full-Time', 'Description Link': 'https://long.description', 'Application': 'link/email', 'Transaction': 'https://etherscan.io', 'Contact': 'contact'}, {'Id': 'df343f93f', 'Timestamp': '2024-05-23T17:13:11.505Z', 'Approved': '', 'Name': 'fgdfg', 'Position': 'fgdfg', 'Description': 'sfgfsg', 'Location': 'Remote', 'Location Details': 'Location Details', 'Type': 'Full-Time', 'Description Link': 'https://docs.google.com/', 'Application': 'https://docs.google.com/', 'Transaction': 'https://docs.google.com/', 'Contact': 'contatd'}, {'Id': 'df3435f3f', 'Timestamp': '2024-05-23T17:13:11.505Z', 'Approved': '', 'Name': 'name', 'Position': 'title', 'Description': 'Role Description (300 characters m', 'Location': 'Onsite', 'Location Details': 'onsite city, country', 'Type': 'Contract', 'Description Link': 'https://long.description', 'Application': 'link/email', 'Transaction': 'https://etherscan.io', 'Contact': 'contact'}]

def process_listing_data(raw_data):
    """Merge newly approved job listings with the still-active saved ones.

    Args:
        raw_data: list of sheet-row dicts from get_listing_data().

    Returns:
        list[dict]: newly approved listings (stamped with the current epoch)
        followed by previously saved listings that are still active.

    Side effects:
        Sends a Discord ping when any row is awaiting review, and logs the
        processed result.
    """
    LISTING_TTL_SECONDS = 2592000  # 30 days; saved listings expire after this
    processed_data = []
    current_listings = utilities.read_file("_data/job-listings.json")
    approved_listings = []
    new_listings = 0
    # filter for approved listings and reformat
    for row in raw_data:
        if row["Approved"] == "TRUE":
            approved_listings.append({
                "id": row["Id"],
                "epoch": utilities.current_time,
                "name": row["Name"],
                "position": row["Position"],
                "description": row["Description"],
                "location": row["Location"],
                "location_details": row["Location Details"],
                "type": row["Type"],
                "description_link": row["Description Link"],
                "apply": row["Application"]
            })
        elif row["Approved"] == "":
            # blank "Approved" cell == submitted but not yet reviewed
            new_listings += 1
    # keep only listings we have not saved before (set gives O(1) lookups
    # instead of the original nested scan)
    known_ids = {listing["id"] for listing in current_listings}
    processed_data.extend(
        listing for listing in approved_listings if listing["id"] not in known_ids
    )
    # keep previously saved listings that have not expired yet
    for listing in current_listings:
        if (utilities.current_time - listing["epoch"]) < LISTING_TTL_SECONDS:
            processed_data.append(listing)
    # send discord ping for new (pending-review) listings
    if new_listings > 0:
        plural = "s" if new_listings > 1 else ""
        msg = f"[{new_listings} new job listing{plural}](<{utilities.JOB_LISTINGS_URL}>)"
        utilities.sendDiscordMsg(msg)
    utilities.log(processed_data, context=f"{script_id}__process_listing_data")
    return processed_data

def save_listing_data(processed_data):
    """Write the processed job listings to the site's data file."""
    target_path = "_data/job-listings.json"
    utilities.save_to_file(target_path, processed_data, context=f"{script_id}__save_listing_data")


def update_job_listings():
    """Refresh the job listings: fetch, process, and persist.

    Any exception is logged and reported rather than propagated, so a
    failure here does not abort the other collectors.
    """
    try:
        save_listing_data(process_listing_data(get_listing_data()))
    except Exception as error:
        utilities.log(f"{error}: {script_id}")
        utilities.report_error(error, context=f"{script_id}__update_job_listings")


13 changes: 12 additions & 1 deletion _scripts/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
gspread==5.12.4
python-dotenv==1.0.1
PyYAML==6.0.1
Requests==2.31.0
PyYAML==6.0.1
Requests==2.32.2
xmltodict==0.13.0

# google-api-python-client==2.130.0
# google-auth-httplib2==0.2.0
# google-api-core==2.19.0
# googleapis-common-protos==1.63.0
# httplib2==0.22.0
# proto-plus==1.23.0
# pyparsing==3.1.2
# uritemplate==4.1.1
2 changes: 1 addition & 1 deletion _scripts/smoothing_pools.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def update_smoothing_pool_data():
save_smoothing_pool_data(smooth_subs, smoothly_subs)
else:
error = f"Bad response"
utilities.log(f"{error}: {metric_id}_{data_source}")
utilities.log(f"{error}: {script_id}")
utilities.report_error(error, context=f"{script_id}__update_smoothing_pool_data")
return

55 changes: 40 additions & 15 deletions _scripts/utilities.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import sys
sys.dont_write_bytecode = True

import requests
import os
import time
import json
import base64
import yaml
import xmltodict
import pprint
Expand Down Expand Up @@ -29,7 +33,13 @@


load_dotenv()
# GOOGLE_FORM_ERROR_REPORT_URL = os.environ.get("GOOGLE_FORM_ERROR_REPORT_URL")
# reference: https://stackoverflow.com/a/74584151/4462930
GOOGLE_CREDENTIALS = json.loads(base64.b64decode(str(os.environ.get("GOOGLE_CREDENTIALS"))[2:-1]).decode('utf-8'))
SHEETS_URL = os.environ.get("SHEETS_URL")
DISCORD_WEBHOOK = os.environ.get("DISCORD_WEBHOOK")
ERROR_DISCORD_WEBHOOK = os.environ.get("ERROR_DISCORD_WEBHOOK")
JOB_LISTINGS_URL = os.environ.get("JOB_LISTINGS_URL")
FOR_HIRE_LISTINGS_URL = os.environ.get("FOR_HIRE_LISTINGS_URL")



Expand Down Expand Up @@ -108,7 +118,6 @@ def read_file(rel_path, file_type="json", context=""):
rel_path = "/" + rel_path
abs_path = os.path.abspath(__file__ + "/../../") + rel_path
try:
print(abs_path)
with open(abs_path, 'r') as f:
if file_type == "json":
response = json.load(f)
Expand All @@ -134,24 +143,24 @@ def xml2json(xml):
def report_error(error, context=""):
global error_count
error_count += 1
# error_count = error_count + 1
if context == "":
msg = f"{error}"
else:
msg = f"{context}: {error}"
print(msg)
if use_test_data and not submit_error:
return
else:
data = {
# "entry.2112281434": "name", # text
# "entry.1600556346": "option3", # dropdown
# "entry.819260047": ["option2", "option3"], #checkbox multiple
# "entry.1682233942": "option5" # checkbox single
"entry.1558275967": error,
"entry.2071911865": context
}
data = {"content": msg}
attempts = 0
status = 0
try:
# requests.post(GOOGLE_FORM_ERROR_REPORT_URL, data)
print("Error submitted")
except:
while (attempts < 3) and (status < 200 or status >= 300):
attempts += 1
r = requests.post(ERROR_DISCORD_WEBHOOK, json=data)
status = r.status_code
except Exception as error:
error_count += 1
error = f"ERROR: {context} error"
if exit_on_report_error:
raise SystemExit(error)
else:
Expand Down Expand Up @@ -183,4 +192,20 @@ def pprint(data):
pp.pprint(data)


def sendDiscordMsg(msg):
    """Post msg to the Discord webhook, retrying up to 3 times.

    The original wrapped the whole retry loop in one try, so the first
    raised exception (e.g. a connection error) aborted all remaining
    attempts. Here each attempt has its own try, so both non-2xx responses
    and exceptions count as failed attempts and are retried. If the final
    attempt ends in an exception, it is reported via report_error(), and —
    matching the original behavior — the process exits when
    exit_on_report_error is set.

    Args:
        msg: Discord message content (markdown allowed).
    """
    data = {"content": msg}
    attempts = 0
    status = 0
    last_exception = None
    while (attempts < 3) and (status < 200 or status >= 300):
        attempts += 1
        try:
            r = requests.post(DISCORD_WEBHOOK, json=data)
            status = r.status_code
            last_exception = None
        except Exception as exc:
            last_exception = exc
    if last_exception is not None:
        report_error(last_exception, f"sendDiscordMsg: {msg}")
        if exit_on_report_error:
            raise SystemExit(last_exception)
        else:
            print(last_exception)


0 comments on commit 102812b

Please sign in to comment.