
Commit

Added logging. (#43)
elad-pticha authored Sep 24, 2023
1 parent 84d0538 commit 8ff6c53
Showing 12 changed files with 98 additions and 59 deletions.
2 changes: 2 additions & 0 deletions config.py
@@ -48,6 +48,7 @@ class Config:

    @staticmethod
    def load_downloader_config(args):
+       Config.debug = args.get("debug")
        Config.github_token = args.get("token")
        Config.output_data_dir = args.get("output")
        Config.min_stars = args.get("min_stars")
@@ -68,6 +69,7 @@ def load_downloader_config(args):

    @staticmethod
    def load_indexer_config(args):
+       Config.debug = args.get("debug")
        Config.input_data_dir = args.get("input")
        Config.neo4j_uri = args.get("neo4j_uri")
        Config.neo4j_username = args.get("neo4j_user")
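Both loaders read the flag with dict.get, so when --debug is absent the value is simply None, which downstream debug logging treats as falsy. A minimal sketch of that behavior, using a hypothetical parsed-args dict:

    args = {"token": "ghp_example", "output": "data"}  # hypothetical parsed args, no --debug
    debug = args.get("debug")  # None when the flag was not passed
    if debug:
        print("debug logging enabled")  # skipped: None is falsy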
14 changes: 8 additions & 6 deletions downloader.py
@@ -11,6 +11,7 @@
)
from utils import find_uses_strings, convert_workflow_to_unix_path
from dependency import UsesString, UsesStringType
+import logger


def download_org_workflows_and_actions() -> None:
@@ -46,7 +47,7 @@ def download_all_workflows_and_actions() -> None:
    We are trying to cache the downloads as much as we can to reduce redundant download attempts.
    """

-   print("[+] Starting repository iterator")
+   logger.debug("[+] Starting repository iterator")
    generator = get_repository_generator(Config.min_stars, Config.max_stars)

    # Clean redis
@@ -68,14 +69,14 @@ def download_workflows_and_actions(repo: str) -> None:
"""
with RedisConnection(Config.redis_sets_db) as sets_db:
if sets_db.exists_in_set(Config.workflow_download_history_set, repo):
print("[!] Already downloaded")
logger.debug("[!] Already downloaded")
return

workflows = get_repository_workflows(repo)
print(f"[+] Found {len(workflows)} workflows for {repo}")
logger.debug(f"[+] Found {len(workflows)} workflows for {repo}")

for name, url in workflows.items():
print(f"[+] Fetching {name}")
logger.debug(f"[+] Fetching {name}")
resp = get(url, timeout=10)
if resp.status_code != 200:
raise Exception(
@@ -121,8 +122,9 @@ def download_action_or_reusable_workflow(uses_string: str, repo: str) -> None:
        return

    if url is None:
-       print(
-           f"[-] Couldn't download the action.yml for the dependent action referenced by '{uses_string}' (Maybe runs a local action that was checked out previously? Maybe the action is executed through a Dockerfile?)"
+       # Maybe runs a local action that was checked out previously? Maybe the action is executed through a Dockerfile?
+       logger.error(
+           f"[-] Couldn't download the action.yml for the dependent action referenced by '{uses_string}'"
        )
        return

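The Redis history set is what implements the caching described in the docstring above: each repository is recorded after its first download and skipped on later runs. An abridged sketch of that pattern using the project's own wrappers (the repository name is illustrative):

    with RedisConnection(Config.redis_sets_db) as sets_db:
        if sets_db.exists_in_set(Config.workflow_download_history_set, "octo-org/octo-repo"):
            logger.debug("[!] Already downloaded")
        else:
            ...  # download the workflows, then record the repository
            sets_db.insert_to_set(Config.workflow_download_history_set, "octo-org/octo-repo")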
22 changes: 0 additions & 22 deletions exceptions.py

This file was deleted.

7 changes: 4 additions & 3 deletions gh_api.py
@@ -4,6 +4,7 @@
from typing import Dict, Any, Optional, Iterator, Optional
from http import HTTPStatus
from config import Config
+import logger

"""
Current rate limiting:
@@ -75,7 +76,7 @@ def get_repository_generator(min_stars: int, max_stars: Optional[int]) -> Iterat
            query = REPOSITORY_QUERY_MIN_MAX.format(
                min_stars=min_stars, max_stars=max_stars
            )
-       print(f"[*] Querying repository page: {page}, Query: {query}")
+       logger.debug(f"[*] Querying repository page: {page}, Query: {query}")
        repos = get_repository_search(
            query=query,
            page=page,
@@ -84,7 +85,7 @@ def get_repository_generator(min_stars: int, max_stars: Optional[int]) -> Iterat
        more_results = True
        for repo in repos:
            last_star_count = int(repo["stargazers_count"])
-           print(
+           logger.debug(
                f"[+] About to download repository: {repo['full_name']}, Stars: {last_star_count}"
            )
            yield repo["full_name"]
@@ -130,7 +131,7 @@ def get_repository_workflows(repo: str) -> Dict[str, str]:
        import time

        time_to_sleep = int(r.headers["X-RateLimit-Reset"]) - time.time() + 1
-       print(
+       logger.error(
            f"[*] Rate limit for contents API depleted. Sleeping {time_to_sleep} seconds"
        )
        time.sleep(time_to_sleep)
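The sleep above works because GitHub's X-RateLimit-Reset header is a Unix epoch timestamp (in seconds) for when the quota refills: subtracting the current time yields the wait, plus one second of margin. A self-contained sketch of the same arithmetic with a fabricated header value:

    import time

    reset_epoch = int(time.time()) + 90  # pretend the header says the quota resets in ~90s
    time_to_sleep = reset_epoch - time.time() + 1
    time.sleep(max(time_to_sleep, 0))  # clamp, in case the reset moment already passed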
3 changes: 2 additions & 1 deletion graph_db.py
@@ -2,6 +2,7 @@
from py2neo.ogm import GraphObject
from py2neo.data import Node
from typing import Tuple, Optional
+import logger


class GraphDb(object):
@@ -28,7 +29,7 @@ def get_or_create(self, obj: GraphObject) -> Tuple[GraphObject, bool]:
"""
matched_obj = obj.__class__.match(self.graph, obj._id)
if not matched_obj.exists():
print(
logger.warning(
f"WARNING: We didn't found object {obj._id} of type {obj.__class__.__name__}, so we created it."
)
self.graph.push(obj)
30 changes: 14 additions & 16 deletions indexer.py
@@ -7,6 +7,8 @@
from config import Config
from workflow import Workflow
from composite_action import CompositeAction
+from tqdm import tqdm
+import logger


# A hack to prevent PyYAML from converting "on" tags into Python boolean values.
@@ -28,23 +30,17 @@ def index_downloaded_workflows_and_actions() -> None:
def index_downloaded_actions() -> None:
    with RedisConnection(Config.redis_actions_db) as actions_db:
        actions = [a.decode() for a in actions_db.get_all_keys()]
-       for action in actions:
+       logger.debug(f"[*] Indexing actions...")
+       for action in tqdm(actions, desc="Indexing actions"):
            index_action_file(action)
-           print(
-               f"[*] Indexing actions. {actions.index(action) + 1}/{len(actions)}",
-               end="\r",
-           )


def index_downloaded_workflows() -> None:
    with RedisConnection(Config.redis_workflows_db) as workflows_db:
        workflows = [w.decode() for w in workflows_db.get_all_keys()]
-       for workflow in workflows:
+       logger.debug(f"[*] Indexing workflows...")
+       for workflow in tqdm(workflows, desc="Indexing workflows"):
            index_workflow_file(workflow)
-           print(
-               f"[*] Indexing workflows. {workflows.index(workflow) + 1}/{len(workflows)}",
-               end="\r",
-           )


def index_action_file(action: str) -> None:
@@ -65,7 +61,9 @@ def index_action_file(action: str) -> None:
        try:
            obj = yaml.load(f, yaml.loader.Loader)
        except yaml.scanner.ScannerError as e:
-           print(f"[-] Failed loading: {action}. Exception: {e}. Skipping...")
+           logger.error(
+               f"[-] Failed loading: {action}. Exception: {e}. Skipping..."
+           )
            return

        # Could happen if the YAML is empty.
@@ -76,15 +74,15 @@ def index_action_file(action: str) -> None:
            # TODO: This is a symlink. We should handle it.
            # Only examples at the moment are for https://github.com/edgedb/edgedb-pkg
            # E.g., https://github.com/edgedb/edgedb-pkg/blob/master/integration/linux/build/centos-8/action.yml
-           print(f"[-] Symlink detected: {content}. Skipping...")
+           logger.debug(f"[-] Symlink detected: {content}. Skipping...")
            return

        obj["path"] = action

        Config.graph.push_object(CompositeAction.from_dict(obj))
        sets_db.insert_to_set(Config.action_index_history_set, action)
    except Exception as e:
-       print(f"[-] Error while indexing {action}. {e}")
+       logger.error(f"[-] Error while indexing {action}. {e}")


def index_workflow_file(workflow: str) -> None:
@@ -105,7 +103,7 @@ def index_workflow_file(workflow: str) -> None:
        try:
            obj = yaml.load(f, yaml.loader.Loader)
        except yaml.scanner.ScannerError as e:
-           print(
+           logger.error(
                f"[-] Failed loading: {workflow}. Exception: {e}. Skipping..."
            )
            return
@@ -118,7 +116,7 @@ def index_workflow_file(workflow: str) -> None:
            # TODO: This is a symlink. We should handle it.
            # Only examples at the moment are for https://github.com/edgedb/edgedb-pkg
            # E.g., https://github.com/edgedb/edgedb-pkg/blob/master/integration/linux/build/centos-8/action.yml
-           print(f"[-] Symlink detected: {content}. Skipping...")
+           logger.debug(f"[-] Symlink detected: {content}. Skipping...")
            return

        obj["path"] = workflow
@@ -127,7 +125,7 @@ def index_workflow_file(workflow: str) -> None:
        sets_db.insert_to_set(Config.workflow_index_history_set, workflow)

    except Exception as e:
-       print(f"[-] Error while indexing {workflow}. {e}")
+       logger.error(f"[-] Error while indexing {workflow}. {e}")


def clean_index() -> None:
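Besides replacing the hand-rolled end="\r" progress line, tqdm also drops the actions.index(action) call, which rescanned the list on every iteration just to compute a counter. A minimal sketch of the new pattern (the item list is illustrative):

    from tqdm import tqdm

    actions = ["org1/action.yml", "org2/action.yml", "org3/action.yml"]
    for action in tqdm(actions, desc="Indexing actions"):  # renders "Indexing actions: 3/3 ..."
        pass  # index_action_file(action) in the real code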
38 changes: 38 additions & 0 deletions logger.py
@@ -0,0 +1,38 @@
+import sys
+from typing import Any
+from loguru import logger
+
+logger.remove()
+logger.add(
+    sys.stdout,
+    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>",
+    colorize=True,
+)
+
+
+def debug(msg: str) -> None:
+    from config import Config
+
+    if Config.debug:
+        logger.debug(msg)
+
+
+def error(msg: str) -> None:
+    logger.error(msg)
+
+
+def warning(msg: str) -> None:
+    logger.warning(msg)
+
+
+def catch_exit() -> None:
+    from config import Config
+
+    if Config.github_token:
+        print("""\n[x] Index results with: python main.py index""")
+
+    elif Config.neo4j_uri:
+        neo4j_server = Config.neo4j_uri.split("//")[1].split(":")[0]
+        print(f"""\n[x] View results at: http://{neo4j_server}:7474""")
+
+    sys.exit(0)
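The new module hides loguru behind plain debug/warning/error functions, and debug imports Config at call time rather than at module load, presumably to avoid a circular import while letting the --debug flag gate output at runtime. A sketch of the intended call pattern, assuming the project's config.py is importable (the messages are illustrative):

    from config import Config
    import logger

    Config.debug = True  # normally set via the new --debug flag
    logger.debug("[+] Starting repository iterator")  # emitted only when Config.debug is truthy
    logger.warning("object was missing, creating it")  # always emitted
    logger.error("[-] Download failed")  # always emitted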
22 changes: 18 additions & 4 deletions main.py
@@ -6,7 +6,7 @@
)
from indexer import index_downloaded_workflows_and_actions
from config import Config
-import exceptions
+import logger


def main() -> None:
@@ -40,6 +40,13 @@ def main() -> None:
        required=True,
        help="GITHUB_TOKEN to download data from Github API (Needed for effective rate-limiting)",
    )
+   download_parser_options.add_argument(
+       "--debug",
+       action="store_const",
+       default=False,
+       const=True,
+       help="Whether to print debug statements",
+   )

    download_parser = subparsers.add_parser(
        "download", help="Download workflows into Redis database"
@@ -95,6 +102,13 @@ def main() -> None:
default="123456789",
help="Neo4j password, default: 123456789",
)
index_parser.add_argument(
"--debug",
action="store_const",
default=False,
const=True,
help="Whether to print debug statements",
)
# Currently there are issues in multi-threading
# (especially regarding composite actions/reusable workflows)
index_parser.add_argument(
@@ -136,8 +150,8 @@ def main() -> None:
if __name__ == "__main__":
    try:
        main()
-       exceptions.catch_exit()
+       logger.catch_exit()
    except KeyboardInterrupt:
-       exceptions.catch_exit()
+       logger.catch_exit()
    except Exception as e:
-       print(f"Exception: {e}")
+       logger.error(e)
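Both subparsers now accept the flag; action="store_const" with const=True and default=False behaves like the more common store_true. A hypothetical indexing run with debug output enabled, based on the help strings above and the hint printed by catch_exit:

    python main.py index --debug

The download subcommand takes the same flag alongside its --token option.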
9 changes: 5 additions & 4 deletions redis_connection.py
@@ -2,6 +2,7 @@

import redis
from config import Config
+import logger


class RedisConnection:
@@ -17,7 +18,7 @@ def __enter__(self) -> RedisConnection:
                host=self.redis_host, port=self.redis_port, db=self.redis_db
            )
        except Exception as err:
-           print(f"Failed to connect to Redis: {err}")
+           logger.error(f"Failed to connect to Redis: {err}")

        return self

@@ -29,13 +30,13 @@ def insert_to_hash(self, hash: str, field: str, value: str) -> None:
        try:
            self.redis_client.hset(hash, field, value)
        except redis.exceptions.ResponseError as e:
-           print(f"Failed to set value: {e}")
+           logger.error(f"Failed to set value: {e}")

    def insert_to_string(self, key: str, value: str) -> None:
        try:
            self.redis_client.set(key, value)
        except redis.exceptions.ResponseError as e:
-           print(f"Failed to set value: {e}")
+           logger.error(f"Failed to set value: {e}")

    def get_string(self, key: str) -> str:
        return self.redis_client.get(key)
@@ -44,7 +45,7 @@ def insert_to_set(self, set: str, value: str) -> str:
        try:
            self.redis_client.sadd(set, value)
        except redis.exceptions.ResponseError as e:
-           print(f"Failed to set value: {e}")
+           logger.error(f"Failed to set value: {e}")

    def get_value_from_hash(self, hash: str, field: str) -> str or None:
        return self.redis_client.hget(hash, field)
4 changes: 3 additions & 1 deletion requirements.txt
@@ -3,4 +3,6 @@ pyyaml==6.0.0
argparse==1.4.0
py2neo==2021.2.3
pytest==7.4.2
-redis==5.0.0
+redis==5.0.0
+loguru==0.7.2
+tqdm==4.66.1
3 changes: 2 additions & 1 deletion utils.py
@@ -8,6 +8,7 @@

from redis_connection import RedisConnection
from config import Config
+import logger


def get_dependencies_in_code(code: str) -> List[str]:
@@ -48,7 +49,7 @@ def find_workflow_by_name(repo: str, workflow_name: str) -> str:
            try:
                obj = yaml.load(f, yaml.loader.Loader)
            except yaml.scanner.ScannerError as e:
-               print(
+               logger.error(
                    f"[-] Failed loading: {workflow}. Exception: {e}. Skipping..."
                )
                return