diff --git a/workflows/README.md b/workflows/README.md index c95ace7475..59fc6514ff 100644 --- a/workflows/README.md +++ b/workflows/README.md @@ -102,17 +102,22 @@ If you are encountering difficulties at any point don't hesitate to ask for help A [Workflow Testing RO-Crate](https://crs4.github.io/life_monitor/workflow_testing_ro_crate) will be auto-generated after merging a workflow. You do not need to manually create this file, but if you want to try this today, these are the instructions: -This directory, along with the workflow category directories, contains a Python tool to generate [Workflow Testing RO-Crate](https://crs4.github.io/life_monitor/workflow_testing_ro_crate) metadata files (`ro-crate-metadata.json`). It also contains a requirements file to install the tool's dependencies (mainly [ro-crate-py](https://github.com/ResearchObject/ro-crate-py)): +This directory contains a Python tool to generate a Workflow Testing RO-Crate metadata file (`ro-crate-metadata.json`) in each workflow repository dir, along with a requirements file to install the tool's dependencies: ```bash python3 -m venv venv source venv/bin/activate pip install --upgrade pip pip install -r requirements.txt -python gen_crates.py sars-cov-2-variant-calling +python gen_crates.py ``` -The tool expects to find the workflow file and Planemo test file as described above. The `README.md` file is not expected, but it's included in the crate (i.e., listed among the metadata) if found. +Workflow repository dirs are searched for using the same logic and definition +of repository as the `planemo ci_find_repos` command (any directory with a +`.shed.yml` or `.dockstore.yml` file). The tool expects to find the workflow +file and Planemo test file as described above. The `README.md` file is not +expected, but it's included in the crate (i.e., listed among the metadata) if +found. 
The following metadata is not expected, but included in the crate if found in the workflow file: @@ -127,3 +132,5 @@ The following metadata is not expected, but included in the crate if found in th ``` With `--zip-dir=DIR_PATH`, the tool will zip each crate (i.e., the workflow repository directory with the `ro-crate-metadata.json` files in it) in the format required by [WorkflowHub](https://workflowhub.eu), and place the archive under `DIR_PATH`. + +Run `python gen_crates.py --help` for more information on the available options. diff --git a/workflows/gen_crates.py b/workflows/gen_crates.py index 6aeff03f3e..cca9780c72 100644 --- a/workflows/gen_crates.py +++ b/workflows/gen_crates.py @@ -21,19 +21,16 @@ """\ Generate RO-Crate metadata for workflow repositories. -Assumes a two-tier directory structure where: +Workflow repositories are searched for starting from the specified root +directories (the default is to search below the current directory). Uses the +same searching logic and definition of repository as the ci_find_repos Planemo +command (any directory with a .shed.yml or .dockstore.yml file). -- the top-level directory represents a workflow category, e.g., - `sars-cov-2-variant-calling`; -- level 2 directories represent individual workflow repositories, e.g., - `sars-cov-2-consensus-from-variation`. +Workflow repositories are expected to contain: -Level 2 directories are expected to contain: - -- the `.ga` workflow file, e.g., `consensus-from-variation.ga`; -- a [Planemo](https://github.com/galaxyproject/planemo) test file with the - same name as the workflow file, but with a `-test.yml` extension, e.g., - `consensus-from-variation-test.yml`; +- the .ga workflow file, e.g., "consensus-from-variation.ga"; +- a Planemo test file with the same name as the workflow file, but with a + "-test.yml" extension, e.g., "consensus-from-variation-test.yml". 
""" import argparse @@ -42,7 +39,10 @@ import shutil from pathlib import Path -# pip install 'rocrate==0.4.0' +import planemo +from planemo.context import PlanemoContext +from planemo.shed import find_raw_repositories +from planemo.ci import filter_paths from rocrate.rocrate import ROCrate from rocrate.model.person import Person from rocrate.model.entity import Entity @@ -53,7 +53,7 @@ GH_WORKFLOW = "workflow_test.yml" TARGET_OWNER = "iwc-workflows" GH_API_URL = "https://api.github.com" -PLANEMO_VERSION = ">=0.74.4" +PLANEMO_VERSION = f">={planemo.__version__}" PLANEMO_TEST_SUFFIXES = ["-tests", "_tests", "-test", "_test"] PLANEMO_TEST_EXTENSIONS = [".yml", ".yaml", ".json"] @@ -101,8 +101,7 @@ def handle_creator(ga_json, crate, workflow): workflow["creator"] = ro_creators -def make_crate(repo_dir_entry, target_owner, resource, planemo_version): - crate_dir = repo_dir_entry.path +def make_crate(crate_dir, target_owner, resource, planemo_version): wf_id = get_wf_id(crate_dir) planemo_id, planemo_source = get_planemo_id(crate_dir, wf_id) crate = ROCrate(gen_preview=False) @@ -112,12 +111,12 @@ def make_crate(repo_dir_entry, target_owner, resource, planemo_version): workflow = crate.add_workflow(wf_source, wf_id, main=True, lang="galaxy", gen_cwl=False) handle_creator(code, crate, workflow) - workflow["name"] = code.get("name", repo_dir_entry.name) + workflow["name"] = code.get("name", crate_dir.name) try: workflow["version"] = code["release"] except KeyError: pass - wf_url = f"https://github.com/{target_owner}/{repo_dir_entry.name}" + wf_url = f"https://github.com/{target_owner}/{crate_dir.name}" workflow["url"] = crate.root_dataset["isBasedOn"] = wf_url try: crate.root_dataset["license"] = code["license"] @@ -135,25 +134,34 @@ def make_crate(repo_dir_entry, target_owner, resource, planemo_version): crate.metadata.write(crate_dir) +def find_repos(paths, exclude=()): + """\ + Find all workflow directories below each path in ``paths``. 
+ + Same as ``planemo ci_find_repos``. + """ + ctx = PlanemoContext() + kwargs = dict(recursive=True, fail_fast=True, chunk_count=1, chunk=0, exclude=exclude) + raw_repos = [_.path for _ in find_raw_repositories(ctx, paths, **kwargs)] + return [Path(_) for _ in filter_paths(ctx, raw_repos, path_type="repo", **kwargs)] + + def main(args): if args.zip_dir: zip_dir = Path(args.zip_dir) zip_dir.mkdir(parents=True, exist_ok=True) resource = f"repos/{args.owner}/{args.repo}/actions/workflows/{args.workflow}" - for root in args.root: - for entry in os.scandir(root): - if not entry.is_dir(): - continue - print(f"processing {entry.path}") - if args.no_overwrite and (Path(entry.path) / "ro-crate-metadata.json").is_file(): - print(" crate exists, not overwriting") - else: - make_crate(entry, args.target_owner, resource, args.planemo_version) - if args.zip_dir: - # if args.no_overwrite, zip existing crates - path = zip_dir / f"{entry.name}.crate" - archive = shutil.make_archive(path, "zip", entry.path) - print(f" archived as {archive}") + for repo in find_repos(args.root, exclude=args.exclude): + print(f"processing {repo}") + if args.no_overwrite and (repo / "ro-crate-metadata.json").is_file(): + print(" crate exists, not overwriting") + else: + make_crate(repo, args.target_owner, resource, args.planemo_version) + if args.zip_dir: + # if args.no_overwrite, zip existing crates + path = zip_dir / f"{repo.name}.crate" + archive = shutil.make_archive(path, "zip", repo) + print(f" archived as {archive}") if __name__ == "__main__": @@ -162,6 +170,8 @@ def main(args): ) parser.add_argument("root", metavar="ROOT_DIR", help="top-level directory", nargs="*", default=[os.getcwd()]) + parser.add_argument("--exclude", metavar="PATH", nargs="*", default=(), + help="paths to exclude while searching for workflow repos") parser.add_argument("--owner", metavar="STRING", default=OWNER, help="owner of the github workflow that runs the tests") parser.add_argument("--repo", metavar="STRING", 
default=REPO, diff --git a/workflows/requirements.txt b/workflows/requirements.txt index 174d066957..cb5a48e83e 100644 --- a/workflows/requirements.txt +++ b/workflows/requirements.txt @@ -1 +1,2 @@ rocrate~=0.4.0 +planemo>=0.74.5