Skip to content

Commit

Permalink
gen_crates: add repo discovery
Browse files Browse the repository at this point in the history
  • Loading branch information
simleo committed Jul 20, 2021
1 parent b6c63e3 commit a01578c
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 34 deletions.
13 changes: 10 additions & 3 deletions workflows/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,17 +102,22 @@ If you are encountering difficulties at any point don't hesitate to ask for help
A [Workflow Testing RO-Crate](https://crs4.github.io/life_monitor/workflow_testing_ro_crate) will be auto-generated after merging a workflow.
You do not need to manually create this file, but if you want to try this today, these are the instructions:
This directory, along with the workflow category directories, contains a Python tool to generate [Workflow Testing RO-Crate](https://crs4.github.io/life_monitor/workflow_testing_ro_crate) metadata files (`ro-crate-metadata.json`). It also contains a requirements file to install the tool's dependencies (mainly [ro-crate-py](https://github.com/ResearchObject/ro-crate-py)):
This directory contains a Python tool to generate a Workflow Testing RO-Crate metadata file (`ro-crate-metadata.json`) in each workflow repository dir, along with a requirements file to install the tool's dependencies:

```bash
python3 -m venv venv
source venv/bin/activate
pip install --upgrade pip
pip install -r requirements.txt
python gen_crates.py sars-cov-2-variant-calling
python gen_crates.py
```

The tool expects to find the workflow file and Planemo test file as described above. The `README.md` file is not expected, but it's included in the crate (i.e., listed among the metadata) if found.
Workflow repository dirs are searched for using the same logic and definition
of repository as the `planemo ci_find_repos` command (any directory with a
`.shed.yml` or `.dockstore.yml` file). The tool expects to find the workflow
file and Planemo test file as described above. The `README.md` file is not
expected, but it's included in the crate (i.e., listed among the metadata) if
found.

The following metadata is not expected, but included in the crate if found in the workflow file:

Expand All @@ -127,3 +132,5 @@ The following metadata is not expected, but included in the crate if found in th
```

With `--zip-dir=DIR_PATH`, the tool will zip each crate (i.e., the workflow repository directory with the `ro-crate-metadata.json` files in it) in the format required by [WorkflowHub](https://workflowhub.eu), and place the archive under `DIR_PATH`.

Run `python gen_crates.py --help` for more information on the available options.
72 changes: 41 additions & 31 deletions workflows/gen_crates.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,16 @@
"""\
Generate RO-Crate metadata for workflow repositories.
Assumes a two-tier directory structure where:
Workflow repositories are searched for starting from the specified root
directories (the default is to search below the current directory). Uses the
same searching logic and definition of repository as the ci_find_repos Planemo
command (any directory with a .shed.yml or .dockstore.yml file).
- the top-level directory represents a workflow category, e.g.,
`sars-cov-2-variant-calling`;
- level 2 directories represent individual workflow repositories, e.g.,
`sars-cov-2-consensus-from-variation`.
Workflow repositories are expected to contain:
Level 2 directories are expected to contain:
- the `.ga` workflow file, e.g., `consensus-from-variation.ga`;
- a [Planemo](https://github.com/galaxyproject/planemo) test file with the
same name as the workflow file, but with a `-test.yml` extension, e.g.,
`consensus-from-variation-test.yml`;
- the .ga workflow file, e.g., "consensus-from-variation.ga";
- a Planemo test file with the same name as the workflow file, but with a
"-test.yml" extension, e.g., "consensus-from-variation-test.yml".
"""

import argparse
Expand All @@ -42,7 +39,10 @@
import shutil
from pathlib import Path

# pip install 'rocrate==0.4.0'
import planemo
from planemo.context import PlanemoContext
from planemo.shed import find_raw_repositories
from planemo.ci import filter_paths
from rocrate.rocrate import ROCrate
from rocrate.model.person import Person
from rocrate.model.entity import Entity
Expand All @@ -53,7 +53,7 @@
GH_WORKFLOW = "workflow_test.yml"
TARGET_OWNER = "iwc-workflows"
GH_API_URL = "https://api.github.com"
PLANEMO_VERSION = ">=0.74.4"
PLANEMO_VERSION = f">={planemo.__version__}"
PLANEMO_TEST_SUFFIXES = ["-tests", "_tests", "-test", "_test"]
PLANEMO_TEST_EXTENSIONS = [".yml", ".yaml", ".json"]

Expand Down Expand Up @@ -101,8 +101,7 @@ def handle_creator(ga_json, crate, workflow):
workflow["creator"] = ro_creators


def make_crate(repo_dir_entry, target_owner, resource, planemo_version):
crate_dir = repo_dir_entry.path
def make_crate(crate_dir, target_owner, resource, planemo_version):
wf_id = get_wf_id(crate_dir)
planemo_id, planemo_source = get_planemo_id(crate_dir, wf_id)
crate = ROCrate(gen_preview=False)
Expand All @@ -112,12 +111,12 @@ def make_crate(repo_dir_entry, target_owner, resource, planemo_version):
workflow = crate.add_workflow(wf_source, wf_id, main=True,
lang="galaxy", gen_cwl=False)
handle_creator(code, crate, workflow)
workflow["name"] = code.get("name", repo_dir_entry.name)
workflow["name"] = code.get("name", crate_dir.name)
try:
workflow["version"] = code["release"]
except KeyError:
pass
wf_url = f"https://github.com/{target_owner}/{repo_dir_entry.name}"
wf_url = f"https://github.com/{target_owner}/{crate_dir.name}"
workflow["url"] = crate.root_dataset["isBasedOn"] = wf_url
try:
crate.root_dataset["license"] = code["license"]
Expand All @@ -135,25 +134,34 @@ def make_crate(repo_dir_entry, target_owner, resource, planemo_version):
crate.metadata.write(crate_dir)


def find_repos(paths, exclude=()):
    """\
    Return the workflow repository directories found below ``paths``.

    Mirrors ``planemo ci_find_repos``: the raw repositories are collected
    with ``find_raw_repositories`` and then narrowed with ``filter_paths``.
    The same search options (including ``exclude``) are passed to both
    calls. Each resulting entry is returned as a ``pathlib.Path``.
    """
    # Options shared by the discovery and the filtering step.
    search_opts = {
        "recursive": True,
        "fail_fast": True,
        "chunk_count": 1,
        "chunk": 0,
        "exclude": exclude,
    }
    ctx = PlanemoContext()
    candidates = []
    for raw_repo in find_raw_repositories(ctx, paths, **search_opts):
        candidates.append(raw_repo.path)
    selected = filter_paths(ctx, candidates, path_type="repo", **search_opts)
    return list(map(Path, selected))


def main(args):
if args.zip_dir:
zip_dir = Path(args.zip_dir)
zip_dir.mkdir(parents=True, exist_ok=True)
resource = f"repos/{args.owner}/{args.repo}/actions/workflows/{args.workflow}"
for root in args.root:
for entry in os.scandir(root):
if not entry.is_dir():
continue
print(f"processing {entry.path}")
if args.no_overwrite and (Path(entry.path) / "ro-crate-metadata.json").is_file():
print(" crate exists, not overwriting")
else:
make_crate(entry, args.target_owner, resource, args.planemo_version)
if args.zip_dir:
# if args.no_overwrite, zip existing crates
path = zip_dir / f"{entry.name}.crate"
archive = shutil.make_archive(path, "zip", entry.path)
print(f" archived as {archive}")
for repo in find_repos(args.root, exclude=args.exclude):
print(f"processing {repo}")
if args.no_overwrite and (repo / "ro-crate-metadata.json").is_file():
print(" crate exists, not overwriting")
else:
make_crate(repo, args.target_owner, resource, args.planemo_version)
if args.zip_dir:
# if args.no_overwrite, zip existing crates
path = zip_dir / f"{repo.name}.crate"
archive = shutil.make_archive(path, "zip", repo)
print(f" archived as {archive}")


if __name__ == "__main__":
Expand All @@ -162,6 +170,8 @@ def main(args):
)
parser.add_argument("root", metavar="ROOT_DIR", help="top-level directory",
nargs="*", default=[os.getcwd()])
parser.add_argument("--exclude", metavar="PATH", nargs="*", default=(),
help="paths to exclude while searching for workflow repos")
parser.add_argument("--owner", metavar="STRING", default=OWNER,
help="owner of the github workflow that runs the tests")
parser.add_argument("--repo", metavar="STRING", default=REPO,
Expand Down
1 change: 1 addition & 0 deletions workflows/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
rocrate~=0.4.0
planemo>=0.74.5

0 comments on commit a01578c

Please sign in to comment.