Skip to content

Commit

Permalink
Merge pull request #346 from Ecogenomics/staging
Browse files Browse the repository at this point in the history
1.7.0
  • Loading branch information
aaronmussig authored Oct 15, 2021
2 parents 80a4801 + 3734926 commit eaccdd6
Show file tree
Hide file tree
Showing 27 changed files with 411 additions and 284 deletions.
File renamed without changes.
31 changes: 0 additions & 31 deletions .github/workflows/python-publish.yml

This file was deleted.

49 changes: 49 additions & 0 deletions .github/workflows/release-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Release pipeline, triggered when a GitHub release is published.
# Job order: `pypi` uploads the Python package, then `docker` builds and
# pushes the container image.
name: Upload Python Package

on:
  release:
    types:
      - published

jobs:
  # Build the source distribution and wheel, then upload them with twine.
  pypi:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.6'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade setuptools wheel twine
      - name: Build and publish
        # twine reads its credentials from these environment variables.
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          python setup.py sdist bdist_wheel
          twine upload dist/*

  # Build the Docker image and push it to Docker Hub.
  docker:
    # Do not start until the pypi job has completed successfully.
    needs: pypi
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1
      - name: Login to DockerHub
        uses: docker/login-action@v1
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
      - name: Build and push
        uses: docker/build-push-action@v2
        with:
          push: true
          # The release tag is passed to the Dockerfile as its VER build argument.
          build-args: |
            VER=${{ github.event.release.tag_name }}
          # Publish under both the moving `latest` tag and the release tag.
          tags: ecogenomic/gtdbtk:latest,ecogenomic/gtdbtk:${{ github.event.release.tag_name }}
37 changes: 14 additions & 23 deletions docker/Dockerfile → Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,28 @@
# docker build --build-arg VER=1.2.3 --no-cache -t ecogenomic/gtdbtk:latest -t ecogenomic/gtdbtk:1.2.3 .
# docker push ecogenomic/gtdbtk:latest && sudo docker push ecogenomic/gtdbtk:1.2.3

FROM ubuntu:18.04
FROM python:3.8-slim-bullseye

ARG VER

# ---------------------------------------------------------------------------- #
# --------------------- INSTALL HMMER, PYTHON3, FASTTREE---------------------- #
# ---------------------------------------------------------------------------- #
RUN apt-get update -y -m && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
wget \
libgomp1 \
hmmer=3.1b2+dfsg-5ubuntu1 \
mash=2.0-2 \
prodigal=1:2.6.3-1 \
fasttree=2.1.10-1 \
unzip \
python3.8 \
python3-pip \
python3-setuptools \
python3-wheel && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends --no-install-suggests -y \
wget \
libgomp1 \
libgsl25 \
libgslcblas0 \
hmmer=3.* \
mash=2.2.* \
prodigal=1:2.6.* \
fasttree=2.1.* \
unzip && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
ln -s /usr/bin/fasttree /usr/bin/FastTree && \
ln -s /usr/bin/fasttreeMP /usr/bin/FastTreeMP

# ---------------------------------------------------------------------------- #
# ------------------------------ ALIAS PYTHON3 ------------------------------ #
# ---------------------------------------------------------------------------- #
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.8 1 && \
update-alternatives --set python /usr/bin/python3.8

# ---------------------------------------------------------------------------- #
# ----------------------------- INSTALL PPLACER ------------------------------ #
# ---------------------------------------------------------------------------- #
Expand All @@ -45,9 +36,9 @@ RUN wget https://github.com/matsen/pplacer/releases/download/v1.1.alpha19/pplace
# ---------------------------------------------------------------------------- #
# ----------------------------- INSTALL FASTANI ------------------------------ #
# ---------------------------------------------------------------------------- #
RUN wget https://github.com/ParBLiSS/FastANI/releases/download/v1.32/fastANI-Linux64-v1.32.zip -q && \
unzip fastANI-Linux64-v1.32.zip -d /usr/bin && \
rm fastANI-Linux64-v1.32.zip
RUN wget https://github.com/ParBLiSS/FastANI/releases/download/v1.33/fastANI-Linux64-v1.33.zip -q && \
unzip fastANI-Linux64-v1.33.zip -d /usr/bin && \
rm fastANI-Linux64-v1.33.zip

# ---------------------------------------------------------------------------- #
# --------------------- SET GTDB-TK MOUNTED DIRECTORIES ---------------------- #
Expand Down
2 changes: 0 additions & 2 deletions docker/hooks/build

This file was deleted.

9 changes: 9 additions & 0 deletions docs/src/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@
Change log
==========

1.7.0
-----

* (`#336 <https://github.com/Ecogenomics/GTDBTk/issues/336>`_) Warn the user if they have provided an incorrectly formatted taxonomy file.
* (`#348 <https://github.com/Ecogenomics/GTDBTk/issues/348>`_) Gracefully exit the program if no single copy hits could be identified.
* (`#351 <https://github.com/Ecogenomics/GTDBTk/issues/351>`_) Fixed an issue where GTDB-Tk would crash if spaces were present in the reference data path.
* (`#354 <https://github.com/Ecogenomics/GTDBTk/pull/354>`_) Added optional ``--tmpdir`` argument to set temporary directory (thanks `tr11-sanger <https://github.com/tr11-sanger>`_!).


1.6.0
-----

Expand Down
2 changes: 1 addition & 1 deletion docs/src/commands/de_novo_wf.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ de_novo_wf

For arguments and output files, see each of the individual steps:

* :ref:`commands/infer`
* :ref:`commands/identify`
* :ref:`commands/align`
* :ref:`commands/infer`
* :ref:`commands/root`
Expand Down
8 changes: 4 additions & 4 deletions docs/src/installing/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Hardware requirements
- ~27 GB
- ~1 hour / 1,000 genomes @ 64 CPUs
* - Bacteria
- ~204 GB
- ~215 GB
- ~27 GB
- ~1 hour / 1,000 genomes @ 64 CPUs

Expand Down Expand Up @@ -66,7 +66,7 @@ GTDB-Tk is designed for Python >=3.6 and requires the following libraries, which
- >= 1.9.0
- Harris, C.R., Millman, K.J., van der Walt, S.J. et al. Array programming with NumPy. Nature 585, 357–362 (2020). DOI: `10.1038/s41586-020-2649-2 <https://doi.org/10.1038/s41586-020-2649-2>`_
* - `tqdm <https://github.com/tqdm/tqdm>`_
- >= 4.31.0
- >= 4.35.0
- DOI: `10.5281/zenodo.595120 <https://doi.org/10.5281/zenodo.595120>`_


Expand Down Expand Up @@ -124,7 +124,7 @@ GTDB-Tk requires ~27G of external data that needs to be downloaded and unarchive
wget https://data.gtdb.ecogenomic.org/releases/latest/auxillary_files/gtdbtk_data.tar.gz
wget https://data.ace.uq.edu.au/public/gtdb/data/releases/latest/auxillary_files/gtdbtk_data.tar.gz (or, mirror)
tar xvzf gtdbtk_r95_data.tar.gz
tar xvzf gtdbtk_data.tar.gz
Note that different versions of the GTDB release data may not run on all versions of GTDB-Tk; the supported versions are listed below:
Expand All @@ -139,7 +139,7 @@ Note that different versions of the GTDB release data may not run on all version
- Maximum version
* - R202
- 1.5.0
- N/A
- Current
* - R95
- 1.3.0
- 1.4.2
Expand Down
2 changes: 1 addition & 1 deletion gtdbtk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@
__status__ = 'Production'
__title__ = 'GTDB-Tk'
__url__ = 'https://github.com/Ecogenomics/GTDBTk'
__version__ = '1.6.0'
__version__ = '1.7.0'
2 changes: 1 addition & 1 deletion gtdbtk/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def main():
# Warn the user they are not using the latest version (if possible)
latest_ver = get_gtdbtk_latest_version()
if latest_ver and latest_ver != __version__:
print(f'Note: There is a newer version of GTDB-Tk available: v{latest_ver}')
print(f'There is a newer version of GTDB-Tk available: v{latest_ver}')
sys.exit(0)
elif sys.argv[1] in {'-h', '--h', '-help', '--help'}:
print_help()
Expand Down
2 changes: 1 addition & 1 deletion gtdbtk/biolib_lite/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from .exceptions import BioLibFileNotFound, BioLibDirNotFound, BioLibIOException


def canonical_gid(gid):
def canonical_gid(gid: str) -> str:
"""Get canonical form of NCBI genome accession.
Example:
Expand Down
15 changes: 8 additions & 7 deletions gtdbtk/biolib_lite/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@
import logging
import re
from collections import defaultdict
from typing import Dict, List

import dendropy

from gtdbtk.biolib_lite.common import canonical_gid, is_float
from gtdbtk.exceptions import GTDBTkExit

"""
To do:
Expand Down Expand Up @@ -791,33 +793,32 @@ def read_from_tree(self, tree, warnings=True):

return taxonomy

def read(self, taxonomy_file, canonical_ids=False):
def read(self, taxonomy_file: str, canonical_ids: bool = False) -> Dict[str, List[str]]:
"""Read Greengenes-style taxonomy file.
Expected format is:
<id>\t<taxonomy string>
where the taxonomy string has the formats:
d__; c__; o__; f__; g__; s__
d__; p__; c__; o__; f__; g__; s__
Parameters
----------
taxonomy_file : str
Path to a Greengenes-style taxonomy file.
canonical_ids : bool
True if to use the canonical ID format, False otherwise.
Returns
-------
dict[str, tuple[str, str, str, str, str, str, str]]
d[unique_id] -> [d__<taxon>, ..., s__<taxon>]
"""

try:
d = {}
with open(taxonomy_file, 'r') as f:
for row, line in enumerate(f.readlines()):
line_split = line.split('\t')

if len(line_split) != 2:
raise GTDBTkExit(f'Not a tab-separated line: {line}')

unique_id = line_split[0]
if canonical_ids:
unique_id = canonical_gid(unique_id)
Expand Down
20 changes: 19 additions & 1 deletion gtdbtk/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import argparse
import tempfile
from contextlib import contextmanager

from gtdbtk.biolib_lite.custom_help_formatter import ChangeTempAction
from gtdbtk.biolib_lite.custom_help_formatter import CustomHelpFormatter
from gtdbtk.config.config import AF_THRESHOLD

Expand All @@ -24,6 +26,11 @@ def arg_group(parser, name):
yield parser.add_argument_group(name)


def __temp_dir(group):
    """Register the optional ``--tmpdir`` argument on an argument group.

    Parameters
    ----------
    group
        Object exposing ``add_argument`` (e.g. an argparse argument group).

    Notes
    -----
    The default is the value of ``tempfile.gettempdir()`` at the moment this
    function is called. The supplied value is processed by
    ``ChangeTempAction``, which is defined outside this block.
    """
    # NOTE(review): ChangeTempAction presumably redirects the temporary
    # directory used by the process - confirm in custom_help_formatter.
    options = dict(
        action=ChangeTempAction,
        default=tempfile.gettempdir(),
        help="specify alternative directory for temporary files",
    )
    group.add_argument('--tmpdir', **options)


def __genome_dir(group):
    """Register the ``--genome_dir`` argument on an argument group.

    Parameters
    ----------
    group
        Object exposing ``add_argument`` (e.g. an argparse argument group).
    """
    description = "directory containing genome files in FASTA format"
    group.add_argument('--genome_dir', help=description)

Expand Down Expand Up @@ -136,7 +143,8 @@ def __gtdbtk_classification_file(group):
def __custom_taxonomy_file(group):
group.add_argument('--custom_taxonomy_file', type=str, default=None,
help="file indicating custom taxonomy strings for user "
"genomes, that should contain any genomes belonging to the outgroup")
"genomes, that should contain any genomes belonging to the outgroup. "
"Format: GENOME_ID<TAB>d__;p__;c__;o__;f__;g__;s__")


def __prefix(group):
Expand Down Expand Up @@ -320,6 +328,7 @@ def get_main_parser():
__prefix(grp)
__cpus(grp)
__force(grp)
__temp_dir(grp)
__debug(grp)
__help(grp)

Expand All @@ -341,6 +350,7 @@ def get_main_parser():
# __recalculate_red(grp)
# __split_tree(grp)
__min_af(grp)
__temp_dir(grp)
__debug(grp)
__help(grp)

Expand All @@ -357,6 +367,7 @@ def get_main_parser():
__cpus(grp)
__force(grp)
__write_single_copy_genes(grp)
__temp_dir(grp)
__debug(grp)
__help(grp)

Expand All @@ -376,6 +387,7 @@ def get_main_parser():
__rnd_seed(grp)
__prefix(grp)
__cpus(grp)
__temp_dir(grp)
__debug(grp)
__help(grp)
with mutex_group(parser, required=False) as grp:
Expand All @@ -393,6 +405,7 @@ def get_main_parser():
__gamma(grp)
__prefix(grp)
__cpus(grp)
__temp_dir(grp)
__debug(grp)
__help(grp)

Expand All @@ -413,6 +426,7 @@ def get_main_parser():
# __split_tree(grp)
# __recalculate_red(grp)
__min_af(grp)
__temp_dir(grp)
__debug(grp)
__help(grp)

Expand All @@ -425,6 +439,7 @@ def get_main_parser():
with arg_group(parser, 'optional arguments') as grp:
__gtdbtk_classification_file(grp)
__custom_taxonomy_file(grp)
__temp_dir(grp)
__debug(grp)
__help(grp)

Expand All @@ -436,6 +451,7 @@ def get_main_parser():
with arg_group(parser, 'optional arguments') as grp:
__gtdbtk_classification_file(grp)
__custom_taxonomy_file(grp)
__temp_dir(grp)
__debug(grp)
__help(grp)

Expand All @@ -446,6 +462,7 @@ def get_main_parser():
__ingroup_taxon(grp, required=True)
__output_tree(grp, required=True)
with arg_group(parser, 'optional arguments') as grp:
__temp_dir(grp)
__debug(grp)
__help(grp)

Expand All @@ -469,6 +486,7 @@ def get_main_parser():
__extension(grp)
__prefix(grp)
__cpus(grp)
__temp_dir(grp)
__debug(grp)
__help(grp)

Expand Down
Loading

0 comments on commit eaccdd6

Please sign in to comment.