From 47b5671b0b11984474ba68c4ed14f6336297f514 Mon Sep 17 00:00:00 2001 From: pchaumeil Date: Wed, 17 Apr 2024 09:42:49 +1000 Subject: [PATCH] docs(update doco for R220): update md5sum, changelog, and announcement. --- docs/src/announcements.rst | 11 +++++++++++ docs/src/changelog.rst | 12 ++++++++---- docs/src/installing/index.rst | 12 ++++++------ gtdbtk/config/common.py | 35 ++++++++++++----------------------- gtdbtk/external/pplacer.py | 3 +++ 5 files changed, 40 insertions(+), 33 deletions(-) diff --git a/docs/src/announcements.rst b/docs/src/announcements.rst index 2a6bfdf8..f30a3290 100644 --- a/docs/src/announcements.rst +++ b/docs/src/announcements.rst @@ -1,6 +1,17 @@ Announcements ============= +GTDB-Tk 2.4.0 available +----------------------- + +*TBA* + +* GTDB-Tk version ``2.4.0`` is now available. +* This version of GTDB-Tk requires a new version of the GTDB-Tk reference package (Release 220). + `gtdbtk_r220_data.tar.gz `_. + + + GTDB-Tk 2.3.0 available ----------------------- diff --git a/docs/src/changelog.rst b/docs/src/changelog.rst index baccbf67..ad20e241 100644 --- a/docs/src/changelog.rst +++ b/docs/src/changelog.rst @@ -8,11 +8,18 @@ Change log Bug Fixes: +* (`#576 `_) When all genomes fail the prodigal step in the classify_wf, the +bac120 summary file is still produced with all failed genomes listed as 'Unclassified' +* (`#573 `_) When running the 3 classify steps independently, a genome can be filtered out in the align +step but still be classified in the identify step. To avoid duplication of rows, the genome is classified with a warning. +* (`#540 `_) Empty files are skipped during the sketch step of Mash, +they are then caught in the prodigal step and are returned as 'Unclassified' +* (`#549 `_) : `--force` has been modified to deal with #540. Prodigal +wasn't returning the empty files as failed genomes; it was only skipping them. These genomes are now returned in the summary file and flagged as Unclassified. 
Major Changes: * FastANI has been replaced by skani as the primary tool for computing Average Nucleotide Identity (ANI).Users may notice slight variations in the results compared to those obtained using FastANI. - * In the generated `summary.tsv` files, several columns have been renamed for clarity and consistency. The following columns have been affected: - "`fastani_reference`" column has been renamed to "`closest_genome_reference`". @@ -23,9 +30,6 @@ Major Changes: These changes have been implemented to improve the readability and understanding of the data within the `summary.tsv` files. Users should update their scripts or processes accordingly to reflect these renamed column headers. -Minor Changes: - - 2.3.2 diff --git a/docs/src/installing/index.rst b/docs/src/installing/index.rst index 8bfa0f7b..758281ee 100644 --- a/docs/src/installing/index.rst +++ b/docs/src/installing/index.rst @@ -33,13 +33,13 @@ Hardware requirements - Storage - Time * - Archaea - - ~45 GB - - ~85 GB - - ~1 hour / 1,000 genomes @ 64 CPUs + - ~60 GB + - ~106 GB + - ~90 minutes / 1,000 genomes @ 64 CPUs * - Bacteria - - ~65GB (410 GB when using --full_tree) - - ~85 GB - - ~1 hour / 1,000 genomes @ 64 CPUs + - ~90 GB (545 GB when using --full_tree) + - ~106 GB + - ~90 minutes / 1,000 genomes @ 64 CPUs .. note:: The amount reported of memory reported can vary depending on the number of pplacer threads. diff --git a/gtdbtk/config/common.py b/gtdbtk/config/common.py index 998cd6ec..2c016c70 100644 --- a/gtdbtk/config/common.py +++ b/gtdbtk/config/common.py @@ -11,8 +11,8 @@ class __GTDBTkCommonConfig: that requires the setting of the GTDB-Tk reference data path. 
""" - MIN_REF_DATA_VERSION = 'r207' - COMPATIBLE_REF_DATA_VERSIONS = ['r207','r214'] + MIN_REF_DATA_VERSION = 'r220' + COMPATIBLE_REF_DATA_VERSIONS = ['r220'] BACKBONE_PPLACER_REF_PKG = 'gtdbtk_package_backbone.refpkg' CLASS_LEVEL_PPLACER_REF_PKG = 'gtdbtk.package.{iter}.refpkg' @@ -333,31 +333,20 @@ def get_REF_HASHES(self,version=None): if version is not None and version not in compatible_versions: raise ValueError(f"Version {version} is not compatible with this version of GTDB-Tk. Compatible versions are {compatible_versions}") - if version is None or version==214: + if version is None or version==220: return { - self.PPLACER_DIR: '6786e9fc16b31db7d6eaaa9f8cfa87a8a4974434', - self.MASK_DIR: '8d5a2139feabbb70789c62155f3761d2aeed1601', + self.PPLACER_DIR: '75fdd0e093c9af6a73cb510c3d0cd2041265e093', + self.MASK_DIR: 'f4b8ebfa59526a7a86f09752b47e8de1efc384c7', self.MARKER_DIR: '163f542c3f0a40f59df45d453aa235b39aa96e27', - self.RADII_DIR: '4753acc920001a1400788ee89cb4632900449055', - self.MSA_FOLDER: '75df495678a121497e14346b453caf42f4b03922', - self.METADATA_DIR: 'a089cc36bf79a40c7506019accc5f93e940d9fed', - self.TAX_FOLDER: '89b12cf8106f326887599dcb30ef94ebba142035', - self.SKANI_DIR: 'e12824beccc15fe67a373e2aa8eee72feecf89c6', - self.RED_DIR: 'c24a2f48bb0c1df38f92a8f526aa846f596c94c6' - } - elif version==207: - return { - self.PPLACER_DIR: '20903925a856a58b102a7b0ce160c5cbd2cf675b', - self.MASK_DIR: '50e414a9de18170e8cb97f990f89ff60a0fe29d5', - self.MARKER_DIR: '163f542c3f0a40f59df45d453aa235b39aa96e27', - self.RADII_DIR: '8fd13b1c5d7a7b073ba96fb628581613b293a374', - self.MSA_FOLDER: '24f250d7cf0eb0bc65dccd2f3c9247e553ea322f', - self.METADATA_DIR: '9772fbeac1311b31e10293fa610eb33aa1ec8e15', - self.TAX_FOLDER: '6fb0233b05633242369b40c026fd1ee53e266afa', - self.SKANI_DIR: '973c456c02f55bb82908a6811c7076e207e9b206', - self.RED_DIR: '7b8b67b3157204b470c9eb809d3c39c4effffabc' + self.RADII_DIR: '63d06ecc8b4547addd22c5b06ada4a28c5332bcc', + self.MSA_FOLDER: 
'3d5c1cf5346b244fcb0a9d48d2f1a9358a71cc7a', + self.METADATA_DIR: '01b8c23253cef097b1bc233d609dae9eb84c98e2', + self.TAX_FOLDER: '6758173fa61ae4a77f5588ec2874ea52ed345feb', + self.SKANI_DIR: 'ff58a1d7e0584da324d140701ee12cead4f0df9d', + self.RED_DIR: '206bd781997fffbac951b4437dd75e6543139fd6' } + REF_HASHES = property(get_REF_HASHES) diff --git a/gtdbtk/external/pplacer.py b/gtdbtk/external/pplacer.py index c91c5aeb..cb7ef2b4 100644 --- a/gtdbtk/external/pplacer.py +++ b/gtdbtk/external/pplacer.py @@ -21,6 +21,7 @@ import queue import re import subprocess +import sys from tqdm import tqdm @@ -76,6 +77,8 @@ def run(self, cpus, model, ref_pkg, json_out, msa_file, pplacer_out, out_q = mp.Queue() pid = mp.Value('i', 0) + # print(f'Running pplacer with the following command: {" ".join(args)}') + # sys.exit(0) p_worker = mp.Process(target=self._worker, args=( args, out_q, pplacer_out, pid)) p_writer = mp.Process(target=self._writer, args=(out_q, pid))