diff --git a/CHEWBBACA/tests/data/allelecall_data/test_invalid_input_names/pdb_prefix/5454.fasta b/CHEWBBACA/tests/data/allelecall_data/test_invalid_input_names/pdb_prefix/5454.fasta new file mode 100644 index 00000000..1fc8a190 --- /dev/null +++ b/CHEWBBACA/tests/data/allelecall_data/test_invalid_input_names/pdb_prefix/5454.fasta @@ -0,0 +1,2 @@ +>GCA_000007265-protein1 +ATGGTACAATATAACAATAATTATCCACAAGACAATAAGGAAGAAGCTATGACGGAAAACGAACAACTATTTTGGAATAGAGTACTAGAGCTATCTCGTTCTCAAATAGCACCAGCAGCTTATGAATTTTTTGTTCTAGAGGCTAGACTCCTCAAAATTGAACATCAAACTGCAGTTATTACTTTAGATAACATTGAAATGAAAAAGCTATTCTGGGAACAAAATTTGGGGCCTGTTATCCTAACAGCTGGTTTTGAAATTTTCAATGCTGAAATTACAGCTAACTATGTCTCAAACGATTTACATTTACAAGAAACTAGTTTTTCTAACTACCAGCAATCTAGCAATGAAGTAAATACTTTACCAATTAGAAAAATCGACTCTAACCTTAAAGAGAAATATACTTTTGCTAATTTTGTTCAAGGAGATGAAAATAGATGGGCTGTTTCAGCATCAATTGCTGTAGCTGATAGTCCTGGCACGACTTATAATCCTCTATTTATCTGGGGGGGACCTGGTCTAGGAAAGACGCATCTACTAAATGCTATTGGAAATCAAGTCTTAAGAGATAATCCAAACGCGAGGGTTTTATACATCACTGCTGAGAATTTTATTAATGAATTTGTCAGTCATATTCGTTTAGATTCGATGGAAGAATTAAAAGAAAAGTTTCGCAACTTAGACTTACTCCTGATTGATGATATTCAGTCGCTTGCTAAGAAAACCTTAGGGGGGACCCAAGAGGAGTTCTTCAATACTTTCAATGCTTTACATACAAACGATAAACAAATCGTATTGACCAGTGACCGAAATCCAAATCAATTAAATGATCTAGAAGAACGTCTAGTCACGCGCTTTAGTTGGGGACTCCCAGTAAATATCACACCACCTGATTTTGAAACACGAGTTGCTATTTTAACCAATAAAATTCAAGAATATCCTTATGATTTTCCTCAAGATACCATTGAATACTTAGCAGGAGAATTTGATTCCAACGTACGTGAATTAGAAGGAGCCTTGAAAAATATTAGTCTAGTTGCTGACTTTAAGCATGCTAAAACTATTACAGTAGATATAGCTGCAGAAGCTATCAGAGCACGTAAAAATGATGGTCCTATTGTTACTGTCATTCCTATAGAAGAAATTCAAATACAAGTTGGTAAATTCTATGGCGTAACTGTAAAAGAGATAAAAGCAACTAAAAGAACACAAGATATTGTCCTTGCAAGACAGGTAGCCATGTACTTAGCTCGTGAGATGACAGATAACAGTCTCCCAAAAATAGGTAAAGAATTTGGGGGACGAGATCACTCAACTGTTCTCCACGCTTATAATAAAATAAAAAATATGGTTGCTCAAGATGACAACTTACGAATTGAGATAGAAACTATCAAAAATAAAATCAGATAG diff --git a/CHEWBBACA/tests/test_allelecall.py b/CHEWBBACA/tests/test_allelecall.py index b38008f8..50da5dbb 100644 --- a/CHEWBBACA/tests/test_allelecall.py +++ b/CHEWBBACA/tests/test_allelecall.py @@ -85,7 +85,8 @@ def test_allelecall_valid_input(monkeypatch, args_fixture): (ta.ALLELECALL_TEST_FAKE_PATH, ct.INVALID_INPUT_PATH), (ta.ALLELECALL_TEST_BLANK_SPACE, ct.INPUTS_INCLUDE_BLANKS[:46]), (ta.ALLELECALL_TEST_LONG_PREFIX, ct.INPUTS_LONG_PREFIX[:65]), - (ta.ALLELECALL_TEST_SAME_PREFIX, ct.INPUTS_SHARE_PREFIX[:56]) + (ta.ALLELECALL_TEST_SAME_PREFIX, ct.INPUTS_SHARE_PREFIX[:56]), + (ta.ALLELECALL_TEST_PDB_CHAIN, ct.INPUTS_PDB_PREFIX[:70]), ], indirect=True ) diff --git a/CHEWBBACA/tests/test_arguments.py b/CHEWBBACA/tests/test_arguments.py index abeb81b0..23e6a5f5 100644 --- a/CHEWBBACA/tests/test_arguments.py +++ b/CHEWBBACA/tests/test_arguments.py @@ -130,6 +130,12 @@ '-g', 'data/allelecall_data/sagalactiae_schema', '-o', 'allelecall_results'] +# AlleleCall input prefix interpreted as PDB chain ID +ALLELECALL_TEST_PDB_CHAIN = ['chewBBACA.py', 'AlleleCall', + '-i', 'data/allelecall_data/test_invalid_input_names/pdb_prefix', + '-g', 'data/allelecall_data/sagalactiae_schema', + '-o', 'allelecall_results'] + # CreateSchema template command CREATESCHEMA_TEST_GENOME_TEMPLATE = ['chewBBACA.py', 'CreateSchema', '-i', 'data/createschema_data/mock_genome_dir', @@ -179,6 +185,30 @@ '-o', 'createschema_results', '--ptf', 'data/createschema_data/Streptococcus_agalactiae.trn'] +# CreateSchema input name includes blank space +CREATESCHEMA_TEST_BLANK_SPACE = ['chewBBACA.py', 'CreateSchema', + '-i', 'data/allelecall_data/test_invalid_input_names/blank_spaces', + '-o', 'createschema_results', + '--ptf', 'data/createschema_data/Streptococcus_agalactiae.trn'] + +# CreateSchema input file has unique prefix longer than 30 chars +CREATESCHEMA_TEST_LONG_PREFIX = ['chewBBACA.py', 'CreateSchema', + '-i', 'data/allelecall_data/test_invalid_input_names/long_prefix', + '-o', 'createschema_results', + '--ptf', 'data/createschema_data/Streptococcus_agalactiae.trn'] + +# CreateSchema some input files have the same prefix +CREATESCHEMA_TEST_SAME_PREFIX = ['chewBBACA.py', 'CreateSchema', + '-i', 'data/allelecall_data/test_invalid_input_names/same_prefix', + '-o', 'createschema_results', + '--ptf', 'data/createschema_data/Streptococcus_agalactiae.trn'] + +# CreateSchema input prefix interpreted as PDB chain ID +CREATESCHEMA_TEST_PDB_CHAIN = ['chewBBACA.py', 'CreateSchema', + '-i', 'data/allelecall_data/test_invalid_input_names/pdb_prefix', + '-o', 'createschema_results', + '--ptf', 'data/createschema_data/Streptococcus_agalactiae.trn'] + # AlleleCallEvaluator # AlleleCallEvaluator invalid path ALLELECALL_EVALUATOR_INVALID_PATH = ['chewBBACA.py', 'AlleleCallEvaluator', @@ -229,6 +259,26 @@ '-o', 'preped_schema', '--gl', 'data/prep_data/test_genes_list/test_genes_extension.txt'] +# PrepExternalSchema input name includes blank space +PREPEXTERNALSCHEMA_TEST_BLANK_SPACE = ['chewBBACA.py', 'PrepExternalSchema', + '-g', 'data/allelecall_data/test_invalid_input_names/blank_spaces', + '-o', 'preped_schema'] + +# PrepExternalSchema input file has unique prefix longer than 30 chars +PREPEXTERNALSCHEMA_TEST_LONG_PREFIX = ['chewBBACA.py', 'PrepExternalSchema', + '-g', 'data/allelecall_data/test_invalid_input_names/long_prefix', + '-o', 'preped_schema'] + +# PrepExternalSchema some input files have the same prefix +PREPEXTERNALSCHEMA_TEST_SAME_PREFIX = ['chewBBACA.py', 'PrepExternalSchema', + '-g', 'data/allelecall_data/test_invalid_input_names/same_prefix', + '-o', 'preped_schema'] + +# PrepExternalSchema input prefix interpreted as PDB chain ID +PREPEXTERNALSCHEMA_TEST_PDB_CHAIN = ['chewBBACA.py', 'PrepExternalSchema', + '-g', 'data/allelecall_data/test_invalid_input_names/pdb_prefix', + '-o', 'preped_schema'] + # SchemaEvaluator # SchemaEvaluator empty input files SCHEMAEVALUATOR_TEST_EMPTY_FILES = ['chewBBACA.py', 'SchemaEvaluator', diff --git a/CHEWBBACA/tests/test_createschema.py b/CHEWBBACA/tests/test_createschema.py index 5424dc63..d192c193 100644 --- a/CHEWBBACA/tests/test_createschema.py +++ b/CHEWBBACA/tests/test_createschema.py @@ -94,7 +94,11 @@ def test_createschema_valid_input(monkeypatch, args_fixture): (ta.CREATESCHEMA_TEST_ZERO_BYTES, ct.MISSING_FASTAS_EXCEPTION), (ta.CREATESCHEMA_INVALID_PTF_PATH, ct.INVALID_PTF_PATH), (ta.CREATESCHEMA_TEST_HEADER_ONLY, ct.CANNOT_PREDICT), - (ta.CREATESCHEMA_TEST_INVALID_GENOME, ct.CANNOT_PREDICT) + (ta.CREATESCHEMA_TEST_INVALID_GENOME, ct.CANNOT_PREDICT), + (ta.CREATESCHEMA_TEST_BLANK_SPACE, ct.INPUTS_INCLUDE_BLANKS[:46]), + (ta.CREATESCHEMA_TEST_LONG_PREFIX, ct.INPUTS_LONG_PREFIX[:65]), + (ta.CREATESCHEMA_TEST_SAME_PREFIX, ct.INPUTS_SHARE_PREFIX[:56]), + (ta.CREATESCHEMA_TEST_PDB_CHAIN, ct.INPUTS_PDB_PREFIX[:70]) ], indirect=True ) diff --git a/CHEWBBACA/tests/test_prepexternalschema.py b/CHEWBBACA/tests/test_prepexternalschema.py index 5f5a64e4..ffcb4db7 100644 --- a/CHEWBBACA/tests/test_prepexternalschema.py +++ b/CHEWBBACA/tests/test_prepexternalschema.py @@ -89,7 +89,11 @@ def test_prep_valid_input(monkeypatch, args_fixture): (ta.PREPEXTERNALSCHEMA_TEST_EMPTY_DIR, ct.MISSING_FASTAS_EXCEPTION), (ta.PREPEXTERNALSCHEMA_TEST_EMPTY_FILES, ct.MISSING_FASTAS_EXCEPTION), (ta.PREPEXTERNALSCHEMA_TEST_ZERO_BYTES, ct.MISSING_FASTAS_EXCEPTION), - (ta.PREPEXTERNALSCHEMA_TEST_INVALID_PATH, ct.INVALID_INPUT_PATH) + (ta.PREPEXTERNALSCHEMA_TEST_INVALID_PATH, ct.INVALID_INPUT_PATH), + (ta.PREPEXTERNALSCHEMA_TEST_BLANK_SPACE, ct.INPUTS_INCLUDE_BLANKS[:46]), + (ta.PREPEXTERNALSCHEMA_TEST_LONG_PREFIX, ct.INPUTS_LONG_PREFIX[:65]), + (ta.PREPEXTERNALSCHEMA_TEST_SAME_PREFIX, ct.INPUTS_SHARE_PREFIX[:56]), + (ta.PREPEXTERNALSCHEMA_TEST_PDB_CHAIN, ct.INPUTS_PDB_PREFIX[:70]) ], indirect=True ) diff --git a/CHEWBBACA/utils/constants.py b/CHEWBBACA/utils/constants.py index 9f2a2f7c..e6d31a4e 100755 --- a/CHEWBBACA/utils/constants.py +++ b/CHEWBBACA/utils/constants.py @@ -637,8 +637,8 @@ '(e.g. BLAST does not accept sequence IDs longer than 50 ' 'characters when creating a database).') -INPUTS_PDB_PREFIX = ('The following input files have prefixes that are ' - 'interpreted by BLAST as chain PDB IDs:\n{0}\nBLAST modifies the ' +INPUTS_PDB_PREFIX = ('The following input files have prefixes that are interpreted by BLAST ' + 'as chain PDB IDs:\n{0}\nBLAST modifies the ' 'IDs of the CDSs that include these prefixes when creating a database, ' 'which leads to issues when chewBBACA cannot find the original ' 'IDs in the results. Please ensure that the file prefixes (substring '