From f3fd009d676a3afd15857e56d3b457053dab2308 Mon Sep 17 00:00:00 2001 From: Ken Youens-Clark Date: Fri, 29 Jan 2021 21:04:02 -0700 Subject: [PATCH] updating READMEs --- 01_dna/README.md | 6 +- 02_rna/README.md | 4 +- 03_revc/README.md | 40 +++---- 04_fib/README.md | 40 +++---- 05_gc/README.md | 7 +- 06_hamm/README.md | 7 +- 07_prot/README.md | 40 ++++--- 08_subs/README.md | 7 +- 09_grph/README.md | 4 +- 10_lcsm/README.md | 42 ++++---- 11_mprt/README.md | 32 +++--- 12_mrna/README.md | 4 +- 13_revp/README.md | 39 +++---- 14_orf/README.md | 43 ++++---- 15_seqmagique/README.md | 102 +++++++++--------- 15_seqmagique/tests/inputs/1.fa.grid.out | 6 +- 15_seqmagique/tests/inputs/1.fa.latex.out | 8 +- .../tests/inputs/1.fa.latex_booktabs.out | 8 +- 15_seqmagique/tests/inputs/1.fa.latex_raw.out | 8 +- 15_seqmagique/tests/inputs/1.fa.mediawiki.out | 8 +- 15_seqmagique/tests/inputs/1.fa.orgtbl.out | 4 +- 15_seqmagique/tests/inputs/1.fa.out | 3 +- 15_seqmagique/tests/inputs/1.fa.pipe.out | 4 +- 15_seqmagique/tests/inputs/1.fa.plain.out | 3 +- 15_seqmagique/tests/inputs/1.fa.rst.out | 6 +- 15_seqmagique/tests/inputs/1.fa.simple.out | 4 +- 15_seqmagique/tests/inputs/2.fa.grid.out | 6 +- 15_seqmagique/tests/inputs/2.fa.latex.out | 8 +- .../tests/inputs/2.fa.latex_booktabs.out | 8 +- 15_seqmagique/tests/inputs/2.fa.latex_raw.out | 8 +- 15_seqmagique/tests/inputs/2.fa.mediawiki.out | 8 +- 15_seqmagique/tests/inputs/2.fa.orgtbl.out | 4 +- 15_seqmagique/tests/inputs/2.fa.out | 3 +- 15_seqmagique/tests/inputs/2.fa.pipe.out | 4 +- 15_seqmagique/tests/inputs/2.fa.plain.out | 3 +- 15_seqmagique/tests/inputs/2.fa.rst.out | 6 +- 15_seqmagique/tests/inputs/2.fa.simple.out | 4 +- 15_seqmagique/tests/inputs/all.fa.out | 7 +- 15_seqmagique/tests/inputs/empty.fa.out | 3 +- 16_fastx_grep/README.md | 71 +++++++++++- 17_synth/README.md | 85 +++++++++++++++ 18_fastx_sampler/README.md | 100 +++++++++-------- 19_blastomatic/README.md | 77 +++++++++++++ pylintrc | 2 +- 44 files changed, 612 
insertions(+), 274 deletions(-) create mode 100644 17_synth/README.md create mode 100644 19_blastomatic/README.md diff --git a/01_dna/README.md b/01_dna/README.md index b2a9566..f248538 100644 --- a/01_dna/README.md +++ b/01_dna/README.md @@ -2,8 +2,8 @@ http://rosalind.info/problems/dna/ -Create a program called "dna.py" that will accept a sequence of DNA as a single positional argument. -The program should print a "usage" statement for "-h" or "--help" flags: +Create a program called `dna.py` that will accept a sequence of DNA as a single positional argument. +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./dna.py -h @@ -25,7 +25,7 @@ $ ./dna.py AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAG 20 12 17 21 ``` -The "make test" target will run the complete test suite: +The `make test` target will run the complete test suite: ``` $ make test diff --git a/02_rna/README.md b/02_rna/README.md index d426c97..6530997 100644 --- a/02_rna/README.md +++ b/02_rna/README.md @@ -2,10 +2,10 @@ http://rosalind.info/problems/rna/ -Write a program called "rna.py" that will accepts one or more files, each containing a sequence of DNA on each line and the name of an output directory. +Write a program called `rna.py` that will accept one or more files, each containing a sequence of DNA on each line and the name of an output directory. The sequences in each file will be transcribed to RNA in output file located in the output directory. -The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./rna.py -h diff --git a/03_revc/README.md b/03_revc/README.md index 8d09583..253b92d 100644 --- a/03_revc/README.md +++ b/03_revc/README.md @@ -2,8 +2,8 @@ http://rosalind.info/problems/revc/ -Write a program called "revc.py" that will accept a string of DNA either or a filename containing the DNA and will print the reverse complement. 
-The program should print a "usage" statement for "-h" or "--help" flags: +Write a program called `revc.py` that will accept either a string of DNA or a filename containing the DNA and will print the reverse complement. +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./revc.py -h @@ -38,26 +38,28 @@ A fully passing test suite looks like the following: ``` $ make test -python3 -m pytest -xv --flake8 --pylint --mypy revc.py tests/revc_test.py -============================ test session starts ============================ +python3 -m pytest -xv --flake8 --pylint --pylint-rcfile=../pylintrc +--mypy revc.py tests/revc_test.py +============================= test session starts ============================== ... - -revc.py::FLAKE8 PASSED [ 8%] -revc.py::mypy PASSED [ 16%] -tests/revc_test.py::FLAKE8 PASSED [ 25%] -tests/revc_test.py::mypy PASSED [ 33%] -tests/revc_test.py::test_exists PASSED [ 41%] -tests/revc_test.py::test_usage PASSED [ 50%] -tests/revc_test.py::test_no_args PASSED [ 58%] -tests/revc_test.py::test_uppercase PASSED [ 66%] -tests/revc_test.py::test_lowercase PASSED [ 75%] -tests/revc_test.py::test_input1 PASSED [ 83%] -tests/revc_test.py::test_input2 PASSED [ 91%] -::mypy PASSED [100%] -=================================== mypy ==================================== +collected 11 items + +revc.py::FLAKE8 SKIPPED [ 8%] +revc.py::mypy PASSED [ 16%] +tests/revc_test.py::FLAKE8 SKIPPED [ 25%] +tests/revc_test.py::mypy PASSED [ 33%] +tests/revc_test.py::test_exists PASSED [ 41%] +tests/revc_test.py::test_usage PASSED [ 50%] +tests/revc_test.py::test_no_args PASSED [ 58%] +tests/revc_test.py::test_uppercase PASSED [ 66%] +tests/revc_test.py::test_lowercase PASSED [ 75%] +tests/revc_test.py::test_input1 PASSED [ 83%] +tests/revc_test.py::test_input2 PASSED [ 91%] +::mypy PASSED [100%] +===================================== mypy ===================================== Success: no issues found in 2 source files 
-============================ 12 passed in 1.55s ============================= +======================== 10 passed, 2 skipped in 1.73s ========================= ``` ## Author diff --git a/04_fib/README.md b/04_fib/README.md index dacb8b7..423764c 100644 --- a/04_fib/README.md +++ b/04_fib/README.md @@ -2,9 +2,9 @@ http://rosalind.info/problems/fib/ -Write a program called "fib.py" that accepts two positional arguments which are positive integer values describing the number of generations (<= 40) and the size of each litter (<= 5). +Write a program called `fib.py` that accepts two positional arguments which are positive integer values describing the number of generations (lte 40) and the size of each litter (lte 5). -The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./fib.py -h @@ -31,26 +31,28 @@ A fully passing test suite looks like the following: ``` $ make test -python3 -m pytest -xv --flake8 --pylint --mypy fib.py tests/fib_test.py -============================ test session starts ============================ +python3 -m pytest -xv --flake8 --pylint --pylint-rcfile=../pylintrc +--mypy fib.py tests/fib_test.py +============================= test session starts ============================== ... 
- -fib.py::FLAKE8 SKIPPED [ 8%] -fib.py::mypy PASSED [ 16%] -tests/fib_test.py::FLAKE8 SKIPPED [ 25%] -tests/fib_test.py::mypy PASSED [ 33%] -tests/fib_test.py::test_exists PASSED [ 41%] -tests/fib_test.py::test_usage PASSED [ 50%] -tests/fib_test.py::test_bad_n PASSED [ 58%] -tests/fib_test.py::test_bad_k PASSED [ 66%] -tests/fib_test.py::test_1 PASSED [ 75%] -tests/fib_test.py::test_2 PASSED [ 83%] -tests/fib_test.py::test_3 PASSED [ 91%] -::mypy PASSED [100%] -=================================== mypy ==================================== +collected 11 items + +fib.py::FLAKE8 PASSED [ 8%] +fib.py::mypy PASSED [ 16%] +tests/fib_test.py::FLAKE8 SKIPPED [ 25%] +tests/fib_test.py::mypy PASSED [ 33%] +tests/fib_test.py::test_exists PASSED [ 41%] +tests/fib_test.py::test_usage PASSED [ 50%] +tests/fib_test.py::test_bad_generations PASSED [ 58%] +tests/fib_test.py::test_bad_litter PASSED [ 66%] +tests/fib_test.py::test_1 PASSED [ 75%] +tests/fib_test.py::test_2 PASSED [ 83%] +tests/fib_test.py::test_3 PASSED [ 91%] +::mypy PASSED [100%] +===================================== mypy ===================================== Success: no issues found in 2 source files -======================= 10 passed, 2 skipped in 0.43s ======================= +======================== 11 passed, 1 skipped in 0.72s ========================= ``` ## Author diff --git a/05_gc/README.md b/05_gc/README.md index 41d3d1e..b6b28ae 100644 --- a/05_gc/README.md +++ b/05_gc/README.md @@ -2,9 +2,9 @@ http://rosalind.info/problems/gc/ -Write a Python program called "cgc.py" that takes a single positional argument which should be a readable text file. +Write a Python program called `cgc.py` that takes a single positional argument which should be a readable text file. 
-The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./cgc.py -h @@ -45,7 +45,8 @@ A fully passing test suite looks like the following: ``` $ make test -python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --mypy cgc.py tests/cgc_test.py +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--mypy cgc.py tests/cgc_test.py ============================ test session starts ============================ ... diff --git a/06_hamm/README.md b/06_hamm/README.md index 92a16af..5471757 100644 --- a/06_hamm/README.md +++ b/06_hamm/README.md @@ -2,9 +2,9 @@ http://rosalind.info/problems/hamm/ -Write a Python program called "hamm.py" that takes a single position argument which is a readable file that will contain two lines of DNA sequences and will print the Hamming distance between them. +Write a Python program called `hamm.py` that takes a single positional argument which is a readable file that will contain two lines of DNA sequences and will print the Hamming distance between them. -The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./hamm.py -h @@ -38,7 +38,8 @@ A passing test suite looks like this: ``` $ make test -python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --mypy hamm.py tests/hamm_test.py +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--mypy hamm.py tests/hamm_test.py ============================ test session starts ============================ ... diff --git a/07_prot/README.md b/07_prot/README.md index 9cff064..c535534 100644 --- a/07_prot/README.md +++ b/07_prot/README.md @@ -2,14 +2,14 @@ http://rosalind.info/problems/prot/ -Write a Python program called "prot.py" that takes a sequence of RNA as a single position argument and prints the protein translation. 
+Write a Python program called `prot.py` that takes a sequence of RNA as a single positional argument and prints the protein translation. ``` $ ./prot.py AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA MAMAPRTEINSTRING ``` -The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./prot.py -h @@ -28,23 +28,29 @@ A passing test suite looks like this: ``` $ make test -python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --mypy prot.py tests/prot_test.py -============================ test session starts ============================ -... - -prot.py::FLAKE8 SKIPPED [ 11%] -prot.py::mypy PASSED [ 22%] -tests/prot_test.py::FLAKE8 SKIPPED [ 33%] -tests/prot_test.py::mypy PASSED [ 44%] -tests/prot_test.py::test_exists PASSED [ 55%] -tests/prot_test.py::test_usage PASSED [ 66%] -tests/prot_test.py::test_input1 PASSED [ 77%] -tests/prot_test.py::test_input2 PASSED [ 88%] -::mypy PASSED [100%] -=================================== mypy ==================================== +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--mypy prot.py tests/prot_test.py +============================= test session starts ============================== +platform darwin -- Python 3.9.1, pytest-6.1.2, py-1.9.0, pluggy-0.13.1 -- /Library/Frameworks/Python.framework/Versions/3.9/bin/python3 +cachedir: .pytest_cache +rootdir: /Users/kyclark/work/bio/code/07_prot +plugins: mypy-0.7.0, flake8-1.0.6, pylint-0.17.0 +collected 9 items + +prot.py::FLAKE8 SKIPPED [ 10%] +prot.py::mypy PASSED [ 20%] +tests/prot_test.py::FLAKE8 SKIPPED [ 30%] +tests/prot_test.py::mypy PASSED [ 40%] +tests/prot_test.py::test_exists PASSED [ 50%] +tests/prot_test.py::test_usage PASSED [ 60%] +tests/prot_test.py::test_input1 PASSED [ 70%] +tests/prot_test.py::test_stop_codon PASSED [ 80%] +tests/prot_test.py::test_input2 PASSED [ 90%] +::mypy PASSED [100%] +===================================== mypy 
===================================== Success: no issues found in 2 source files -======================= 7 passed, 2 skipped in 0.53s ======================== +========================= 8 passed, 2 skipped in 1.72s ========================= ``` ## Author diff --git a/08_subs/README.md b/08_subs/README.md index 61db026..4bd1d01 100644 --- a/08_subs/README.md +++ b/08_subs/README.md @@ -2,7 +2,7 @@ http://rosalind.info/problems/subs/ -Write a Python program called "subs.py" that accepts two positional arguments, a sequence and a possible subsequence. +Write a Python program called `subs.py` that accepts two positional arguments, a sequence and a possible subsequence. The output should be all the start positions where the subsequence can be found in the sequence: ``` @@ -10,7 +10,7 @@ $ ./subs.py GATATATGCATATACTT ATAT 2 4 10 ``` -The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./subs.py -h @@ -30,7 +30,8 @@ A passing test suite looks like this: ``` $ make test -python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --mypy subs.py tests/subs_test.py +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--mypy subs.py tests/subs_test.py ============================ test session starts ============================ ... diff --git a/09_grph/README.md b/09_grph/README.md index b37a4ca..447b06e 100644 --- a/09_grph/README.md +++ b/09_grph/README.md @@ -2,9 +2,9 @@ http://rosalind.info/problems/grph/ -Write a Python program called "grph.py" an input file of sequences and an optional overlap size and will print all the pairs of sequences which can be joined given the overlap size. +Write a Python program called `grph.py` that accepts an input file of sequences and an optional overlap size and will print all the pairs of sequences which can be joined given the overlap size. 
-The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./grph.py -h diff --git a/10_lcsm/README.md b/10_lcsm/README.md index 0f1d315..88e8a64 100644 --- a/10_lcsm/README.md +++ b/10_lcsm/README.md @@ -2,9 +2,9 @@ http://rosalind.info/problems/lcsm/ -Write a Python program called "lcsm.py" that will accept FASTA file of sequences as a single positional argument and will print the longest common subsequence shared by all the sequences. +Write a Python program called `lcsm.py` that will accept a FASTA file of sequences as a single positional argument and will print the longest common substring shared by all the sequences. -The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./lcsm.py -h @@ -42,26 +42,30 @@ A passing test suite looks like this: ``` $ make test -python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --mypy lcsm.py tests/lcsm_test.py -============================ test session starts ============================ +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--pylint-rcfile=../pylintrc --mypy lcsm.py tests/lcsm_test.py +============================= test session starts ============================== ... 
- -lcsm.py::FLAKE8 PASSED [ 8%] -lcsm.py::mypy PASSED [ 16%] -lcsm.py::test_kmers PASSED [ 25%] -tests/lcsm_test.py::FLAKE8 PASSED [ 33%] -tests/lcsm_test.py::mypy PASSED [ 41%] -tests/lcsm_test.py::test_exists PASSED [ 50%] -tests/lcsm_test.py::test_usage PASSED [ 58%] -tests/lcsm_test.py::test_bad_file PASSED [ 66%] -tests/lcsm_test.py::test_empty PASSED [ 75%] -tests/lcsm_test.py::test_short PASSED [ 83%] -tests/lcsm_test.py::test_long PASSED [ 91%] -::mypy PASSED [100%] -=================================== mypy ==================================== +collected 13 items + +lcsm.py::FLAKE8 SKIPPED [ 7%] +lcsm.py::mypy PASSED [ 14%] +lcsm.py::test_binary_search PASSED [ 21%] +lcsm.py::test_common_kmers PASSED [ 28%] +lcsm.py::test_find_kmers PASSED [ 35%] +tests/lcsm_test.py::FLAKE8 SKIPPED [ 42%] +tests/lcsm_test.py::mypy PASSED [ 50%] +tests/lcsm_test.py::test_exists PASSED [ 57%] +tests/lcsm_test.py::test_usage PASSED [ 64%] +tests/lcsm_test.py::test_bad_file PASSED [ 71%] +tests/lcsm_test.py::test_short PASSED [ 78%] +tests/lcsm_test.py::test_long PASSED [ 85%] +tests/lcsm_test.py::test_no_shared PASSED [ 92%] +::mypy PASSED [100%] +===================================== mypy ===================================== Success: no issues found in 2 source files -============================ 12 passed in 42.35s ============================ +======================== 12 passed, 2 skipped in 2.13s ========================= ``` ## Author diff --git a/11_mprt/README.md b/11_mprt/README.md index 5c42f81..36e4311 100644 --- a/11_mprt/README.md +++ b/11_mprt/README.md @@ -2,9 +2,9 @@ http://rosalind.info/problems/mprt/ -Write a Python program called "mprt.py" that will accept file of sequence IDs, will download the sequences from UniProt, and will print each protein containing the N-glycosylation motif and a list of the locations where the motif can be found. 
+Write a Python program called `mprt.py` that will accept a file of sequence IDs, will download the sequences from UniProt, and will print each protein containing the N-glycosylation motif and a list of the locations where the motif can be found. -The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./mprt.py -h @@ -47,24 +47,28 @@ A passing test suite looks like this: ``` $ make test -python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --mypy mprt.py tests/mprt_test.py +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--pylint-rcfile=../pylintrc --mypy mprt.py tests/mprt_test.py ============================= test session starts ============================== ... - -mprt.py::FLAKE8 PASSED [ 10%] -mprt.py::mypy PASSED [ 20%] -tests/mprt_test.py::FLAKE8 PASSED [ 30%] -tests/mprt_test.py::mypy PASSED [ 40%] -tests/mprt_test.py::test_exists PASSED [ 50%] -tests/mprt_test.py::test_usage PASSED [ 60%] -tests/mprt_test.py::test_bad_file PASSED [ 70%] -tests/mprt_test.py::test_1 PASSED [ 80%] -tests/mprt_test.py::test_2 PASSED [ 90%] +collected 11 items + +mprt.py::FLAKE8 SKIPPED [ 8%] +mprt.py::mypy PASSED [ 16%] +mprt.py::test_find_motif PASSED [ 25%] +mprt.py::test_find_kmers PASSED [ 33%] +tests/mprt_test.py::FLAKE8 SKIPPED [ 41%] +tests/mprt_test.py::mypy PASSED [ 50%] +tests/mprt_test.py::test_exists PASSED [ 58%] +tests/mprt_test.py::test_usage PASSED [ 66%] +tests/mprt_test.py::test_bad_file PASSED [ 75%] +tests/mprt_test.py::test_1 PASSED [ 83%] +tests/mprt_test.py::test_2 PASSED [ 91%] ::mypy PASSED [100%] ===================================== mypy ===================================== Success: no issues found in 2 source files -============================= 10 passed in 23.58s ============================== +======================== 10 passed, 2 skipped in 1.41s ========================= ``` ## Author diff --git a/11_mprt/README.md 
b/12_mrna/README.md index f7467df..77d75d9 100644 --- a/12_mrna/README.md +++ b/12_mrna/README.md @@ -2,9 +2,9 @@ http://rosalind.info/problems/mrna/ -Write a Python program called "mrna.py" that will accept a protein sequence as a positional argument or a file name along with an optional "modulo" argument that defaults to 1,000,000. +Write a Python program called `mrna.py` that will accept a protein sequence as a positional argument or a file name along with an optional "modulo" argument that defaults to 1,000,000. -The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./mrna.py -h diff --git a/13_revp/README.md b/13_revp/README.md index 646ced8..9036acc 100644 --- a/13_revp/README.md +++ b/13_revp/README.md @@ -2,9 +2,9 @@ http://rosalind.info/problems/revp/ -Write a Python program called "revp.py" that will accept a FASTA-formatted file of a DNA sequence and will print the position and length of every reverse palindrome in the string having length between 4 and 12. +Write a Python program called `revp.py` that will accept a FASTA-formatted file of a DNA sequence and will print the position and length of every reverse palindrome in the string having length between 4 and 12. -The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./revp.py -h @@ -45,26 +45,27 @@ A passing test suite looks like this: ``` $ make test -python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --mypy revp.py tests/revp_test.py -============================ test session starts ============================ +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--pylint-rcfile=../pylintrc --mypy revp.py tests/revp_test.py +============================= test session starts ============================== ... 
- -revp.py::FLAKE8 SKIPPED [ 8%] -revp.py::mypy PASSED [ 16%] -revp.py::test_fst PASSED [ 25%] -revp.py::test_snd PASSED [ 33%] -revp.py::test_find_kmers PASSED [ 41%] -tests/revp_test.py::FLAKE8 SKIPPED [ 50%] -tests/revp_test.py::mypy PASSED [ 58%] -tests/revp_test.py::test_exists PASSED [ 66%] -tests/revp_test.py::test_usage PASSED [ 75%] -tests/revp_test.py::test_ok1 PASSED [ 83%] -tests/revp_test.py::test_ok2 PASSED [ 91%] -::mypy PASSED [100%] -=================================== mypy ==================================== +collecting ... collected 10 items + +revp.py::FLAKE8 PASSED [ 9%] +revp.py::mypy PASSED [ 18%] +revp.py::test_revp PASSED [ 27%] +tests/revp_test.py::FLAKE8 SKIPPED [ 36%] +tests/revp_test.py::mypy PASSED [ 45%] +tests/revp_test.py::test_exists PASSED [ 54%] +tests/revp_test.py::test_usage PASSED [ 63%] +tests/revp_test.py::test_bad_file PASSED [ 72%] +tests/revp_test.py::test_ok1 PASSED [ 81%] +tests/revp_test.py::test_ok2 PASSED [ 90%] +::mypy PASSED [100%] +===================================== mypy ===================================== Success: no issues found in 2 source files -======================= 10 passed, 2 skipped in 0.95s ======================= +======================== 10 passed, 1 skipped in 1.29s ========================= ``` ## Author diff --git a/14_orf/README.md b/14_orf/README.md index 631664f..59eead8 100644 --- a/14_orf/README.md +++ b/14_orf/README.md @@ -2,9 +2,9 @@ http://rosalind.info/problems/orf/ -Write a Python program called "orf.py" that accepts a FASTA formatted input file as a single positional argument and will print all the possible proteins that could be translated from the open reading frames. +Write a Python program called `orf.py` that accepts a FASTA formatted input file as a single positional argument and will print all the possible proteins that could be translated from the open reading frames. 
-The program should print a "usage" statement for "-h" or "--help" flags: +The program should print a "usage" statement for `-h` or `--help` flags: ``` $ ./orf.py -h @@ -42,27 +42,30 @@ A passing test suite looks like this: ``` $ make test -python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint --mypy orf.py tests/orf_test.py -============================ test session starts ============================ +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--pylint-rcfile=../pylintrc --mypy orf.py tests/orf_test.py +============================= test session starts ============================== ... - -orf.py::FLAKE8 SKIPPED [ 7%] -orf.py::mypy PASSED [ 15%] -orf.py::test_find_orfs PASSED [ 23%] -orf.py::test_find_codons PASSED [ 30%] -orf.py::test_translate PASSED [ 38%] -tests/orf_test.py::FLAKE8 PASSED [ 46%] -tests/orf_test.py::mypy PASSED [ 53%] -tests/orf_test.py::test_exists PASSED [ 61%] -tests/orf_test.py::test_usage PASSED [ 69%] -tests/orf_test.py::test_ok1 PASSED [ 76%] -tests/orf_test.py::test_ok2 PASSED [ 84%] -tests/orf_test.py::test_ok3 PASSED [ 92%] -::mypy PASSED [100%] -=================================== mypy ==================================== +collected 13 items + +orf.py::FLAKE8 PASSED [ 7%] +orf.py::mypy PASSED [ 14%] +orf.py::test_truncate PASSED [ 21%] +orf.py::test_find_orfs PASSED [ 28%] +tests/orf_test.py::FLAKE8 SKIPPED [ 35%] +tests/orf_test.py::mypy PASSED [ 42%] +tests/orf_test.py::test_exists PASSED [ 50%] +tests/orf_test.py::test_usage PASSED [ 57%] +tests/orf_test.py::test_bad_file PASSED [ 64%] +tests/orf_test.py::test_empty PASSED [ 71%] +tests/orf_test.py::test_ok1 PASSED [ 78%] +tests/orf_test.py::test_ok2 PASSED [ 85%] +tests/orf_test.py::test_ok3 PASSED [ 92%] +::mypy PASSED [100%] +===================================== mypy ===================================== Success: no issues found in 2 source files -======================= 12 passed, 1 skipped in 2.07s ======================= 
+======================== 13 passed, 1 skipped in 1.75s ========================= ``` ## Author diff --git a/15_seqmagique/README.md b/15_seqmagique/README.md index 0c4cf4c..2527d08 100644 --- a/15_seqmagique/README.md +++ b/15_seqmagique/README.md @@ -1,76 +1,72 @@ -= FASTA Summary With Seqmagique +# Seqmagique -== Install Seqmagick - -Error: +Write a program called `seqmagique.py` that will accept FASTA input files and will print the minimum/maximum/average sequence lengths and the number of sequences in each file: ``` -ImportError: Bio.Alphabet has been removed from Biopython. +$ ./seqmagique.py tests/inputs/*.fa +name min_len max_len avg_len num_seqs +tests/inputs/1.fa 50 50 50.00 1 +tests/inputs/2.fa 49 79 64.00 5 +tests/inputs/empty.fa 0 0 0.00 0 ``` -Per https://fhcrc.github.io/seqmagick/, https://github.com/fhcrc/seqmagick/pull/89: +The program should print a usage: ``` -$ pip install git+https://github.com/fhcrc/seqmagick.git@master#egg-info=seqmagick -$ pip install pygtrie -``` - - +$ ./seqmagique.py -h +usage: seqmagique.py [-h] [-t table] FILE [FILE ...] -Now let's finally get into parsing good, old FASTA files. We're going to need to install the BioPython (http://biopython.org/) module to get a FASTA parser. This should work for you: +Argparse Python script -``` -$ python3 -m pip install biopython -``` +positional arguments: + FILE Input FASTA file(s) -For this exercise, I'll use a few reads from the Global Ocean Sampling Expedition (https://imicrobe.us/#/samples/578). You can download the full file with this command: - -``` -$ iget /iplant/home/shared/imicrobe/projects/26/samples/578/CAM_SMPL_GS108.fa +optional arguments: + -h, --help show this help message and exit + -t table, --tablefmt table + Tabulate table style (default: plain) ``` -Since that file is 725M, I've added a sample to the repo in the `examples` directory. 
+The output table should be formatted with the `tabulate` module and so will accept all the valid table styles, e.g.: ``` -$ head -5 CAM_SMPL_GS108.fa ->CAM_READ_0231669761 /library_id="CAM_LIB_GOS108XLRVAL-4F-1-400" /sample_id="CAM_SMPL_GS108" raw_id=SRA_ID=SRR066139.70645 raw_id=FG67BMZ02PUFIF -ATTTACAATAATTTAATAAAATTAACTAGAAATAAAATATTGTATGAAAATATGTTAAAT -AATGAAAGTTTTTCAGATCGTTTAATAATATTTTTCTTCCATTTTGCTTTTTTCTAAAAT -TGTTCAAAAACAAACTTCAAAGGAAAATCTTCAAAATTTACATGATTTTATATTTAAACA -AATAGAGTTAAGTATAAGAGAAATTGGATATGGTGATGCTTCAATAAATAAAAAAATGAA +$ ./seqmagique.py -t simple tests/inputs/*.fa +name min_len max_len avg_len num_seqs +--------------------- --------- --------- --------- ---------- +tests/inputs/1.fa 50 50 50.00 1 +tests/inputs/2.fa 49 79 64.00 5 +tests/inputs/empty.fa 0 0 0.00 0 ``` -The format of a FASTA file is: +A passing test suite looks like this: -* A record starts with a header row which has `>` as the first character on a line -* The string following the `>` up until the first whitespace is the record ID -* Anything following the ID up to the newline can be the "description," but here we see this space has been set up as key/value pairs of metadata -* Any line after a header that does not start with `>` is the sequence. The sequence may be one long line or many shorter lines. - -We **could** write our own FASTA parser, and we would definitely learn much along the way, but let's not and instead use the BioPython `SeqIO` (sequence input-output) module to read and write all the different formats. FASTA is one of the most common, but other formats may include FASTQ (FASTA but with "Quality" scores for the base calls), GenBank, EMBL, and more. See https://biopython.org/wiki/SeqIO for an exhaustive list. 
- -There is a useful program called `seqmagick` that will give you information like the following: - -``` -$ seqmagick info *.fa -name alignment min_len max_len avg_len num_seqs -CAM_SMPL_GS108.fa FALSE 47 594 369.65 499 -CAM_SMPL_GS112.fa FALSE 50 624 383.50 500 ``` +$ make test +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--pylint-rcfile=../pylintrc --mypy seqmagique.py tests/seqmagique_test.py +============================= test session starts ============================== +... +collected 12 items -You can install it like so: +seqmagique.py::FLAKE8 SKIPPED [ 7%] +seqmagique.py::mypy PASSED [ 15%] +tests/seqmagique_test.py::FLAKE8 SKIPPED [ 23%] +tests/seqmagique_test.py::mypy PASSED [ 30%] +tests/seqmagique_test.py::test_exists PASSED [ 38%] +tests/seqmagique_test.py::test_usage PASSED [ 46%] +tests/seqmagique_test.py::test_bad_file PASSED [ 53%] +tests/seqmagique_test.py::test_empty_file PASSED [ 61%] +tests/seqmagique_test.py::test_input1 PASSED [ 69%] +tests/seqmagique_test.py::test_input2 PASSED [ 76%] +tests/seqmagique_test.py::test_input_all PASSED [ 84%] +tests/seqmagique_test.py::test_styles PASSED [ 92%] +::mypy PASSED [100%] +===================================== mypy ===================================== +Success: no issues found in 2 source files +======================== 11 passed, 2 skipped in 6.33s ========================= ``` -$ python -m pip install seqmagick -``` - -Let's write a toy program to mimic part of the output. We'll skip the "alignment" and just do min/max/avg lengths, and the number of sequences. You can pretty much copy and paste the example code from http://biopython.org/wiki/SeqIO. 
Here is the output from our script, `seqmagique.py`: -``` -$ ./seqmagique.py *.fa -name min_len max_len avg_len num_seqs -CAM_SMPL_GS108.fa 47 594 369.45 500 -CAM_SMPL_GS112.fa 50 624 383.50 500 -``` +## Author -The code to produce this builds on our earlier skills of lists and dictionaries as we will parse each file and save a dictionary of stats into a list, then we will iterate over that list at the end to show the output. +Ken Youens-Clark diff --git a/15_seqmagique/tests/inputs/1.fa.grid.out b/15_seqmagique/tests/inputs/1.fa.grid.out index 399d0f3..57ae5d2 100644 --- a/15_seqmagique/tests/inputs/1.fa.grid.out +++ b/15_seqmagique/tests/inputs/1.fa.grid.out @@ -1 +1,5 @@ -./tests/inputs/1.fa ++---------------------+-----------+-----------+-----------+------------+ +| name | min_len | max_len | avg_len | num_seqs | ++=====================+===========+===========+===========+============+ +| ./tests/inputs/1.fa | 50 | 50 | 50.00 | 1 | ++---------------------+-----------+-----------+-----------+------------+ diff --git a/15_seqmagique/tests/inputs/1.fa.latex.out b/15_seqmagique/tests/inputs/1.fa.latex.out index 399d0f3..6d53375 100644 --- a/15_seqmagique/tests/inputs/1.fa.latex.out +++ b/15_seqmagique/tests/inputs/1.fa.latex.out @@ -1 +1,7 @@ -./tests/inputs/1.fa +\begin{tabular}{lrrrr} +\hline + name & min\_len & max\_len & avg\_len & num\_seqs \\ +\hline + ./tests/inputs/1.fa & 50 & 50 & 50.00 & 1 \\ +\hline +\end{tabular} diff --git a/15_seqmagique/tests/inputs/1.fa.latex_booktabs.out b/15_seqmagique/tests/inputs/1.fa.latex_booktabs.out index 399d0f3..621babd 100644 --- a/15_seqmagique/tests/inputs/1.fa.latex_booktabs.out +++ b/15_seqmagique/tests/inputs/1.fa.latex_booktabs.out @@ -1 +1,7 @@ -./tests/inputs/1.fa +\begin{tabular}{lrrrr} +\toprule + name & min\_len & max\_len & avg\_len & num\_seqs \\ +\midrule + ./tests/inputs/1.fa & 50 & 50 & 50.00 & 1 \\ +\bottomrule +\end{tabular} diff --git a/15_seqmagique/tests/inputs/1.fa.latex_raw.out 
b/15_seqmagique/tests/inputs/1.fa.latex_raw.out index 399d0f3..35af2d8 100644 --- a/15_seqmagique/tests/inputs/1.fa.latex_raw.out +++ b/15_seqmagique/tests/inputs/1.fa.latex_raw.out @@ -1 +1,7 @@ -./tests/inputs/1.fa +\begin{tabular}{lrrrr} +\hline + name & min_len & max_len & avg_len & num_seqs \\ +\hline + ./tests/inputs/1.fa & 50 & 50 & 50.00 & 1 \\ +\hline +\end{tabular} diff --git a/15_seqmagique/tests/inputs/1.fa.mediawiki.out b/15_seqmagique/tests/inputs/1.fa.mediawiki.out index 399d0f3..2bf71ca 100644 --- a/15_seqmagique/tests/inputs/1.fa.mediawiki.out +++ b/15_seqmagique/tests/inputs/1.fa.mediawiki.out @@ -1 +1,7 @@ -./tests/inputs/1.fa +{| class="wikitable" style="text-align: left;" +|+ +|- +! name !! align="right"| min_len !! align="right"| max_len !! align="right"| avg_len !! align="right"| num_seqs +|- +| ./tests/inputs/1.fa || align="right"| 50 || align="right"| 50 || align="right"| 50.00 || align="right"| 1 +|} diff --git a/15_seqmagique/tests/inputs/1.fa.orgtbl.out b/15_seqmagique/tests/inputs/1.fa.orgtbl.out index 399d0f3..a8b658f 100644 --- a/15_seqmagique/tests/inputs/1.fa.orgtbl.out +++ b/15_seqmagique/tests/inputs/1.fa.orgtbl.out @@ -1 +1,3 @@ -./tests/inputs/1.fa +| name | min_len | max_len | avg_len | num_seqs | +|---------------------+-----------+-----------+-----------+------------| +| ./tests/inputs/1.fa | 50 | 50 | 50.00 | 1 | diff --git a/15_seqmagique/tests/inputs/1.fa.out b/15_seqmagique/tests/inputs/1.fa.out index 399d0f3..9d3d99a 100644 --- a/15_seqmagique/tests/inputs/1.fa.out +++ b/15_seqmagique/tests/inputs/1.fa.out @@ -1 +1,2 @@ -./tests/inputs/1.fa +name min_len max_len avg_len num_seqs +./tests/inputs/1.fa 50 50 50.00 1 diff --git a/15_seqmagique/tests/inputs/1.fa.pipe.out b/15_seqmagique/tests/inputs/1.fa.pipe.out index 399d0f3..1d6c81e 100644 --- a/15_seqmagique/tests/inputs/1.fa.pipe.out +++ b/15_seqmagique/tests/inputs/1.fa.pipe.out @@ -1 +1,3 @@ -./tests/inputs/1.fa +| name | min_len | max_len | avg_len | num_seqs | 
+|:--------------------|----------:|----------:|----------:|-----------:| +| ./tests/inputs/1.fa | 50 | 50 | 50.00 | 1 | diff --git a/15_seqmagique/tests/inputs/1.fa.plain.out b/15_seqmagique/tests/inputs/1.fa.plain.out index 399d0f3..9d3d99a 100644 --- a/15_seqmagique/tests/inputs/1.fa.plain.out +++ b/15_seqmagique/tests/inputs/1.fa.plain.out @@ -1 +1,2 @@ -./tests/inputs/1.fa +name min_len max_len avg_len num_seqs +./tests/inputs/1.fa 50 50 50.00 1 diff --git a/15_seqmagique/tests/inputs/1.fa.rst.out b/15_seqmagique/tests/inputs/1.fa.rst.out index 399d0f3..2f29f5f 100644 --- a/15_seqmagique/tests/inputs/1.fa.rst.out +++ b/15_seqmagique/tests/inputs/1.fa.rst.out @@ -1 +1,5 @@ -./tests/inputs/1.fa +=================== ========= ========= ========= ========== +name min_len max_len avg_len num_seqs +=================== ========= ========= ========= ========== +./tests/inputs/1.fa 50 50 50.00 1 +=================== ========= ========= ========= ========== diff --git a/15_seqmagique/tests/inputs/1.fa.simple.out b/15_seqmagique/tests/inputs/1.fa.simple.out index 399d0f3..247b140 100644 --- a/15_seqmagique/tests/inputs/1.fa.simple.out +++ b/15_seqmagique/tests/inputs/1.fa.simple.out @@ -1 +1,3 @@ -./tests/inputs/1.fa +name min_len max_len avg_len num_seqs +------------------- --------- --------- --------- ---------- +./tests/inputs/1.fa 50 50 50.00 1 diff --git a/15_seqmagique/tests/inputs/2.fa.grid.out b/15_seqmagique/tests/inputs/2.fa.grid.out index f2e5f3e..58d769b 100644 --- a/15_seqmagique/tests/inputs/2.fa.grid.out +++ b/15_seqmagique/tests/inputs/2.fa.grid.out @@ -1 +1,5 @@ -./tests/inputs/2.fa ++---------------------+-----------+-----------+-----------+------------+ +| name | min_len | max_len | avg_len | num_seqs | ++=====================+===========+===========+===========+============+ +| ./tests/inputs/2.fa | 49 | 79 | 64.00 | 5 | ++---------------------+-----------+-----------+-----------+------------+ diff --git a/15_seqmagique/tests/inputs/2.fa.latex.out 
b/15_seqmagique/tests/inputs/2.fa.latex.out index f2e5f3e..48b48d0 100644 --- a/15_seqmagique/tests/inputs/2.fa.latex.out +++ b/15_seqmagique/tests/inputs/2.fa.latex.out @@ -1 +1,7 @@ -./tests/inputs/2.fa +\begin{tabular}{lrrrr} +\hline + name & min\_len & max\_len & avg\_len & num\_seqs \\ +\hline + ./tests/inputs/2.fa & 49 & 79 & 64.00 & 5 \\ +\hline +\end{tabular} diff --git a/15_seqmagique/tests/inputs/2.fa.latex_booktabs.out b/15_seqmagique/tests/inputs/2.fa.latex_booktabs.out index f2e5f3e..ec58aac 100644 --- a/15_seqmagique/tests/inputs/2.fa.latex_booktabs.out +++ b/15_seqmagique/tests/inputs/2.fa.latex_booktabs.out @@ -1 +1,7 @@ -./tests/inputs/2.fa +\begin{tabular}{lrrrr} +\toprule + name & min\_len & max\_len & avg\_len & num\_seqs \\ +\midrule + ./tests/inputs/2.fa & 49 & 79 & 64.00 & 5 \\ +\bottomrule +\end{tabular} diff --git a/15_seqmagique/tests/inputs/2.fa.latex_raw.out b/15_seqmagique/tests/inputs/2.fa.latex_raw.out index f2e5f3e..5ff02c9 100644 --- a/15_seqmagique/tests/inputs/2.fa.latex_raw.out +++ b/15_seqmagique/tests/inputs/2.fa.latex_raw.out @@ -1 +1,7 @@ -./tests/inputs/2.fa +\begin{tabular}{lrrrr} +\hline + name & min_len & max_len & avg_len & num_seqs \\ +\hline + ./tests/inputs/2.fa & 49 & 79 & 64.00 & 5 \\ +\hline +\end{tabular} diff --git a/15_seqmagique/tests/inputs/2.fa.mediawiki.out b/15_seqmagique/tests/inputs/2.fa.mediawiki.out index f2e5f3e..2cde1e8 100644 --- a/15_seqmagique/tests/inputs/2.fa.mediawiki.out +++ b/15_seqmagique/tests/inputs/2.fa.mediawiki.out @@ -1 +1,7 @@ -./tests/inputs/2.fa +{| class="wikitable" style="text-align: left;" +|+ +|- +! name !! align="right"| min_len !! align="right"| max_len !! align="right"| avg_len !! 
align="right"| num_seqs +|- +| ./tests/inputs/2.fa || align="right"| 49 || align="right"| 79 || align="right"| 64.00 || align="right"| 5 +|} diff --git a/15_seqmagique/tests/inputs/2.fa.orgtbl.out b/15_seqmagique/tests/inputs/2.fa.orgtbl.out index f2e5f3e..80f05be 100644 --- a/15_seqmagique/tests/inputs/2.fa.orgtbl.out +++ b/15_seqmagique/tests/inputs/2.fa.orgtbl.out @@ -1 +1,3 @@ -./tests/inputs/2.fa +| name | min_len | max_len | avg_len | num_seqs | +|---------------------+-----------+-----------+-----------+------------| +| ./tests/inputs/2.fa | 49 | 79 | 64.00 | 5 | diff --git a/15_seqmagique/tests/inputs/2.fa.out b/15_seqmagique/tests/inputs/2.fa.out index f2e5f3e..0742e9a 100644 --- a/15_seqmagique/tests/inputs/2.fa.out +++ b/15_seqmagique/tests/inputs/2.fa.out @@ -1 +1,2 @@ -./tests/inputs/2.fa +name min_len max_len avg_len num_seqs +./tests/inputs/2.fa 49 79 64.00 5 diff --git a/15_seqmagique/tests/inputs/2.fa.pipe.out b/15_seqmagique/tests/inputs/2.fa.pipe.out index f2e5f3e..98a8dc3 100644 --- a/15_seqmagique/tests/inputs/2.fa.pipe.out +++ b/15_seqmagique/tests/inputs/2.fa.pipe.out @@ -1 +1,3 @@ -./tests/inputs/2.fa +| name | min_len | max_len | avg_len | num_seqs | +|:--------------------|----------:|----------:|----------:|-----------:| +| ./tests/inputs/2.fa | 49 | 79 | 64.00 | 5 | diff --git a/15_seqmagique/tests/inputs/2.fa.plain.out b/15_seqmagique/tests/inputs/2.fa.plain.out index f2e5f3e..0742e9a 100644 --- a/15_seqmagique/tests/inputs/2.fa.plain.out +++ b/15_seqmagique/tests/inputs/2.fa.plain.out @@ -1 +1,2 @@ -./tests/inputs/2.fa +name min_len max_len avg_len num_seqs +./tests/inputs/2.fa 49 79 64.00 5 diff --git a/15_seqmagique/tests/inputs/2.fa.rst.out b/15_seqmagique/tests/inputs/2.fa.rst.out index f2e5f3e..905ab33 100644 --- a/15_seqmagique/tests/inputs/2.fa.rst.out +++ b/15_seqmagique/tests/inputs/2.fa.rst.out @@ -1 +1,5 @@ -./tests/inputs/2.fa +=================== ========= ========= ========= ========== +name min_len max_len avg_len 
num_seqs +=================== ========= ========= ========= ========== +./tests/inputs/2.fa 49 79 64.00 5 +=================== ========= ========= ========= ========== diff --git a/15_seqmagique/tests/inputs/2.fa.simple.out b/15_seqmagique/tests/inputs/2.fa.simple.out index f2e5f3e..227b2d5 100644 --- a/15_seqmagique/tests/inputs/2.fa.simple.out +++ b/15_seqmagique/tests/inputs/2.fa.simple.out @@ -1 +1,3 @@ -./tests/inputs/2.fa +name min_len max_len avg_len num_seqs +------------------- --------- --------- --------- ---------- +./tests/inputs/2.fa 49 79 64.00 5 diff --git a/15_seqmagique/tests/inputs/all.fa.out b/15_seqmagique/tests/inputs/all.fa.out index a0899f1..8166b54 100644 --- a/15_seqmagique/tests/inputs/all.fa.out +++ b/15_seqmagique/tests/inputs/all.fa.out @@ -1,3 +1,4 @@ -./tests/inputs/1.fa -./tests/inputs/2.fa -./tests/inputs/empty.fa +name min_len max_len avg_len num_seqs +./tests/inputs/1.fa 50 50 50.00 1 +./tests/inputs/2.fa 49 79 64.00 5 +./tests/inputs/empty.fa 0 0 0.00 0 diff --git a/15_seqmagique/tests/inputs/empty.fa.out b/15_seqmagique/tests/inputs/empty.fa.out index a89ef98..dbf2899 100644 --- a/15_seqmagique/tests/inputs/empty.fa.out +++ b/15_seqmagique/tests/inputs/empty.fa.out @@ -1 +1,2 @@ -./tests/inputs/empty.fa +name min_len max_len avg_len num_seqs +./tests/inputs/empty.fa 0 0 0 0 diff --git a/16_fastx_grep/README.md b/16_fastx_grep/README.md index 8818302..020ada8 100644 --- a/16_fastx_grep/README.md +++ b/16_fastx_grep/README.md @@ -1,11 +1,31 @@ # FASTX grep Select sequence records by text. -Currently just FASTA/Q but could be anything parsable by Bio.SeqIO. 
+Currently just FASTA/Q but could be anything parsable by Bio.SeqIO: + +``` +$ ./fastx_grep.py -i lsu tests/inputs/lsu.fa +>ITSLSUmock2p.ITS_M01380:138:000000000-C9GKM:1:1101:14440:2042 2:N:0 +CAAGTTACTTCCTCTAAATGACCAAGCCTAGTGTAGAACCATGTCGTCAGTGTCAGTCTG +AGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAATGTAATACTACTAGTAATT +ATTAATATTATAATTTTGTCTATTAGCATCTTATTATAGATAGAAGATATTATTCATATT +TCACTATCTTATACTGATATCAGCTTTATCAGATCACACTCTAGTGAAGATTGTTCTTAA +CTGAAATTTCCTTCTTCATACAGACACATTAATCTTACCTA +>ITSLSUmock2p.ITS_M01384:138:000000000-C9GKM:1:1101:14440:2043 2:N:0 +ACCCGTCAATTTCTTTAAGTTTTAGCCTTGCGACCGTACTCCCCAGGCGGTGCACTTAGT +GGTTTTCCGGCGACCCGGGCGGCGTCAGAGCCCCCCAAGTCTCGTGCACATCGTTTACGG +CGTGGACTACCAGGGTATCTAATCCTGTTTGATCCCCACGCTTTCGTGCCTCAGCGTCAG +TACCGGCCCAGCCACCCGTCTTCACCTTCGGCGTTCCTGTAGATATCTACGCATTTCACC +GCTACACCTACAGTTCCGGTGGCGCCTACCGGCCTCAAGAAACGCAGTATGCCCAGCTAT +T +``` + +The program should print a usage: ``` $ ./fastx_grep.py -h -usage: fastx_grep.py [-h] [-f str] [-O str] [-o FILE] PATTERN FILE [FILE ...] +usage: fastx_grep.py [-h] [-f str] [-O str] [-o FILE] [-i] [-v] + PATTERN FILE [FILE ...] Grep through FASTX files @@ -16,8 +36,49 @@ positional arguments: optional arguments: -h, --help show this help message and exit -f str, --format str Input file format (default: ) - -O str, --out_format str - Output file format (default: ) + -O str, --outfmt str Output file format (default: ) -o FILE, --outfile FILE - Output file (default: None) + Output file (default: <_io.TextIOWrapper + name='' mode='w' encoding='utf-8'>) + -i, --insensitive Case-insensitive search (default: False) + -v, --verbose Be chatty (default: False) +``` + +A passing test suite looks like this: + +``` +$ make test +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--pylint-rcfile=../pylintrc --mypy fastx_grep.py tests/fastx_grep_test.py +============================= test session starts ============================== +... 
+collected 18 items + +fastx_grep.py::FLAKE8 PASSED [ 5%] +fastx_grep.py::mypy PASSED [ 10%] +fastx_grep.py::test_guess_format PASSED [ 15%] +tests/fastx_grep_test.py::FLAKE8 SKIPPED [ 21%] +tests/fastx_grep_test.py::mypy PASSED [ 26%] +tests/fastx_grep_test.py::test_exists PASSED [ 31%] +tests/fastx_grep_test.py::test_usage PASSED [ 36%] +tests/fastx_grep_test.py::test_bad_file PASSED [ 42%] +tests/fastx_grep_test.py::test_cannot_guess PASSED [ 47%] +tests/fastx_grep_test.py::test_empty_file PASSED [ 52%] +tests/fastx_grep_test.py::test_lsu_uppercase PASSED [ 57%] +tests/fastx_grep_test.py::test_lsu_lowercase PASSED [ 63%] +tests/fastx_grep_test.py::test_lsu_uppercase_insensitive PASSED [ 68%] +tests/fastx_grep_test.py::test_lsu_lowercase_insensitive PASSED [ 73%] +tests/fastx_grep_test.py::test_outfile PASSED [ 78%] +tests/fastx_grep_test.py::test_outfile_verbose PASSED [ 84%] +tests/fastx_grep_test.py::test_outfmt_fastq_to_fasta PASSED [ 89%] +tests/fastx_grep_test.py::test_outfmt_fastq_to_fasta2line PASSED [ 94%] +::mypy PASSED [100%] +===================================== mypy ===================================== + +Success: no issues found in 2 source files +======================== 18 passed, 1 skipped in 3.09s ========================= ``` + +## Author + +Ken Youens-Clark diff --git a/17_synth/README.md b/17_synth/README.md new file mode 100644 index 0000000..f105684 --- /dev/null +++ b/17_synth/README.md @@ -0,0 +1,85 @@ +# DNA Synthesizer + +Write a program `synth.py` that uses Markov chains trained on input DNA files to create novel DNA sequences: + +``` +$ ./synth.py tests/inputs/CAM_SMPL_GS108.fa -n 2 +Done, see output in "out.fa". 
+$ cat out.fa +>1 +GGGCTTTATACCTAGAGGACGAGCATTAGATCTTGCCAGCATAGGCACTAAAGGTACATTC +>2 +TCCAGTTCCAGGGTCAAGATATACCTAAGATATATATTTAGCTAGTTTTATTAAGATTGGAATGT +``` + +The program should print a usage: + +``` +$ ./synth.py -h +usage: synth.py [-h] [-o FILE] [-f format] [-n number] [-x max] [-m min] + [-k kmer] [-s seed] + FILE [FILE ...] + +Create synthetic DNA using Markov chain + +positional arguments: + FILE Training file(s) + +optional arguments: + -h, --help show this help message and exit + -o FILE, --outfile FILE + Output filename (default: out.fa) + -f format, --format format + Input file format (default: fasta) + -n number, --num number + Number of sequences to create (default: 100) + -x max, --max_len max + Maximum sequence length (default: 75) + -m min, --min_len min + Minimum sequence length (default: 50) + -k kmer, --kmer kmer Size of kmers (default: 10) + -s seed, --seed seed Random seed value (default: None) +``` + +A passing test suite looks like this: + +``` +$ make test +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--pylint-rcfile=../pylintrc --mypy synth.py tests/unit_test.py tests/synth_test.py +============================= test session starts ============================== +... 
+collected 22 items + +synth.py::FLAKE8 SKIPPED [ 4%] +synth.py::mypy PASSED [ 8%] +tests/unit_test.py::FLAKE8 SKIPPED [ 13%] +tests/unit_test.py::mypy PASSED [ 17%] +tests/unit_test.py::test_gen_seq PASSED [ 21%] +tests/unit_test.py::test_read_training PASSED [ 26%] +tests/unit_test.py::test_find_kmers PASSED [ 30%] +tests/synth_test.py::FLAKE8 SKIPPED [ 34%] +tests/synth_test.py::mypy PASSED [ 39%] +tests/synth_test.py::test_exists PASSED [ 43%] +tests/synth_test.py::test_usage PASSED [ 47%] +tests/synth_test.py::test_bad_file PASSED [ 52%] +tests/synth_test.py::test_bad_seed PASSED [ 56%] +tests/synth_test.py::test_bad_format PASSED [ 60%] +tests/synth_test.py::test_sample1_num1 PASSED [ 65%] +tests/synth_test.py::test_sample1_num1_outfile PASSED [ 69%] +tests/synth_test.py::test_sample1_num1_min20_max40 PASSED [ 73%] +tests/synth_test.py::test_sample1_num1_kmer4 PASSED [ 78%] +tests/synth_test.py::test_sample1_num1_kmer5 PASSED [ 82%] +tests/synth_test.py::test_sample3_num1_format PASSED [ 86%] +tests/synth_test.py::test_sample1_defaults PASSED [ 91%] +tests/synth_test.py::test_multiple_inputs PASSED [ 95%] +::mypy PASSED [100%] +===================================== mypy ===================================== + +Success: no issues found in 3 source files +======================== 20 passed, 3 skipped in 5.01s ========================= +``` + +## Author + +Ken Youens-Clark diff --git a/18_fastx_sampler/README.md b/18_fastx_sampler/README.md index d7e52e3..8495115 100644 --- a/18_fastx_sampler/README.md +++ b/18_fastx_sampler/README.md @@ -1,69 +1,81 @@ -# Randomly Subset a FASTA file +# FASTX Sampler Write a Python program called `sampler.py` that will probabilistically sample one or more input FASTA files into an output directory. +The inputs for this program will be generated by your `synth.py` program. +You can run `make fasta` to create files of 1K, 10K, and 100K reads in this directory. 
+You can then use these files for testing your program: -The inputs for this program will be generated by your `moog.py` program. -You can run `make fasta` to create files of 1K, 10K, 100K, and 1M reads in this directory. -You can then use these files for testing your program. - -The parameters for your program are: - -* One or more positional FILE arguments -* `-p`|`--pct`: a `float` value between 0 and 1 which is the percentage of reads to take (default `0.1` or 10%) -* `-s`|`--seed`: an `int` value to use for the random seed (default `None`) -* `-o`|`--outdir`: an `str` value to use for the output directory (default `'out'`). You will need to create this directory if it does not exist. Consult your `transcribe.py` program to see how to do that. +``` +$ ./sampler.py -m 2 tests/inputs/n1k.fa + 1: n1k.fa +Wrote 2 sequences from 1 file to directory "out". +$ cat out/n1k.fa +>34 +AACATCAGGTATGGTCATCAGTTTTAGGATTTGAAGTAATTCTTCGCGAATCTTCGATCT +CTATAGGATCAGGAATTATACTTAACTTTATACTATAAGTGAAATAAACTCACTATGAAA +TTGGTAGTGGAACAGCAGAAGTTCAGATGATTTATCAGAAAAGTAATAGTGAGTAATCCT +TTAGATTTA +>40 +TAGATTGCATCAGGGATTCAGGGCTGACCTTGTTGCACAGCATAAACAACTGATACACAC +AGACTATCTACTATACCATAAACATCTTGCTACTACAATTTCAGGTTCCTATGGATTTAA +TTGGCGCTTTATTTATCTGA +``` Here is the usage your program should create for `-h` or `--help`: ``` $ ./sampler.py -h -usage: sampler.py [-h] [-p reads] [-s seed] [-o DIR] FILE [FILE ...] +usage: sampler.py [-h] [-f format] [-p reads] [-m max] [-s seed] [-o DIR] + FILE [FILE ...] 
Probabalistically subset FASTA files positional arguments: - FILE Input FASTA file(s) + FILE Input FASTA/Q file(s) optional arguments: -h, --help show this help message and exit - -p reads, --pct reads + -f format, --format format + Input file format (default: fasta) + -p reads, --percent reads Percent of reads (default: 0.1) + -m max, --max max Maximum number of reads (default: 0) -s seed, --seed seed Random seed value (default: None) -o DIR, --outdir DIR Output directory (default: out) ``` -When run with the `n1k.fa`, it should print this: +A passing test suite looks like this: ``` -$ ./sampler.py n1k.fa -s 1 - 1: n1k.fa -Wrote 95 sequences from 1 file to directory "out" -``` - -Here is an example of the output for multiple files: - -``` -$ ./sampler.py -p .25 -s 4 n1k.fa n10k.fa n100k.fa -o sampled - 1: n1k.fa - 2: n10k.fa - 3: n100k.fa -Wrote 27,688 sequences from 3 files to directory "sampled" +$ make test +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--pylint-rcfile=../pylintrc --mypy sampler.py tests/sampler_test.py +============================= test session starts ============================== +... 
+collected 15 items + +sampler.py::FLAKE8 PASSED [ 6%] +sampler.py::mypy PASSED [ 12%] +tests/sampler_test.py::FLAKE8 SKIPPED [ 18%] +tests/sampler_test.py::mypy PASSED [ 25%] +tests/sampler_test.py::test_exists PASSED [ 31%] +tests/sampler_test.py::test_usage PASSED [ 37%] +tests/sampler_test.py::test_bad_file PASSED [ 43%] +tests/sampler_test.py::test_bad_pct PASSED [ 50%] +tests/sampler_test.py::test_bad_seed PASSED [ 56%] +tests/sampler_test.py::test_bad_format PASSED [ 62%] +tests/sampler_test.py::test_defaults_one_file PASSED [ 68%] +tests/sampler_test.py::test_fastq_input PASSED [ 75%] +tests/sampler_test.py::test_defaults_multiple_file PASSED [ 81%] +tests/sampler_test.py::test_max_reads PASSED [ 87%] +tests/sampler_test.py::test_options PASSED [ 93%] +::mypy PASSED [100%] +===================================== mypy ===================================== + +Success: no issues found in 2 source files +======================== 15 passed, 1 skipped in 3.73s ========================= ``` -To parse the FASTA files, you will need to add this import statement: +## Author -``` -from Bio import SeqIO -``` - -If you have not already install BioPython, you will need to do so: - -``` -$ python3 -m pip install biopython -``` - -Or run: - -``` -$ python3 -m pip install -r requirements.txt -``` +Ken Youens-Clark diff --git a/19_blastomatic/README.md b/19_blastomatic/README.md new file mode 100644 index 0000000..4c338ed --- /dev/null +++ b/19_blastomatic/README.md @@ -0,0 +1,77 @@ +# BLASTOMATIC + +Write a program called `blastomatic.py` that will select BLAST hits above a given percent ID and will merge them with annotations and print the query sequence ID, the percent ID, the depth, and the lat/lon: + +``` +$ ./blastomatic.py -a tests/inputs/meta.csv -b tests/inputs/hits1.csv -p 99 +Exported 22 to "out.csv". 
+$ head out.csv +qseqid,pident,depth,lat_lon +JCVI_READ_1091120852400,100.0,47.0,"24.488333,-83.07" +JCVI_READ_1091143613656,100.0,4513.0,"20.5225,-85.41361" +JCVI_READ_1092258001174,100.0,4.2,"9.164444,-79.83611" +JCVI_READ_1092963485055,100.0,2.0,"-1.2169445,-90.319725" +JCVI_READ_1092963485055,100.0,2.0,"-1.2169445,-90.319725" +JCVI_READ_1092963485055,100.0,2.0,"-1.2169445,-90.319725" +JCVI_READ_1093012135235,100.0,20.0,"36.003887,-75.39472" +JCVI_READ_1093012135235,100.0,20.0,"36.003887,-75.39472" +JCVI_READ_1093012135235,100.0,20.0,"36.003887,-75.39472" +``` + +The program should produce a usage: + +``` +$ ./blastomatic.py -h +usage: blastomatic.py [-h] -b FILE -a FILE [-o FILE] [-d DELIM] [-p PCTID] + +Annotate BLAST output + +optional arguments: + -h, --help show this help message and exit + -b FILE, --blasthits FILE + BLAST -outfmt 6 (default: None) + -a FILE, --annotations FILE + Annotations file (default: None) + -o FILE, --outfile FILE + Output file (default: out.csv) + -d DELIM, --delimiter DELIM + Output field delimiter (default: ) + -p PCTID, --pctid PCTID + Minimum percent identity (default: 0.0) +``` + +A passing test suite looks like this: + +``` +$ make test +python3 -m pytest -xv --disable-pytest-warnings --flake8 --pylint +--mypy blastomatic.py tests/*_test.py +============================= test session starts ============================== +... 
+collected 15 items + +blastomatic.py::FLAKE8 SKIPPED [ 6%] +blastomatic.py::mypy PASSED [ 12%] +tests/blastomatic_test.py::FLAKE8 SKIPPED [ 18%] +tests/blastomatic_test.py::mypy PASSED [ 25%] +tests/blastomatic_test.py::test_exists PASSED [ 31%] +tests/blastomatic_test.py::test_usage PASSED [ 37%] +tests/blastomatic_test.py::test_bad_annotations PASSED [ 43%] +tests/blastomatic_test.py::test_bad_input_file PASSED [ 50%] +tests/blastomatic_test.py::test_good_input PASSED [ 56%] +tests/blastomatic_test.py::test_delimiter PASSED [ 62%] +tests/blastomatic_test.py::test_guess_delimiter PASSED [ 68%] +tests/blastomatic_test.py::test_pctid PASSED [ 75%] +tests/unit_test.py::FLAKE8 SKIPPED [ 81%] +tests/unit_test.py::mypy PASSED [ 87%] +tests/unit_test.py::test_guess_delimiter PASSED [ 93%] +::mypy PASSED [100%] +===================================== mypy ===================================== + +Success: no issues found in 3 source files +======================== 13 passed, 3 skipped in 2.84s ========================= +``` + +## Author + +Ken Youens-Clark diff --git a/pylintrc b/pylintrc index 06b0fcf..bc2e1ab 100644 --- a/pylintrc +++ b/pylintrc @@ -424,4 +424,4 @@ known-third-party=enchant # "Exception" overgeneral-exceptions=Exception -disable=too-many-locals,invalid-name,too-many-statements,too-many-arguments,cell-var-from-loop +disable=too-many-locals,invalid-name,too-many-statements,too-many-arguments,cell-var-from-loop,wrong-import-order