Skip to content

Commit

Permalink
Add download database script and meta.yaml info, pre-release 1.5
Browse files Browse the repository at this point in the history
  • Loading branch information
pedroscampoy committed Mar 16, 2020
1 parent e31c341 commit 715ff1c
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 14 deletions.
8 changes: 4 additions & 4 deletions bin/download_plasmid_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def get_arguments():
#remove duplicates
plasmid_reference = set(plasmid_reference)
#Set terms to exclude
terms_to_exclude = ['gene', 'protein', 'partial', 'putative', 'hypothetical']
terms_to_exclude = ['gene ', 'protein', 'partial', 'putative', 'hypothetical']
#Dictionary with erroneous accession numbers to determine the reason
erroneous = {}

Expand All @@ -132,12 +132,12 @@ def get_arguments():
if sum(terms_present) > 0:
terms_true = [terms_to_exclude[i] for i, x in enumerate(terms_present) if x == True]
erroneous[record.id] = "Include terms: " + ', '.join(terms_true) + " => " + record.description
logger.info(" %s/%s Invalid terms in record %s" % (current_record,total_sequences, record.id))
logger.debug(" %s/%s Invalid terms in record %s" % (current_record,total_sequences, record.id))
else:
logger.info(" %s/%s Downloading record %s" % (current_record,total_sequences, record.id))
logger.debug(" %s/%s Downloading record %s" % (current_record,total_sequences, record.id))
SeqIO.write(record, output_handle, "fasta")
except:
logger.info(" %s/%s Failed to download %s" % (current_record,total_sequences, record.id))
logger.debug(" %s/%s Failed to download %s" % (current_record,total_sequences, record.id))
erroneous[record.id] = "failed to download"
current_record = current_record + 1

Expand Down
7 changes: 3 additions & 4 deletions plasmidID.sh → plasmidID
Original file line number Diff line number Diff line change
Expand Up @@ -1013,19 +1013,18 @@ done
#obtain list of contigs per plasmid in a separate file
for i in $(cat $reconstruct_fasta | grep ">" | awk 'gsub(">","",$1) {print $1}')
do
cat $output_dir/$group/$sample/data/$sample".plasmids.complete" \
| awk '/'$i'/ && !x[$4]++ {print "_"$4}' > $output_dir/$group/$sample/fasta_files/$i.ac &>> $log_file || error ${LINENO} $(basename $0) "See $output_dir/logs/plasmidID.log for more information.\ncommand:\n cat $output_dir/$group/$sample/data/$sample\".plasmids.complete\" | awk \'/\'$i\'/ && !x[$4]++ {print \"_\"$4}\' > $output_dir/$group/$sample/fasta_files/$i.ac"
cat $output_dir/$group/$sample/data/$sample".plasmids.complete" | awk '/'"${i}"'/ && !x[$4]++ {print "_"$4}' > $output_dir/$group/$sample/fasta_files/$i.ac
done

#Extract fasta from contig file, oe per plasmid
#Extract fasta from contig file, one per plasmid
for i in $(ls $output_dir/$group/$sample/fasta_files/*.ac)
do
if [ -s $i ]; then
filter_fasta.sh -i $output_dir/$group/$sample/data/$sample".fna" -f $i -n $(basename $i .ac) -o $output_dir/$group/$sample/fasta_files &>> $log_file || error ${LINENO} $(basename $0) "See $output_dir/logs/plasmidID.log for more information.\ncommand:\nfilter_fasta.sh -i $output_dir/$group/$sample/data/$sample\".fna\" -f $i -n $(basename $i .ac) -o $output_dir/$group/$sample/fasta_files"
fi
done

#Remove previous
#Remove temp files in fasta_files
for i in $(ls $output_dir/$group/$sample/fasta_files/*.ac)
do
if [ -e $i ]; then
Expand Down
29 changes: 23 additions & 6 deletions plasmidID_191122.yml → plasmidID_200316.ylm
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
name: plasmidID
channels:
- conda-forge
- anaconda
- bioconda
- defaults
dependencies:
Expand All @@ -10,37 +12,42 @@ dependencies:
- blast=2.9.0=pl526h3066fca_4
- bowtie2=2.3.5=py36he860b03_0
- bzip2=1.0.8=h7b6447c_0
- ca-certificates=2019.10.16=0
- ca-certificates=2019.11.27=0
- cairo=1.14.12=h8948797_3
- cd-hit=4.8.1=hdbcaa40_0
- certifi=2019.9.11=py36_0
- certifi=2019.11.28=py36_0
- circos=0.69.8=0
- curl=7.67.0=hbc83047_0
- entrez-direct=11.0=pl526_1
- expat=2.2.6=he6710b0_0
- fontconfig=2.13.0=h9420a91_0
- freetype=2.9.1=h8a8886c_1
- fribidi=1.0.5=h7b6447c_0
- gawk=5.0.1=h7b6447c_0
- giflib=5.1.4=h14c3975_1
- glib=2.63.1=h5a9c865_0
- graphite2=1.3.13=h23475e2_0
- graphviz=2.40.1=h21bd128_2
- gsl=2.5=h294904e_1
- harfbuzz=1.8.8=hffaf4a1_0
- hmmer=3.2.1=he1b5a44_2
- htslib=1.9=h4da6232_3
- icu=58.2=h9c2bf20_1
- infernal=1.1.2=h14c3975_2
- jpeg=9b=h024ee3a_2
- krb5=1.16.1=h173b8e3_7
- krb5=1.16.4=h173b8e3_0
- libblas=3.8.0=14_openblas
- libcblas=3.8.0=14_openblas
- libcurl=7.67.0=h20c2e04_0
- libdb=6.1.26=0
- libdb=6.1.26=he6710b0_0
- libdeflate=1.2=h516909a_1
- libedit=3.1.20181209=hc058e9b_0
- libffi=3.2.1=hd88cf55_4
- libgcc=7.2.0=h69d50b8_2
- libgcc-ng=9.1.0=hdf63c60_0
- libgd=2.2.5=hceca4fd_3
- libgfortran-ng=7.3.0=hdf63c60_0
- libidn=7.45.0=2
- libopenblas=0.3.7=h6e990d7_3
- libpng=1.6.37=hbc83047_0
- libssh2=1.8.2=h1ba5d50_0
- libstdcxx-ng=9.1.0=hdf63c60_0
Expand All @@ -50,6 +57,7 @@ dependencies:
- libxcb=1.13=h1bed415_1
- libxml2=2.9.9=hea5a465_1
- libxslt=1.1.33=h7d1a2b0_0
- mash=2.2.1=h3d38be6_1
- minced=0.4.2=0
- ncurses=6.1=he6710b0_1
- openjdk=8.0.152=h46b5887_1
Expand Down Expand Up @@ -280,7 +288,7 @@ dependencies:
- python=3.6.9=h265db76_0
- readline=7.0=h7b6447c_5
- samtools=1.9=h10a08f8_12
- setuptools=41.6.0=py36_0
- setuptools=44.0.0=py36_0
- spades=3.13.1=0
- sqlite=3.30.1=h7b6447c_0
- tbb=2019.8=hfd86e86_0
Expand All @@ -292,5 +300,14 @@ dependencies:
- xz=5.2.4=h14c3975_4
- zlib=1.2.11=h7b6447c_3
- zstd=1.3.7=h0b5b093_0
- pip:
- joblib==0.14.1
- numpy==1.18.1
- pandas==1.0.1
- python-dateutil==2.8.1
- pytz==2019.3
- scikit-learn==0.22.2.post1
- scipy==1.4.1
- six==1.14.0
prefix: /home/pjsola/.conda/envs/plasmidID

0 comments on commit 715ff1c

Please sign in to comment.