Skip to content

Commit

Permalink
Merge pull request DRL#82 from douglasgscofield/master
Browse files Browse the repository at this point in the history
refine nodesDB handling and writing of default db
  • Loading branch information
DRL authored Feb 25, 2019
2 parents a14630e + af55ced commit 366f455
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 11 deletions.
26 changes: 19 additions & 7 deletions lib/BtIO.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,9 @@ def parseNodesDB(**kwargs):
Parsing names.dmp and nodes.dmp into the 'nodes_db' dict of dicts that
gets JSON'ed into blobtools/data/nodes_db.json if this file
does not exist. Nodes_db.json is used if neither "--names" and "--nodes"
nor "--db" is specified.
nor "--db" is specified. If all three are specified and "--db" does not
exist, then write 'nodes_db' to file specified by "--db". If all three
are specified and "--db" exists, error out.
'''
nodesDB = {}
names_f = kwargs['names']
Expand All @@ -624,7 +626,12 @@ def parseNodesDB(**kwargs):
BtLog.error('0', names_f)
if not isfile(nodes_f):
BtLog.error('0', nodes_f)
print BtLog.status_d['3'] % (nodes_f, names_f)
if (nodesDB_f):
if isfile(nodesDB_f):
BtLog.error('47', nodesDB_f)
BtLog.status_d['27'] % (nodesDB_f, nodes_f, names_f)
else:
print BtLog.status_d['3'] % (nodes_f, names_f)
try:
nodesDB = readNamesNodes(names_f, nodes_f)
except:
Expand All @@ -645,11 +652,17 @@ def parseNodesDB(**kwargs):
nodesDB = readNodesDB(nodesDB_default)
except:
BtLog.error('27', nodesDB_default)
nodesDB_f = nodesDB_default

# Write nodesDB if not available
if not isfile(nodesDB_default):
writeNodesDB(nodesDB, nodesDB_default)
# Write nodesDB to the "--db" file if names, nodes and nodesDB are all given
# and the nodesDB file does not exist. Otherwise, if nodesDB was not given,
# write to nodesDB_default when that file does not exist yet. If nodesDB was
# given, nodesDB_default is left untouched.
if (nodes_f and names_f and nodesDB_f):
print BtLog.status_d['28'] % nodesDB_f
writeNodesDB(nodesDB, nodesDB_f)
elif (not nodesDB_f and not isfile(nodesDB_default)):
nodesDB_f = nodesDB_default
print BtLog.status_d['5'] % nodesDB_f
writeNodesDB(nodesDB, nodesDB_f)

return nodesDB, nodesDB_f

Expand Down Expand Up @@ -691,7 +704,6 @@ def readNodesDB(nodesDB_f):
return nodesDB

def writeNodesDB(nodesDB, nodesDB_f):
print BtLog.status_d['5'] % nodesDB_f
nodes_count = nodesDB['nodes_count']
i = 0
with open(nodesDB_f, 'w') as fh:
Expand Down
8 changes: 5 additions & 3 deletions lib/BtLog.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ def progress(iteration, steps, max_value, no_limit=False):
'43': '[ERROR:43] : %s could not be found.',
'44': '[ERROR:44] : Please specify integers for --map_col_sseqid and --map_col_taxid.',
'45': '[ERROR:45] : Both --min_score and --min_diff must be numbers.',
'46': '[ERROR:46] : Score in %s must be a float, not \'%s\'.'

'46': '[ERROR:46] : Score in %s must be a float, not \'%s\'.',
'47': '[ERROR:47] : Cannot create new "--db" file from "--names", "--nodes", "--db" file exists. %s'
}

warn_d = {
Expand Down Expand Up @@ -125,7 +125,9 @@ def progress(iteration, steps, max_value, no_limit=False):
'23': '[+] Filtered %s (pairs=%s) ...',
'24': '[+] Writing %s',
'25': '[+] Gzip\'ing %s',
'26': '[+] Reading %s'
'26': '[+] Reading %s',
'27': '[+] Creating nodesDB %s from %s and %s',
'28': '[+] Store nodesDB in %s',
}

info_d = {
Expand Down
3 changes: 2 additions & 1 deletion lib/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
taxonomies have equal scores (otherwise "unresolved") [default: False]
--nodes <NODES> NCBI nodes.dmp file. Not required if '--db'
--names <NAMES> NCBI names.dmp file. Not required if '--db'
--db <NODESDB> NodesDB file (default: $BLOBTOOLS/data/nodesDB.txt).
--db <NODESDB> NodesDB file (default: $BLOBTOOLS/data/nodesDB.txt). If --nodes, --names and --db
are all given and NODESDB does not exist, create it from NODES and NAMES.
-b, --bam <BAM>... BAM file(s), can be specified multiple times
-s, --sam <SAM>... SAM file(s), can be specified multiple times
-a, --cas <CAS>... CAS file(s) (requires clc_mapping_info in $PATH), can be specified multiple times
Expand Down

0 comments on commit 366f455

Please sign in to comment.