Skip to content

Commit

Permalink
Remove --spades_tmp_dir (not needed with --spades_options)
Browse files Browse the repository at this point in the history
  • Loading branch information
rrwick committed Jan 20, 2022
1 parent 9d7f061 commit 550c082
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 35 deletions.
44 changes: 44 additions & 0 deletions test/test_spades_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,47 @@ def test_bad_fastq_2(self):
test_fastq = os.path.join(os.path.dirname(__file__), 'test_bad_reads_2.fastq')
with self.assertRaises(unicycler.spades_func.BadFastq):
unicycler.spades_func.get_read_count(test_fastq)

def test_build_spades_command_1(self):
    """
    First k-mer round (i=0) with paired reads and no extra SPAdes options: the command uses only
    the first k-mer, passes --isolate and the two read files, and gets the default memory limit.
    """
    expected = ['spades.py', '-o', 'out', '-k', '21', '--threads', '16', '--isolate',
                '-1', '1.fq.gz', '-2', '2.fq.gz', '-m', '1024']
    command = unicycler.spades_func.build_spades_command(
        'spades.py', 'out', 16, [21, 31, 41], 0, '1.fq.gz', '2.fq.gz', None,
        using_paired_reads=True, using_unpaired_reads=False, spades_options=None)
    self.assertEqual(command, expected)

def test_build_spades_command_2(self):
    """
    Second k-mer round (i=1): the command lists the first two k-mers and restarts from the first
    one instead of passing read files again.
    """
    expected = ['spades.py', '-o', 'out', '-k', '21,31', '--threads', '16',
                '--restart-from', 'k21', '-m', '1024']
    command = unicycler.spades_func.build_spades_command(
        'spades.py', 'out', 16, [21, 31, 41], 1, '1.fq.gz', '2.fq.gz', None,
        using_paired_reads=True, using_unpaired_reads=False, spades_options=None)
    self.assertEqual(command, expected)

def test_build_spades_command_3(self):
    """
    Third k-mer round (i=2): the command lists all three k-mers and restarts from the second one.
    """
    expected = ['spades.py', '-o', 'out', '-k', '21,31,41', '--threads', '16',
                '--restart-from', 'k31', '-m', '1024']
    command = unicycler.spades_func.build_spades_command(
        'spades.py', 'out', 16, [21, 31, 41], 2, '1.fq.gz', '2.fq.gz', None,
        using_paired_reads=True, using_unpaired_reads=False, spades_options=None)
    self.assertEqual(command, expected)

def test_build_spades_command_4(self):
    """
    First k-mer round with unpaired reads only: the single read file is passed with -s and no
    paired-read arguments appear.
    """
    expected = ['spades.py', '-o', 'out', '-k', '21', '--threads', '16', '--isolate',
                '-s', 's.fq.gz', '-m', '1024']
    command = unicycler.spades_func.build_spades_command(
        'spades.py', 'out', 16, [21, 31, 41], 0, None, None, 's.fq.gz',
        using_paired_reads=False, using_unpaired_reads=True, spades_options=None)
    self.assertEqual(command, expected)

def test_build_spades_command_5(self):
    """
    A user-supplied '-m' in the extra SPAdes options replaces the default memory limit: only the
    user's value appears in the command.
    """
    expected = ['spades.py', '-o', 'out', '-k', '21', '--threads', '16', '--isolate',
                '-1', '1.fq.gz', '-2', '2.fq.gz', '-m', '123']
    command = unicycler.spades_func.build_spades_command(
        'spades.py', 'out', 16, [21, 31, 41], 0, '1.fq.gz', '2.fq.gz', None,
        using_paired_reads=True, using_unpaired_reads=False, spades_options='-m 123')
    self.assertEqual(command, expected)

def test_build_spades_command_6(self):
    """
    Extra SPAdes options that do not set the memory limit are passed through, and the default
    memory limit is still appended after them.
    """
    expected = ['spades.py', '-o', 'out', '-k', '21', '--threads', '16', '--isolate',
                '-1', '1.fq.gz', '-2', '2.fq.gz', '--tmp-dir', 'abc', '-m', '1024']
    command = unicycler.spades_func.build_spades_command(
        'spades.py', 'out', 16, [21, 31, 41], 0, '1.fq.gz', '2.fq.gz', None,
        using_paired_reads=True, using_unpaired_reads=False, spades_options='--tmp-dir abc')
    self.assertEqual(command, expected)

56 changes: 31 additions & 25 deletions unicycler/spades_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ class BadFastq(Exception):

def get_best_spades_graph(short1, short2, short_unpaired, out_dir, read_depth_filter, verbosity,
spades_path, threads, keep, kmer_count, min_k_frac, max_k_frac, kmers,
expected_linear_seqs, spades_tmp_dir, largest_component,
spades_graph_prefix, spades_options):
expected_linear_seqs, largest_component, spades_graph_prefix,
spades_options):
"""
This function tries a SPAdes assembly at different k-mers and returns the best one.
"""
Expand Down Expand Up @@ -61,7 +61,7 @@ def get_best_spades_graph(short1, short2, short_unpaired, out_dir, read_depth_fi

graph_files, insert_size_mean, insert_size_deviation = \
run_spades_all_kmers(reads, spades_dir, kmer_range, threads, spades_path,
spades_tmp_dir, spades_graph_prefix, spades_options)
spades_graph_prefix, spades_options)

existing_graph_files = [x for x in graph_files if x is not None]
if not existing_graph_files:
Expand Down Expand Up @@ -157,15 +157,12 @@ def get_best_spades_graph(short1, short2, short_unpaired, out_dir, read_depth_fi
if keep < 3 and os.path.isdir(spades_dir):
log.log('Deleting ' + spades_dir + '/')
shutil.rmtree(spades_dir, ignore_errors=True)
if keep < 3 and spades_tmp_dir is not None and os.path.isdir(spades_tmp_dir):
log.log('Deleting ' + spades_tmp_dir + '/')
shutil.rmtree(spades_tmp_dir, ignore_errors=True)

return assembly_graph


def run_spades_all_kmers(read_files, spades_dir, kmers, threads, spades_path, tmp_dir,
spades_graph_prefix, spades_options):
def run_spades_all_kmers(read_files, spades_dir, kmers, threads, spades_path, spades_graph_prefix,
spades_options):
"""
SPAdes is run with all k-mers up to the top one. For example:
* round 1: 25
Expand All @@ -183,24 +180,11 @@ def run_spades_all_kmers(read_files, spades_dir, kmers, threads, spades_path, tm

graph_files, insert_size_means, insert_size_deviations = [], [], []
for i in range(len(kmers)):
kmer_string = ','.join([str(x) for x in kmers[:i+1]])
biggest_kmer = kmers[i]
command = [spades_path, '-o', spades_dir, '-k', kmer_string, '--threads', str(threads),
'-m', '1024']
if tmp_dir is not None:
command += ['--tmp-dir', tmp_dir]
if i == 0: # first k-mer
command += ['--isolate']
if using_paired_reads:
command += ['-1', short1, '-2', short2]
if using_unpaired_reads:
command += ['-s', unpaired]
else: # subsequent k-mer
previous_k = kmers[i-1]
command += ['--restart-from', f'k{previous_k}']
if spades_options:
command += spades_options.split()

command = build_spades_command(spades_path, spades_dir, threads, kmers, i, short1, short2,
unpaired, using_paired_reads, using_unpaired_reads,
spades_options)
log.log(' '.join(command))
graph_file, insert_size_mean, insert_size_deviation = \
run_spades_one_kmer(command, spades_dir, biggest_kmer)

Expand All @@ -209,6 +193,7 @@ def run_spades_all_kmers(read_files, spades_dir, kmers, threads, spades_path, tm
graph_files.append(copy_path)
insert_size_means.append(insert_size_mean)
insert_size_deviations.append(insert_size_deviation)
log.log('')

insert_size_means = [x for x in insert_size_means if x is not None]
insert_size_deviations = [x for x in insert_size_deviations if x is not None]
Expand All @@ -232,6 +217,27 @@ def run_spades_all_kmers(read_files, spades_dir, kmers, threads, spades_path, tm
return graph_files, insert_size_mean, insert_size_deviation


def build_spades_command(spades_path, spades_dir, threads, kmers, i, short1, short2, unpaired,
                         using_paired_reads, using_unpaired_reads, spades_options):
    """
    Builds the SPAdes command (as a list of arguments) for the k-mer round with index i.

    Parameters:
      * spades_path: the SPAdes executable (first element of the command).
      * spades_dir: the SPAdes output directory (given to -o).
      * threads: thread count (given to --threads).
      * kmers: the full list of k-mer sizes; round i uses kmers[0] through kmers[i].
      * i: index of the current round. Round 0 starts a fresh assembly from the reads, later
        rounds restart from the previous round's k-mer.
      * short1, short2: paired read files, only used when using_paired_reads is true.
      * unpaired: unpaired read file, only used when using_unpaired_reads is true.
      * spades_options: a string of additional SPAdes options (or None/empty for no additional
        options). It is split using shell-like quoting rules, so quoted values containing
        spaces stay together as a single argument.

    Returns the command as a list of strings. A default memory limit of '-m 1024' is appended
    unless the additional options already set one ('-m', '--memory' or '--memory=...').
    """
    import shlex

    kmer_string = ','.join(str(k) for k in kmers[:i + 1])
    command = [spades_path, '-o', spades_dir, '-k', kmer_string, '--threads', str(threads)]

    if i == 0:  # first k-mer: start a new assembly from the reads
        command.append('--isolate')
        if using_paired_reads:
            command += ['-1', short1, '-2', short2]
        if using_unpaired_reads:
            command += ['-s', unpaired]
    else:  # subsequent k-mer: continue from the previous round's k-mer
        command += ['--restart-from', f'k{kmers[i - 1]}']

    extra_options = []
    if spades_options:
        try:
            extra_options = shlex.split(spades_options)
        except ValueError:
            # Unbalanced quotes (e.g. a lone apostrophe in a path) can't be parsed shell-style,
            # so fall back to plain whitespace splitting rather than failing.
            extra_options = spades_options.split()
    command += extra_options

    # Only add the default memory limit if the user didn't set one, in any of its spellings.
    memory_is_set = any(option in ('-m', '--memory') or option.startswith('--memory=')
                        for option in extra_options)
    if not memory_is_set:
        command += ['-m', '1024']
    return command


def run_spades_one_kmer(command, spades_dir, biggest_kmer):
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

Expand Down
12 changes: 2 additions & 10 deletions unicycler/unicycler.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,8 @@ def main():
args.depth_filter, args.verbosity,
args.spades_path, args.threads, args.keep,
args.kmer_count, args.min_kmer_frac, args.max_kmer_frac,
args.kmers, args.linear_seqs, args.spades_tmp_dir,
args.largest_component, spades_graph_prefix,
args.spades_options)
args.kmers, args.linear_seqs, args.largest_component,
spades_graph_prefix, args.spades_options)
determine_copy_depth(graph)
if args.keep > 0 and not os.path.isfile(best_spades_graph):
graph.save_to_gfa(best_spades_graph, save_copy_depth_info=True, newline=True,
Expand Down Expand Up @@ -320,11 +319,6 @@ def get_arguments():
help='Only keep the largest connected component of the assembly '
'graph (default: keep all connected components)'
if show_all_args else argparse.SUPPRESS)
spades_group.add_argument('--spades_tmp_dir', type=str, default=None,
help='Specify SPAdes temporary directory using the SPAdes --tmp-dir '
'option (default: make a temporary directory in the output '
'directory)'
if show_all_args else argparse.SUPPRESS)
spades_group.add_argument('--spades_options', type=str, default=None,
help='Additional options to be given to SPAdes (example: '
'"--phred-offset 33", default: no additional options)'
Expand Down Expand Up @@ -487,8 +481,6 @@ def get_arguments():
args.unpaired = os.path.abspath(args.unpaired)
if args.long:
args.long = os.path.abspath(args.long)
if args.spades_tmp_dir:
args.spades_tmp_dir = os.path.abspath(args.spades_tmp_dir)

# Create an initial logger which doesn't have an output file.
log.logger = log.Log(None, args.verbosity)
Expand Down

0 comments on commit 550c082

Please sign in to comment.