Skip to content

Commit

Permalink
second round of debugging
Browse files Browse the repository at this point in the history
  • Loading branch information
malonge committed May 7, 2021
1 parent f7b1d0d commit 1358544
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 48 deletions.
32 changes: 1 addition & 31 deletions ragtag_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,14 @@
import argparse

import pysam
import numpy as np
import networkx as nx

from ragtag_utilities.utilities import log, run_oae, get_ragtag_version
from ragtag_utilities.AlignmentReader import PAFReader
from ragtag_utilities.ContigAlignment import ContigAlignment
from ragtag_utilities.AGPFile import AGPFile
from ragtag_utilities.Aligner import Minimap2Aligner
from ragtag_utilities.Aligner import UnimapAligner
from ragtag_utilities.Aligner import NucmerAligner
from ragtag_utilities.ScaffoldGraph import ScaffoldGraphBase
from ragtag_utilities.ScaffoldGraph import AGPMultiScaffoldGraph
from ragtag_utilities.ScaffoldGraph import MultiScaffoldGraph
from ragtag_utilities.ScaffoldGraph import PatchScaffoldGraph
from ragtag_utilities.ScaffoldGraph import Alignment

Expand Down Expand Up @@ -506,31 +501,6 @@ def main():


if __name__ == "__main__":
"""
- Make a new scaffold graph. It will be directed, and for each edge, there must exist its opposing edge (from <=> to).
- Each edge should have info about its connecting sequences, as well as the patch sequence
-- The patch sequence could be a gap
- The graph should have a method to output the implied AGP file
- It should also have a method for getting a matching (uses a undirected copy of the graph to get pairs of nodes defining the matching)
- For ragtag patch, make two of these graphs
-- A) for contigs that make up scaffolds. edges are known gaps
-- B) is for adjacencies implied by alignments + any edges from A not implied by alignments
If fill only:
Make a new empty copy graph. For each (undirected) edge in A, if edge in B, add to new copy. Replace B with new copy.
If join only:
for each (undirected) edge in A, if edge in B, replace its info with gap info (info from A)
- The edges for the graph will have two pieces of data - a weight and an Alignment object (maybe a list of them)
- The alignment object will contain all the necessary info to output the AGP.
"""
main()


28 changes: 16 additions & 12 deletions ragtag_utilities/ScaffoldGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,11 +844,12 @@ def write_agp(self, agp_fn, ref_fn, add_suffix_to_unplaced=False):
assert u_degree in {2, 4}

# Check if we have found a starting target sequence
if u_degree == 1:
if u_degree == 2:
cur_ref = u_base
from_node = u
to_node = v
used_edges.add((u, v))
used_edges.add((v, u))
break

# If we haven't found a new starting target sequence, we are done
Expand Down Expand Up @@ -886,7 +887,7 @@ def write_agp(self, agp_fn, ref_fn, add_suffix_to_unplaced=False):
if patch_aln.is_gap:
agp.add_gap_line(obj_header, obj_pos+1, obj_pos+patch_len, obj_pid, "N", patch_len, "scaffold", "yes", "align_genus")
else:
agp.add_seq_line(obj_header, obj_pos+1, obj_pos+patch_len, obj_pid, "W", patch_query, patch_aln.my_query_end, patch_aln.their_query_start, patch_strand)
agp.add_seq_line(obj_header, obj_pos+1, obj_pos+patch_len, obj_pid, "W", patch_query, patch_aln.my_query_end+1, patch_aln.their_query_start, patch_strand)
used_components.add(patch_query)
obj_pos += patch_len
obj_pid += 1
Expand All @@ -898,19 +899,22 @@ def write_agp(self, agp_fn, ref_fn, add_suffix_to_unplaced=False):
cur_ref_strand = "+"
if to_node.endswith("_e"):
cur_ref_strand = "-"
agp.add_seq_line(obj_header, obj_pos+1+comp_start, obj_pos+cur_ref_len, obj_pid, "W", cur_ref, 1+comp_start, cur_ref_len, cur_ref_strand)
obj_pos += cur_ref_len
agp.add_seq_line(obj_header, obj_pos+1, obj_pos+(cur_ref_len + comp_start), obj_pid, "W", cur_ref, 1+(-1*comp_start), cur_ref_len, cur_ref_strand)
obj_pos += cur_ref_len + comp_start
obj_pid += 1
used_components.add(cur_ref)

from_node = to_node
next_nodes = set(self.graph[from_node])
visited_nodes = set([i + "_b" for i in used_components]).union(set([i + "_e" for i in used_components]))
remaining_nodes = next_nodes - visited_nodes
assert len(remaining_nodes) in {0, 1}

if remaining_nodes:
to_node = remaining_nodes.pop
# Look for the next edge
from_node = to_node[:-2] + "_b"
if to_node.endswith("_b"):
from_node = to_node[:-2] + "_e"

if from_node in self.graph.nodes:
next_nodes = set(self.graph[from_node])
assert len(next_nodes) == 1
to_node = next_nodes.pop()
used_edges.add((from_node, to_node))
used_edges.add((to_node, from_node))
else:
next_edge_exists = False

Expand Down
5 changes: 0 additions & 5 deletions tests/integration_tests/scripts/sim_target_asm.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@
f.write(fai.fetch("Chr1", 1000500, 5000000) + "\n")

# Break the second sequence into 5 sequences
# The first should be reverse complemented
# The second should be overlapping the first, + strand
# The third should be reverse complemented
# The fourth should be reverse complemnted, with some missing sequence between it and the third
# The fifth should be + strand
f.write(">r2a\n")
f.write(fai.fetch("Chr2", 0, 1000000) + "\n")

Expand Down

0 comments on commit 1358544

Please sign in to comment.