forked from PASApipeline/PASApipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathOverlap_assembler.pm
executable file
·119 lines (101 loc) · 2.86 KB
/
Overlap_assembler.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package main;
our $SEE = 0;
package CDNA::Overlap_assembler;
use strict;
sub new {
my $packagename = shift;
my $self = {
node_list => []
};
bless ($self, $packagename);
return ($self);
}
sub add_cDNA {
my $self = shift;
my ($cdna_acc, $end5, $end3) = @_;
my ($cdna_start, $cdna_stop) = sort {$a<=>$b} ($end5, $end3);
my $node = Cdna_node->new($cdna_acc, $cdna_start, $cdna_stop);
push (@{$self->{node_list}}, $node);
}
####
sub build_clusters {
my $self = shift;
my $node_list_aref = $self->{node_list};
@{$node_list_aref} = sort {$a->{lend}<=>$b->{lend}} @{$node_list_aref}; #sort by lend coord.
## set indices
for (my $i = 0; $i <= $#{$node_list_aref}; $i++) {
$node_list_aref->[$i]->{myIndex} = $i;
}
my @clusters;
my $first_node = $node_list_aref->[0];
my $start_pos = 0;
my ($exp_left, $exp_right) = ($first_node->{lend}, $first_node->{rend});
print $first_node->{acc} . " ($exp_left, $exp_right)\n" if $SEE;
for (my $i = 1; $i <= $#{$node_list_aref}; $i++) {
my $curr_node = $node_list_aref->[$i];
my ($lend, $rend) = ($curr_node->{lend}, $curr_node->{rend});
print $curr_node->{acc} . " ($lend, $rend)\n" if $SEE;
if ($exp_left <= $rend && $exp_right >= $lend) { #overlap
$exp_left = &min($exp_left, $lend);
$exp_right = &max($exp_right, $rend);
print "overlap. New expanded coords: ($exp_left, $exp_right)\n" if $SEE;
} else {
print "No overlap; Creating cluster: " if $SEE;
my @cluster;
for (my $j=$start_pos; $j < $i; $j++) {
my $acc = $node_list_aref->[$j]->{acc};
push (@cluster, $acc);
print "$acc, " if $SEE;
}
push (@clusters, [@cluster]);
$start_pos = $i;
($exp_left, $exp_right) = ($lend, $rend);
print "\nResetting expanded coords: ($lend, $rend)\n" if $SEE;
}
}
print "# Adding final cluster.\n" if $SEE;
if ($start_pos != $#{$node_list_aref}) {
print "final cluster: " if $SEE;
my @cluster;
for (my $j = $start_pos; $j <= $#{$node_list_aref}; $j++) {
my $acc = $node_list_aref->[$j]->{acc};
print "$acc, " if $SEE;
push (@cluster, $acc);
}
push (@clusters, [@cluster]);
print "\n" if $SEE;
} else {
my $acc = $node_list_aref->[$start_pos]->{acc};
push (@clusters, [$acc]);
print "adding final $acc.\n" if $SEE;
}
return (@clusters);
}
sub min {
my (@x) = @_;
@x = sort {$a<=>$b} @x;
my $min = shift @x;
return ($min);
}
sub max {
my @x = @_;
@x = sort {$a<=>$b} @x;
my $max = pop @x;
return ($max);
}
#################################################################
package Cdna_node;
use strict;
sub new {
my $packagename = shift;
my ($acc, $lend, $rend) = @_;
my $self = { acc=>$acc,
lend=>$lend,
rend=>$rend,
myIndex=>undef(),
overlapping_indices=>[]
};
bless ($self, $packagename);
return ($self);
}
1; #EOM