-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcluster_to_GFA.pl
executable file
·96 lines (62 loc) · 1.85 KB
/
cluster_to_GFA.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/perl
=head1 Name
cluster_to_GFA.pl -- convert cluster format to GFA format
=head1 Description
=head1 Version
Author: Fan Wei, [email protected]
Version: 1.0, Date: 2021/7/23
Note:
=head1 Usage
cluster_to_GFA.pl <ctg_id_len_file> <cluster_file>
--help output help information to screen
=head1 Example
cluster_to_GFA.pl contigs.id.len z.EndHiC.A.results.summary.cluster > z.EndHiC.A.results.summary.cluster.gfa
=cut
use strict;
use Getopt::Long;
use FindBin qw($Bin $Script);
use File::Basename qw(basename dirname);
use Data::Dumper;
##get options from command line into variables and set default values
my ($Verbose,$Help);
GetOptions(
"verbose"=>\$Verbose,
"help"=>\$Help
);
die `pod2text $0` if (@ARGV == 0 || $Help);
my $Contig_used_file = shift;
my $cluster_file = shift;
my %CtgLen;
my %Ctgs;
##get contig id and length data
open IN, $Contig_used_file || die "fail open $Contig_used_file\n";
while (<IN>) {
if(/(\S+)\s+(\d+)/){
$CtgLen{$1} = $2;
}
}
close IN;
##S ptg000007l * LN:i:180134246
##L ptg000003l + ptg000004l - 0M L1:i:0 RM:i:1 Contact:i:2603.00
my $S_out;
my $L_out;
open IN, $cluster_file || die "fail open $cluster_file";
while (<IN>) {
next if(/^\#/ || /Unclustered/);
chomp;
my ($cluster_id, $ctg_num, $cluster_len, $robustness, $ctg_str) = split /\s+/;
my @t = split /;/, $ctg_str;
for (my $i=0; $i<@t; $i++) {
my ($ctgid, $ctgstrand) = ($1,$2) if($t[$i] =~ /(\w+)([+-])/);
my $ctglen = $CtgLen{$ctgid};
$S_out .= "S\t$ctgid\t*\tLN:i:$ctglen\n";
}
for (my $i=0; $i<@t - 1; $i++) {
my ($ctg1id, $ctg1strand) = ($1,$2) if($t[$i] =~ /(\w+)([+-])/);
my ($ctg2id, $ctg2strand) = ($1,$2) if($t[$i+1] =~ /(\w+)([+-])/);
$L_out .= "L\t$ctg1id\t$ctg1strand\t$ctg2id\t$ctg2strand\t0M\n";
}
}
close IN;
print $S_out;
print $L_out;