From c930476139aa3bb0734c4289d1baf0e1fabe20d0 Mon Sep 17 00:00:00 2001 From: changchang cao <34830720+caochch@users.noreply.github.com> Date: Fri, 30 Aug 2019 16:15:03 +0800 Subject: [PATCH] Add files via upload --- .../interaction_from_loop_to_JuiceBox.pl | 17 +++ 2.construct_hicFiles_for_rRNA/sam_to_loops.pl | 110 ++++++++++++++++++ 2.construct_hicFiles_for_rRNA/work.sh | 4 + 3 files changed, 131 insertions(+) create mode 100644 2.construct_hicFiles_for_rRNA/interaction_from_loop_to_JuiceBox.pl create mode 100644 2.construct_hicFiles_for_rRNA/sam_to_loops.pl create mode 100644 2.construct_hicFiles_for_rRNA/work.sh diff --git a/2.construct_hicFiles_for_rRNA/interaction_from_loop_to_JuiceBox.pl b/2.construct_hicFiles_for_rRNA/interaction_from_loop_to_JuiceBox.pl new file mode 100644 index 0000000..8b58569 --- /dev/null +++ b/2.construct_hicFiles_for_rRNA/interaction_from_loop_to_JuiceBox.pl @@ -0,0 +1,17 @@ +#!/usr/bin/perl +die "perl $0 rRNA.loops want_hs\n" if(@ARGV != 1); +my $in_rRNA_loops=shift; + +my $read_id; +my %matrix; +open(IN,$in_rRNA_loops) || die; +while(my $line=){ + chomp $line; + my @sub=split/\s+/,$line; + $read_id++; + my $loci_one=$sub[1]; + my $loci_two=$sub[4]; + print $read_id,"\t",16,"\t$sub[0]\t",$loci_one,"\t0\t"; + print 16,"\t$sub[3]\t",$loci_two,"\t1\t50\t50\n"; +} + diff --git a/2.construct_hicFiles_for_rRNA/sam_to_loops.pl b/2.construct_hicFiles_for_rRNA/sam_to_loops.pl new file mode 100644 index 0000000..edf1d8a --- /dev/null +++ b/2.construct_hicFiles_for_rRNA/sam_to_loops.pl @@ -0,0 +1,110 @@ +#!/usr/bin/perl +die "perl $0 pcp_rep1_interaction.sam want_chr want_start want_end> xxx.bedpe\n" if(@ARGV != 4); +my $contact_sam=shift; +my $want_chr=shift; +my $want_start=shift; +my $want_end=shift; + +my %unique; +my %gap_len; +open(CS,$contact_sam) || die; +while(my $frag_a=){ + if($frag_a=~/^@/){ + next; + } + my $frag_b=; + my @sub_a=split/\s+/,$frag_a; + my @sub_b=split/\s+/,$frag_b; + my @id_info_a=split/_/,$sub_a[0]; + my @id_info_b=split/_/,$sub_b[0]; + + if($id_info_a[0]."_".$id_info_a[1] ne $id_info_b[0]."_".$id_info_b[1]){ + die "wrong format\n"; + } + else{#same read name + my $chr_a=$sub_a[2]; + my $loci_a=$sub_a[3]; + my $cigar_a=$sub_a[5]; + $cigar_a=~/(\d+)M/; + my $match_a=$1; + my $end_a=$loci_a+$match_a-1; + $sub_a[1] = $sub_a[1] > 255 ? $sub_a[1]-256 : $sub_a[1]; + + my $chr_b=$sub_b[2]; + my $loci_b=$sub_b[3]; + my $cigar_b=$sub_b[5]; + $cigar_b=~/(\d+)M/; + my $match_b=$1; + my $end_b=$loci_b+$match_b-1; + $sub_b[1] = $sub_b[1] > 255 ? $sub_b[1]-256 : $sub_b[1]; + + if($id_info_a[2] ne $id_info_b[2] or $id_info_a[2] ne "Plus"){ #same reads; same RNA; same strand + next; + } + + if($chr_a ne $want_chr or $chr_b ne $want_chr){ + next; + } + + if($sub_a[0] =~ /^AlignPair/){ + next; + my @two=($loci_a+$match_a,$loci_b); + if($two[0] < $two[1]){ + #3'readsAt5' + #print $want_chr,"\t",$two[0],"\t",$two[0]+1,"\t"; + #print $want_chr,"\t",$two[1],"\t",$two[1]+1,"\n"; + } + else{ + #normal but > 600 + print $want_chr,"\t",$two[1],"\t",$two[1]+1,"\t"; + print $want_chr,"\t",$two[0],"\t",$two[0]+1,"\n"; + } + } + elsif($sub_a[0] =~ /^Part/){ + #next; + if($end_a >= $loci_b){ #impossible situation + print $frag_a,$frag_b; + die "this Part-pair is incorrect\n"; + } + my $tmp_gap_len=$loci_b-$end_a-1; + $gap_len{$tmp_gap_len}++; + #if($tmp_gap_len < 5){ + # next; + #} + print $want_chr,"\t",$end_a,"\t",$end_a+1,"\t"; + print $want_chr,"\t",$loci_b-1,"\t",$loci_b,"\n"; + } + elsif($sub_a[0] =~ /^Chimeric/){ + #next; + if($sub_a[1] eq "0"){ + if($end_b > $loci_a){ #maybe some Bugs in chimeric read sets; only a little, so filtered. + #print $frag_a,$frag_b; + next; + } + print $want_chr,"\t",$loci_b,"\t",$loci_b+1,"\t"; + print $want_chr,"\t",$end_a,"\t",$end_a+1,"\n"; + } + elsif($sub_a[1] eq "16"){ + if($end_a > $loci_b){ #maybe some Bugs in chimeric read sets; only a little, so filtered. + #print $frag_a,$frag_b; + next; + } + print $want_chr,"\t",$loci_a,"\t",$loci_a+1,"\t"; + print $want_chr,"\t",$end_b,"\t",$end_b+1,"\n"; + } + else{ + print $frag_a,$frag_b; + die "wrong strand\n"; + } + + } + else{ + print $frag_a,$frag_b; + die "wrong reads class\n"; + } + } +} + +#foreach (sort {$a<=>$b} keys %gap_len){ +# print STDERR $_,"\t",$gap_len{$_},"\n"; +#} diff --git a/2.construct_hicFiles_for_rRNA/work.sh b/2.construct_hicFiles_for_rRNA/work.sh new file mode 100644 index 0000000..9373210 --- /dev/null +++ b/2.construct_hicFiles_for_rRNA/work.sh @@ -0,0 +1,4 @@ +perl sam_to_loops.pl pairTag.sam rRNA 0 20000 > rRNAloops +perl interaction_from_loop_to_JuiceBox.pl rRNA.loops > rRNA.list +cat rRNA.list | sort -k3,3 -k7,7 > rRNA.sort.list +java -jar juicebox_tools.jar pre -r 1,2,5,10,20,50 rRNA.sort.list rRNA.hic hs.size \ No newline at end of file