Skip to content

Commit

Permalink
daily commit of any changes to all scritps
Browse files Browse the repository at this point in the history
  • Loading branch information
srobb1 committed Jun 20, 2014
1 parent 682e116 commit 9315d9b
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 168 deletions.
104 changes: 51 additions & 53 deletions bisulfite/convert_extracted_methylation_2_wig.pl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
my @path = split /\//, $file;
my $file_name = pop @path;


open FASTA, $fasta or die "Can't open $fasta";
my %seq;
my $id;
Expand Down Expand Up @@ -60,7 +59,6 @@
$seq{$id}{met}{$pos}='CHG';
}
}

for (my $i = 0 ; $i < @triplets_3 ; $i++){
my $trip = $triplets_3[$i];
my $pos = ($i * 3) + 3;
Expand All @@ -87,9 +85,6 @@
}




#my ( $type, $sample ) = $file_name =~ /^(CpG|CHH|CHG)_\w{2,4}_(\w{2,5})\.\2/;
#CHH_OB_RIL16_ping.RIL16_ping_p1.fq_bismark_bt2_pe.txt
#CHG_RIL16_ping.bismarkextract.out
my ( $type, $sample );
Expand All @@ -102,62 +97,65 @@
$type = $1;
$sample = $2;
}

if ( !-e "$file.sorted" and $file !~ /sorted$/ ) {
system("sort -k3 $file | grep -v Bismark > $file.sorted");
$file = "$file.sorted";
}
print
"track type=wiggle_0 name=\"$sample.$type\" description=\"$type in $sample\"\n";
open IN, $file or die "Can't open $file for reading $!\n";
my $last = '';
while ( my $line = <IN> ) {
chomp $line;
next if $line =~ /^Bismark methylation extractor version/;
my ( $read, $strand, $ref, $pos, $methy ) = split /\t/, $line;

#next unless uc($methy) eq $methy;
$strand = 0;
$ref = ucfirst $ref if $ref =~ /^Chr/i;
$methy{$sample}{$ref}{$pos}{$type}{$methy}++;

#print "$pos\n";
}
print
"track type=wiggle_0 name=\"$sample.$type\" description=\"$type in $sample\"\n";
foreach my $sample ( keys %methy ) {
foreach my $ref ( keys %{ $methy{$sample} } ) {
foreach my $pos ( sort { $a <=> $b } keys %{ $methy{$sample}{$ref} } ) {
$i++;

foreach my $type ( keys %{ $methy{$sample}{$ref}{$pos} } ) {
my @methy_codes =
keys %{ $methy{$sample}{$ref}{$pos}{$type} };

my $code = $methy_codes[0];
my $big = uc $code;
my $little = lc $code;
my ( $methy_yes, $methy_no ) = ( 0, 0 );
if ( exists $methy{$sample}{$ref}{$pos}{$type}{$big} ) {
$methy_yes = $methy{$sample}{$ref}{$pos}{$type}{$big};
}
if ( exists $methy{$sample}{$ref}{$pos}{$type}{$little} ) {
$methy_no = $methy{$sample}{$ref}{$pos}{$type}{$little};
}

#if ($methy_yes) {
my $total = $methy_yes + $methy_no;
my $percent = ( $methy_yes / ($total) ) * 100;
if ( $percent > 0 and $percent < 0.1 ) {
$percent = 0.1;
if ( ( $ref ne $last and $last ne '' ) or eof(IN) ) {

#foreach my $sample ( keys %methy ) {
#foreach my $ref ( keys %{ $methy{$sample} } ) {
foreach my $pos ( sort { $a <=> $b } keys %{ $methy{$sample}{$last} } ) {
$i++;

foreach my $type ( keys %{ $methy{$sample}{$last}{$pos} } ) {
my @methy_codes =
keys %{ $methy{$sample}{$last}{$pos}{$type} };

my $code = $methy_codes[0];
my $big = uc $code;
my $little = lc $code;
my ( $methy_yes, $methy_no ) = ( 0, 0 );
if ( exists $methy{$sample}{$last}{$pos}{$type}{$big} ) {
$methy_yes = $methy{$sample}{$last}{$pos}{$type}{$big};
}
if ( exists $methy{$sample}{$last}{$pos}{$type}{$little} ) {
$methy_no = $methy{$sample}{$last}{$pos}{$type}{$little};
}

my $total = $methy_yes + $methy_no;
my $percent = ( $methy_yes / ($total) ) * 100;
if ( $percent > 0 and $percent < 0.1 ) {
$percent = 0.1;
}
if ( $percent == 0 ) {
$percent = -100;
}
my $pretty_percent = sprintf( '%.1f', $percent );
my $s = $pos - 1;
my $nt = uc(${$seq{ucfirst $ref}{seq}}[$s]);
my $met_type = $seq{ucfirst $ref}{met}{$pos};
print "$last\t$s\t$pos\t$pretty_percent\n" if uc $nt eq 'C' and $met_type eq $type;;

}
}
if ( $percent == 0 ) {
$percent = -100;
}
my $pretty_percent = sprintf( '%.1f', $percent );
my $s = $pos - 1;
my $nt = uc(${$seq{ucfirst $ref}{seq}}[$s]);
my $met_type = $seq{ucfirst $ref}{met}{$pos};
print "$ref\t$s\t$pos\t$pretty_percent\n" if uc $nt eq 'C' and $met_type eq $type;;

#"$ref\t$sample.$type\tmethylated_cytosine\t$pos\t$pos\t$pretty_percent\t$strand\t.\tID=$sample.$ref$strand.$pos.$type.$i;Name=$ref.$pos.$type;Note=$type $big:$methy_yes $little:$methy_no total:$total percent:$pretty_percent;sample=$sample;\n";
#}
}

delete $methy{$sample}{$last};
#print "Deleting $last\n";
$last = $ref;
#}
}
#}
} elsif ( $last eq '' ) {
$last = $ref;
}
}

71 changes: 41 additions & 30 deletions bisulfite/split_methyl_many_files.pl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

my $methyl_file = shift;
my $fasta = shift;
my $check_seq = defined $fasta ? 1 : 0;
my %methy;

open FASTA, $fasta or die "Can't open $fasta";
Expand All @@ -18,48 +17,60 @@
push @{$seq{ucfirst($id)}} , (split ('', $line));
}
}

if ( !-e "$methyl_file.sorted" and $methyl_file !~ /sorted$/ ) {
system("sort -k3 $methyl_file | grep -v Bismark > $methyl_file.sorted");
$methyl_file = "$methyl_file.sorted";
}
open IN, $methyl_file or die "Can't open $methyl_file";
##CHH_RIL16.bismarkextct.out
##CHG_RIL16.bismarkextract.out
my ($type,$strain) = $methyl_file =~ /(C..)\_(.+)\.bismarkext/;
my ( $type, $strain ) = $methyl_file =~ /(C..)\_(.+)\.bismarkext/;
if ( !-d "split_methyl" ) {
mkdir "split_methyl";
}
my $last = '';
while ( my $line = <IN> ) {
chomp $line;
next if $line =~ /Bismark methylation extractor/;
#DGGXHXP1:377:C2K44ACXX:8:1101:2017:2215_1:N:0:CCGTCC + Chr5 11943414 Z
my ( $read, $strand, $ref, $pos, $methy ) = split /\t/, $line;

my ( $read, $strand, $ref, $pos, $methy ) = split /\t/, $line;
##A119 CpG DGGXHXP1:377:C2K44ACXX:8:1101:2017:2215_1:N:0:CCGTCC + Chr5 11943414 Z
##my ( $strain, $type, $read, $strand, $ref, $pos, $methy ) = split /\t/, $line;
$ref = ucfirst $ref if $ref =~ /^Chr/i;
$methy{$strain}{$ref}{$type}{$pos}{$methy}++;

$methy{$strain}{ucfirst($ref)}{$type}{$pos}{$methy}++;
if ( ( $ref ne $last and $last ne '' ) or eof(IN) ) {

}
if (!-d "split_methyl"){
mkdir "split_methyl";
}
foreach my $strain ( keys %methy ) {
foreach my $ref ( keys %{ $methy{$strain} } ) {
foreach my $type ( keys %{ $methy{$strain}{$ref} } ) {
open OUT, ">", "split_methyl/${strain}_${ref}_${type}.split.txt" or die "can't write to ${strain}_${ref}_${type}.split.txt";
foreach my $pos ( sort {$a<=>$b} keys %{ $methy{$strain}{$ref}{$type} } ) {
my ( $methyl_yes, $methyl_no ) = ( 0, 0 );
foreach my $code ( keys %{ $methy{$strain}{$ref}{$type}{$pos} } ) {
my $count = $methy{$strain}{$ref}{$type}{$pos}{$code};
if ( $code eq uc $code ) {
$methyl_yes = $count;
} else {
$methyl_no = $count;
foreach my $strain ( keys %methy ) {
#foreach my $ref ( keys %{ $methy{$strain} } ) {
foreach my $type ( keys %{ $methy{$strain}{$last} } ) {
open OUT, ">", "split_methyl/${strain}_${last}_${type}.split.txt"
or die "can't write to ${strain}_${last}_${type}.split.txt";
foreach my $pos (
sort { $a <=> $b }
keys %{ $methy{$strain}{$last}{$type} } )
{
my ( $methyl_yes, $methyl_no ) = ( 0, 0 );
foreach my $code ( keys %{ $methy{$strain}{$last}{$type}{$pos} } ) {
my $count = $methy{$strain}{$last}{$type}{$pos}{$code};
if ( $code eq uc $code ) {
$methyl_yes = $count;
} else {
$methyl_no = $count;
}
}
my $nt = uc(${$seq{$ref}}[$pos -1]);
print OUT join( "\t", ( $pos, $methyl_yes, $methyl_no ) ), "\n" if $nt eq 'C';
}
}
my $nt = uc(${$seq{$ref}}[$pos -1]);
print OUT join(
"\t",
(
$nt, $pos, $methyl_yes, $methyl_no
)
),
"\n";# if uc(${$seq{$ref}}[$pos]) eq 'C';
}

delete $methy{$strain}{$last};
$last = $ref;
#}
}
} elsif ( $last eq '' ) {
$last = $ref;
}
}

2 changes: 1 addition & 1 deletion bisulfite/split_methyl_many_files_noFasta.pl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
my $methyl_file = shift;
my %methy;
if ( !-e "$methyl_file.sorted" and $methyl_file !~ /sorted$/ ) {
system("sort -k3 $methyl_file > $methyl_file.sorted");
system("sort -k3 $methyl_file | grep -v Bismark > $methyl_file.sorted");
$methyl_file = "$methyl_file.sorted";
}
open IN, $methyl_file or die "Can't open $methyl_file";
Expand Down
104 changes: 51 additions & 53 deletions convert_extracted_methylation_2_wig.pl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
my @path = split /\//, $file;
my $file_name = pop @path;


open FASTA, $fasta or die "Can't open $fasta";
my %seq;
my $id;
Expand Down Expand Up @@ -60,7 +59,6 @@
$seq{$id}{met}{$pos}='CHG';
}
}

for (my $i = 0 ; $i < @triplets_3 ; $i++){
my $trip = $triplets_3[$i];
my $pos = ($i * 3) + 3;
Expand All @@ -87,9 +85,6 @@
}




#my ( $type, $sample ) = $file_name =~ /^(CpG|CHH|CHG)_\w{2,4}_(\w{2,5})\.\2/;
#CHH_OB_RIL16_ping.RIL16_ping_p1.fq_bismark_bt2_pe.txt
#CHG_RIL16_ping.bismarkextract.out
my ( $type, $sample );
Expand All @@ -102,62 +97,65 @@
$type = $1;
$sample = $2;
}

if ( !-e "$file.sorted" and $file !~ /sorted$/ ) {
system("sort -k3 $file | grep -v Bismark > $file.sorted");
$file = "$file.sorted";
}
print
"track type=wiggle_0 name=\"$sample.$type\" description=\"$type in $sample\"\n";
open IN, $file or die "Can't open $file for reading $!\n";
my $last = '';
while ( my $line = <IN> ) {
chomp $line;
next if $line =~ /^Bismark methylation extractor version/;
my ( $read, $strand, $ref, $pos, $methy ) = split /\t/, $line;

#next unless uc($methy) eq $methy;
$strand = 0;
$ref = ucfirst $ref if $ref =~ /^Chr/i;
$methy{$sample}{$ref}{$pos}{$type}{$methy}++;

#print "$pos\n";
}
print
"track type=wiggle_0 name=\"$sample.$type\" description=\"$type in $sample\"\n";
foreach my $sample ( keys %methy ) {
foreach my $ref ( keys %{ $methy{$sample} } ) {
foreach my $pos ( sort { $a <=> $b } keys %{ $methy{$sample}{$ref} } ) {
$i++;

foreach my $type ( keys %{ $methy{$sample}{$ref}{$pos} } ) {
my @methy_codes =
keys %{ $methy{$sample}{$ref}{$pos}{$type} };

my $code = $methy_codes[0];
my $big = uc $code;
my $little = lc $code;
my ( $methy_yes, $methy_no ) = ( 0, 0 );
if ( exists $methy{$sample}{$ref}{$pos}{$type}{$big} ) {
$methy_yes = $methy{$sample}{$ref}{$pos}{$type}{$big};
}
if ( exists $methy{$sample}{$ref}{$pos}{$type}{$little} ) {
$methy_no = $methy{$sample}{$ref}{$pos}{$type}{$little};
}

#if ($methy_yes) {
my $total = $methy_yes + $methy_no;
my $percent = ( $methy_yes / ($total) ) * 100;
if ( $percent > 0 and $percent < 0.1 ) {
$percent = 0.1;
if ( ( $ref ne $last and $last ne '' ) or eof(IN) ) {

#foreach my $sample ( keys %methy ) {
#foreach my $ref ( keys %{ $methy{$sample} } ) {
foreach my $pos ( sort { $a <=> $b } keys %{ $methy{$sample}{$last} } ) {
$i++;

foreach my $type ( keys %{ $methy{$sample}{$last}{$pos} } ) {
my @methy_codes =
keys %{ $methy{$sample}{$last}{$pos}{$type} };

my $code = $methy_codes[0];
my $big = uc $code;
my $little = lc $code;
my ( $methy_yes, $methy_no ) = ( 0, 0 );
if ( exists $methy{$sample}{$last}{$pos}{$type}{$big} ) {
$methy_yes = $methy{$sample}{$last}{$pos}{$type}{$big};
}
if ( exists $methy{$sample}{$last}{$pos}{$type}{$little} ) {
$methy_no = $methy{$sample}{$last}{$pos}{$type}{$little};
}

my $total = $methy_yes + $methy_no;
my $percent = ( $methy_yes / ($total) ) * 100;
if ( $percent > 0 and $percent < 0.1 ) {
$percent = 0.1;
}
if ( $percent == 0 ) {
$percent = -100;
}
my $pretty_percent = sprintf( '%.1f', $percent );
my $s = $pos - 1;
my $nt = uc(${$seq{ucfirst $ref}{seq}}[$s]);
my $met_type = $seq{ucfirst $ref}{met}{$pos};
print "$last\t$s\t$pos\t$pretty_percent\n" if uc $nt eq 'C' and $met_type eq $type;;

}
}
if ( $percent == 0 ) {
$percent = -100;
}
my $pretty_percent = sprintf( '%.1f', $percent );
my $s = $pos - 1;
my $nt = uc(${$seq{ucfirst $ref}{seq}}[$s]);
my $met_type = $seq{ucfirst $ref}{met}{$pos};
print "$ref\t$s\t$pos\t$pretty_percent\n" if uc $nt eq 'C' and $met_type eq $type;;

#"$ref\t$sample.$type\tmethylated_cytosine\t$pos\t$pos\t$pretty_percent\t$strand\t.\tID=$sample.$ref$strand.$pos.$type.$i;Name=$ref.$pos.$type;Note=$type $big:$methy_yes $little:$methy_no total:$total percent:$pretty_percent;sample=$sample;\n";
#}
}

delete $methy{$sample}{$last};
#print "Deleting $last\n";
$last = $ref;
#}
}
#}
} elsif ( $last eq '' ) {
$last = $ref;
}
}

Loading

0 comments on commit 9315d9b

Please sign in to comment.