Mercurial > repos > yutaka-saito > commet
view bin/Bsf2ComMetIn.pl @ 0:dfdfbdd47b32 default tip
migrate from GitHub
author | yutaka-saito |
---|---|
date | Sun, 19 Apr 2015 20:55:17 +0900 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl # Bisulfighter http://epigenome.cbrc.jp/bisulfighter # by National Institute of Advanced Industrial Science and Technology (AIST) # is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. # http://creativecommons.org/licenses/by-nc-sa/3.0/ use strict; use warnings; # format Bisulfighter's input to ComMet's input # (data from both strands are integrated) # # usage: # ./this_program sample1.tsv sample2.tsv > ComMetInput # input format =pod # sample1 chr1 123 + CG 0.8 10 chr1 128 + CHG 0.7 8 chr1 1000000 + CG 0.234 9 chr2 987 + CG 0.654 12 chr2 1000 + CHG 0.788 10 chr2 1057 + CG 0.826 50 =cut =pod # sample2 chr1 123 + CG 0.3 30 chr1 128 + CHG 0.24 20 chr1 1000000 + CG 0.36 8 chr2 987 + CG 0.64 12 chr2 1000 + CHG 0.820 9 chr2 1057 + CG 0.0 32 =cut # output format =pod chr1 123 10*(0.8) 10*(1-0.8) 30*(0.3) 30*(1-0.3) chr1 1000000 9*(0.234) 9*(1-0.234) 8*(0.36) 8*(1-0.36) chr2 987 12*(0.654) 12*(1-0.654) 12*(0.64) 12*(1-0.64) chr2 1057 50*(0.826) 50*(1-0.826) 32*(0.0) 32*(1-0.0) =cut my $command = ""; my $tmpout1 = "Bsf2ComMetIn.pl.tmp.$$.1"; my $tmpout2 = "Bsf2ComMetIn.pl.tmp.$$.2"; &uniq_add($ARGV[0], $tmpout1); &uniq_add($ARGV[1], $tmpout2); my ($nm1, $pos1, $m1, $u1) = ("", -1, -1, -1); my ($nm2, $pos2, $m2, $u2) = ("", -1, -1, -1); open(IN1, $tmpout1) or die "couldn't open input file $tmpout1\n"; open(IN2, $tmpout2) or die "couldn't open input file $tmpout2\n"; while (1) { my ($line1, $line2); if (&chreq($nm1,$pos1,$nm2,$pos2)) { (defined($line1=<IN1>) and defined($line2=<IN2>)) or last; chomp $line1; ($nm1,$pos1,$m1,$u1) = split(/\s+/, $line1); chomp $line2; ($nm2,$pos2,$m2,$u2) = split(/\s+/, $line2); } elsif (&chrlt($nm1,$pos1,$nm2,$pos2)) { defined($line1=<IN1>) or last; chomp $line1; ($nm1,$pos1,$m1,$u1) = split(/\s+/, $line1); } elsif (&chrgt($nm1,$pos1,$nm2,$pos2)) { defined($line2=<IN2>) or last; chomp $line2; ($nm2,$pos2,$m2,$u2) = split(/\s+/, $line2); } else { die "wrong position\n"; } if (&chreq($nm1,$pos1,$nm2,$pos2)) { print STDOUT join("\t", ($nm1, $pos1, $m1, $u1, $m2, $u2)), "\n"; } } close(IN1); close(IN2); &invoke_command("\\rm -f $tmpout1"); &invoke_command("\\rm -f $tmpout2"); sub invoke_command { my ($command) = @_; # print STDERR $command, "\n"; system $command; } sub chrgt { my ($nm1, $pos1, $nm2, $pos2) = @_; if ($nm1 gt $nm2 || ($nm1 eq $nm2 && $pos1 > $pos2)) { return 1; } else { return 0; } } sub chrlt { my ($nm1, $pos1, $nm2, $pos2) = @_; if ($nm1 lt $nm2 || ($nm1 eq $nm2 && $pos1 < $pos2)) { return 1; } else { return 0; } } sub chreq { my ($nm1, $pos1, $nm2, $pos2) = @_; if ($nm1 eq $nm2 && $pos1 == $pos2) { return 1; } else { return 0; } } sub uniq_add { my ($infile, $outfile) = @_; my ($nm, $str, $pos, $m, $u) = ("", "", -1, -1, -1); my ($pre_nm, $pre_str, $pre_pos, $pre_m, $pre_u) = ("", "", -1, -1, -1); my $mind = 2; # minimum distance between two CpGs my $thsh = 0; open(IN, $infile) or die "couldn't open input file $infile\n"; open(OUT, "> $outfile") or die "couldn't open output file $outfile\n"; while (my $line=<IN>) { chomp $line; $line =~ /^\#/ and next; $line =~ /CG/ or next; my @cells = split(/\s+/, $line); $nm = $cells[0]; $str = $cells[2]; $pos = $cells[1]; $m = $cells[5] * $cells[4]; $u = $cells[5] * (1.0-$cells[4]); if ($str eq "+") { } elsif ($str eq "-") { $str = "+"; $pos -= $mind - 1; } else { print STDERR "warning: abnormal strand $str was removed\n"; next; } if ($nm eq $pre_nm && $str eq $pre_str && $pos==$pre_pos) { $pre_m += $m; $pre_u += $u; } else { if ($nm eq $pre_nm && $pos-$pre_pos < $mind) { print STDERR "warning: abnormal position difference $pre_pos $pos, "; if ($m + $u > $pre_m + $pre_u) { print STDERR "$pre_pos was removed.\n"; $pre_nm = $nm; $pre_str = $str; $pre_pos = $pos; $pre_m = $m; $pre_u = $u; } else { print STDERR "$pos was removed.\n" } next; } if ($pre_m + $pre_u > $thsh) { print OUT join("\t", ($pre_nm, $pre_pos, $pre_m, $pre_u)), "\n"; } $pre_nm = $nm; $pre_str = $str; $pre_pos = $pos; $pre_m = $m; $pre_u = $u; } } if ($pre_m + $pre_u > $thsh) { print OUT join("\t", ($pre_nm, $pre_pos, $pre_m, $pre_u)), "\n"; } close(IN); close(OUT); }