Mercurial > repos > portiahollyoak > temp
diff scripts/pickSoftClipping.over.pl @ 0:28d1a6f8143f draft
planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
author | portiahollyoak |
---|---|
date | Mon, 25 Apr 2016 13:08:56 -0400 |
parents | |
children | 9672fe07a232 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/pickSoftClipping.over.pl Mon Apr 25 13:08:56 2016 -0400 @@ -0,0 +1,163 @@ +#!/share/bin/perl +use Bio::Seq; +use List::Util qw(sum); + +die "perl $0 <*.excision.cluster.rpmk> <Reference.2bit>\n" if @ARGV<1; + +my $title=$ARGV[0]; +if ($title =~ /annotation/) { + $title =~ s/excision.cluster.annotation/sorted.bam/; +} +else {$title =~ s/excision.cluster.rpmk/sorted.bam/;} + +my %chrs=(); +system("samtools view -H $title > header"); +open (input, "<header") or die "Can't open header since $!\n"; +while (my $line=<input>) { + if ($line =~ /^\@SQ/) { + my @a=split(/\t/, $line); + for my $j (0..$#a) { + if ($a[$j] =~ /^SN:/) { + $a[$j] =~ s/^SN://; + $chrs{$a[$j]}=1; + } + } + } +} +close input; +system("rm header"); + +open (input, "<$ARGV[0]") or die "Can't open $ARGV[0] since $!\n"; +while (my $line=<input>) { + chomp($line); + my @a=split(/\s+/, $line); + + my $lower=$a[3]-100; + my $upper=$a[4]+100; + my $chr_num=$a[2]; + $chr_num =~ s/chr//; + if (($chrs{$a[2]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[2];} + system("samtools view -bu $title $chr_num\:$lower\-$upper > temp.bam"); + system("samtools view -Xf 0x2 temp.bam > temp.sam"); + + my $leftseq=""; + my $rightseq=""; + + my $ll=$a[3]-150; + my $lu=$a[3]+150; + system("twoBitToFa $ARGV[1] -seq=$a[2] -start=$ll -end=$lu left.fa"); + open (seq, "<left.fa") or die "Can't open left.fa since $!\n"; + my $head=<seq>; + for my $k (0..5) { + $head=<seq>; + chomp($head); + $leftseq=$leftseq."$head"; + } + $leftseq=uc($leftseq); + close seq; + system("rm left.fa"); + + my $rl=$a[4]-150; + my $ru=$a[4]+150; + system("twoBitToFa $ARGV[1] -seq=$a[2] -start=$rl -end=$ru right.fa"); + open (seq, "<right.fa") or die "Can't open right.fa since $!\n"; + my $head=<seq>; + for my $k (0..5) { + $head=<seq>; + chomp($head); + $rightseq=$rightseq."$head"; + } + $rightseq=uc($rightseq); + close seq; + system("rm right.fa"); + + + open in,"temp.sam"; + my %pe=(); + while(<in>) + { + chomp; + my @f=split/\t/,$_,12; + ## read number 1 or 2 + my ($rnum)=$f[1]=~/(\d)$/; + + ## XT:A:* + my ($xt)=$f[11]=~/XT:A:(.)/; + + my $CIGAR=$f[5]; + $CIGAR =~ s/S//g; + if ($f[5]=~/S/) { + + ## Coordinate + my $coor=-10; + my $overcoor=-10; + my $strand=""; + my @z=split(/M/, $f[5]); + + if (($f[5]=~/S$/)&&($f[1]=~/r/)) + { + my (@cigar_m)=$f[5]=~/(\d+)M/g; + my (@cigar_d)=$f[5]=~/(\d+)D/g; + my (@cigar_s)=$f[5]=~/(\d+)S/g; + my (@cigar_i)=$f[5]=~/(\d+)I/g; + my $aln_ln=sum(@cigar_m,@cigar_d); + $coor=$f[3]+$aln_ln-1; + $strand="-"; + + my (@clipped)=$z[1]=~/(\d+)S/g; + my $cliplen=sum(@clipped); +# print "$f[0]\n"; +# print "$cliplen\t"; + if ($cliplen >= 10) { + my $clipseq=substr($f[9], length($f[9])-$cliplen, $cliplen); + $overcoor = index($rightseq, $clipseq); +# print "$clipseq\t$rightseq\t$overcoor\t"; + if ($overcoor > -1) {$overcoor += ($a[4] - 149);} + } +# print "\n"; + } + elsif (($f[1]=~/R/)&&($z[0]=~/S/)) { + $coor=$f[3]; $strand="+"; + my (@clipped)=$z[0]=~/(\d+)S/g; + my $cliplen=sum(@clipped); +# print "$f[0]\n"; +# print "$cliplen\t"; + if ($cliplen >= 10) { + my $clipseq=substr($f[9], 0, $cliplen); + $overcoor = index($leftseq, $clipseq); +# print "$clipseq\t$leftseq\t$overcoor\t"; + if ($overcoor > -1) {$overcoor += ($a[3] - 150 + $cliplen);} + } +# print "\n"; + } + + if ($coor > 0) { + my $final=""; + if ($overcoor > 0) { + if ($strand eq "-") {$final="$coor\-$overcoor"."\($strand\)";} + else {$final="$overcoor\-$coor"."\($strand\)";} + } + else {$final=$coor."\($strand\)";} + if (defined $pe{$final}) {$pe{$final}++;} + else {$pe{$final}=1;} + } + + } + } + close in; + + my $clip_site=""; + + foreach my $coor (keys %pe) + { + if ($pe{$coor} >= 2) { + $clip_site=$clip_site."$coor\:$pe{$coor}\;"; + } + } + + chop($clip_site); + print "$a[2]\t$a[3]\t$a[4]\t$a[5]\t$clip_site\n"; + system("rm temp.sam temp.bam"); +# last; +} +