comparison scripts/pickSoftClipping.over.pl @ 0:28d1a6f8143f draft

planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
author portiahollyoak
date Mon, 25 Apr 2016 13:08:56 -0400
parents
children 9672fe07a232
comparison
equal deleted inserted replaced
-1:000000000000 0:28d1a6f8143f
1 #!/share/bin/perl
2 use Bio::Seq;
3 use List::Util qw(sum);
4
5 die "perl $0 <*.excision.cluster.rpmk> <Reference.2bit>\n" if @ARGV<1;
6
7 my $title=$ARGV[0];
8 if ($title =~ /annotation/) {
9 $title =~ s/excision.cluster.annotation/sorted.bam/;
10 }
11 else {$title =~ s/excision.cluster.rpmk/sorted.bam/;}
12
13 my %chrs=();
14 system("samtools view -H $title > header");
15 open (input, "<header") or die "Can't open header since $!\n";
16 while (my $line=<input>) {
17 if ($line =~ /^\@SQ/) {
18 my @a=split(/\t/, $line);
19 for my $j (0..$#a) {
20 if ($a[$j] =~ /^SN:/) {
21 $a[$j] =~ s/^SN://;
22 $chrs{$a[$j]}=1;
23 }
24 }
25 }
26 }
27 close input;
28 system("rm header");
29
30 open (input, "<$ARGV[0]") or die "Can't open $ARGV[0] since $!\n";
31 while (my $line=<input>) {
32 chomp($line);
33 my @a=split(/\s+/, $line);
34
35 my $lower=$a[3]-100;
36 my $upper=$a[4]+100;
37 my $chr_num=$a[2];
38 $chr_num =~ s/chr//;
39 if (($chrs{$a[2]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[2];}
40 system("samtools view -bu $title $chr_num\:$lower\-$upper > temp.bam");
41 system("samtools view -Xf 0x2 temp.bam > temp.sam");
42
43 my $leftseq="";
44 my $rightseq="";
45
46 my $ll=$a[3]-150;
47 my $lu=$a[3]+150;
48 system("twoBitToFa $ARGV[1] -seq=$a[2] -start=$ll -end=$lu left.fa");
49 open (seq, "<left.fa") or die "Can't open left.fa since $!\n";
50 my $head=<seq>;
51 for my $k (0..5) {
52 $head=<seq>;
53 chomp($head);
54 $leftseq=$leftseq."$head";
55 }
56 $leftseq=uc($leftseq);
57 close seq;
58 system("rm left.fa");
59
60 my $rl=$a[4]-150;
61 my $ru=$a[4]+150;
62 system("twoBitToFa $ARGV[1] -seq=$a[2] -start=$rl -end=$ru right.fa");
63 open (seq, "<right.fa") or die "Can't open right.fa since $!\n";
64 my $head=<seq>;
65 for my $k (0..5) {
66 $head=<seq>;
67 chomp($head);
68 $rightseq=$rightseq."$head";
69 }
70 $rightseq=uc($rightseq);
71 close seq;
72 system("rm right.fa");
73
74
75 open in,"temp.sam";
76 my %pe=();
77 while(<in>)
78 {
79 chomp;
80 my @f=split/\t/,$_,12;
81 ## read number 1 or 2
82 my ($rnum)=$f[1]=~/(\d)$/;
83
84 ## XT:A:*
85 my ($xt)=$f[11]=~/XT:A:(.)/;
86
87 my $CIGAR=$f[5];
88 $CIGAR =~ s/S//g;
89 if ($f[5]=~/S/) {
90
91 ## Coordinate
92 my $coor=-10;
93 my $overcoor=-10;
94 my $strand="";
95 my @z=split(/M/, $f[5]);
96
97 if (($f[5]=~/S$/)&&($f[1]=~/r/))
98 {
99 my (@cigar_m)=$f[5]=~/(\d+)M/g;
100 my (@cigar_d)=$f[5]=~/(\d+)D/g;
101 my (@cigar_s)=$f[5]=~/(\d+)S/g;
102 my (@cigar_i)=$f[5]=~/(\d+)I/g;
103 my $aln_ln=sum(@cigar_m,@cigar_d);
104 $coor=$f[3]+$aln_ln-1;
105 $strand="-";
106
107 my (@clipped)=$z[1]=~/(\d+)S/g;
108 my $cliplen=sum(@clipped);
109 # print "$f[0]\n";
110 # print "$cliplen\t";
111 if ($cliplen >= 10) {
112 my $clipseq=substr($f[9], length($f[9])-$cliplen, $cliplen);
113 $overcoor = index($rightseq, $clipseq);
114 # print "$clipseq\t$rightseq\t$overcoor\t";
115 if ($overcoor > -1) {$overcoor += ($a[4] - 149);}
116 }
117 # print "\n";
118 }
119 elsif (($f[1]=~/R/)&&($z[0]=~/S/)) {
120 $coor=$f[3]; $strand="+";
121 my (@clipped)=$z[0]=~/(\d+)S/g;
122 my $cliplen=sum(@clipped);
123 # print "$f[0]\n";
124 # print "$cliplen\t";
125 if ($cliplen >= 10) {
126 my $clipseq=substr($f[9], 0, $cliplen);
127 $overcoor = index($leftseq, $clipseq);
128 # print "$clipseq\t$leftseq\t$overcoor\t";
129 if ($overcoor > -1) {$overcoor += ($a[3] - 150 + $cliplen);}
130 }
131 # print "\n";
132 }
133
134 if ($coor > 0) {
135 my $final="";
136 if ($overcoor > 0) {
137 if ($strand eq "-") {$final="$coor\-$overcoor"."\($strand\)";}
138 else {$final="$overcoor\-$coor"."\($strand\)";}
139 }
140 else {$final=$coor."\($strand\)";}
141 if (defined $pe{$final}) {$pe{$final}++;}
142 else {$pe{$final}=1;}
143 }
144
145 }
146 }
147 close in;
148
149 my $clip_site="";
150
151 foreach my $coor (keys %pe)
152 {
153 if ($pe{$coor} >= 2) {
154 $clip_site=$clip_site."$coor\:$pe{$coor}\;";
155 }
156 }
157
158 chop($clip_site);
159 print "$a[2]\t$a[3]\t$a[4]\t$a[5]\t$clip_site\n";
160 system("rm temp.sam temp.bam");
161 # last;
162 }
163