Mercurial > repos > portiahollyoak > temp
comparison scripts/pickSoftClipping.over.pl @ 0:28d1a6f8143f draft
planemo upload for repository https://github.com/portiahollyoak/Tools commit 132bb96bba8e7aed66a102ed93b7744f36d10d37-dirty
author | portiahollyoak |
---|---|
date | Mon, 25 Apr 2016 13:08:56 -0400 |
parents | |
children | 9672fe07a232 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:28d1a6f8143f |
---|---|
1 #!/share/bin/perl | |
2 use Bio::Seq; | |
3 use List::Util qw(sum); | |
4 | |
5 die "perl $0 <*.excision.cluster.rpmk> <Reference.2bit>\n" if @ARGV<1; | |
6 | |
7 my $title=$ARGV[0]; | |
8 if ($title =~ /annotation/) { | |
9 $title =~ s/excision.cluster.annotation/sorted.bam/; | |
10 } | |
11 else {$title =~ s/excision.cluster.rpmk/sorted.bam/;} | |
12 | |
13 my %chrs=(); | |
14 system("samtools view -H $title > header"); | |
15 open (input, "<header") or die "Can't open header since $!\n"; | |
16 while (my $line=<input>) { | |
17 if ($line =~ /^\@SQ/) { | |
18 my @a=split(/\t/, $line); | |
19 for my $j (0..$#a) { | |
20 if ($a[$j] =~ /^SN:/) { | |
21 $a[$j] =~ s/^SN://; | |
22 $chrs{$a[$j]}=1; | |
23 } | |
24 } | |
25 } | |
26 } | |
27 close input; | |
28 system("rm header"); | |
29 | |
30 open (input, "<$ARGV[0]") or die "Can't open $ARGV[0] since $!\n"; | |
31 while (my $line=<input>) { | |
32 chomp($line); | |
33 my @a=split(/\s+/, $line); | |
34 | |
35 my $lower=$a[3]-100; | |
36 my $upper=$a[4]+100; | |
37 my $chr_num=$a[2]; | |
38 $chr_num =~ s/chr//; | |
39 if (($chrs{$a[2]} == 1) && (! defined $chrs{$chr_num})) {$chr_num=$a[2];} | |
40 system("samtools view -bu $title $chr_num\:$lower\-$upper > temp.bam"); | |
41 system("samtools view -Xf 0x2 temp.bam > temp.sam"); | |
42 | |
43 my $leftseq=""; | |
44 my $rightseq=""; | |
45 | |
46 my $ll=$a[3]-150; | |
47 my $lu=$a[3]+150; | |
48 system("twoBitToFa $ARGV[1] -seq=$a[2] -start=$ll -end=$lu left.fa"); | |
49 open (seq, "<left.fa") or die "Can't open left.fa since $!\n"; | |
50 my $head=<seq>; | |
51 for my $k (0..5) { | |
52 $head=<seq>; | |
53 chomp($head); | |
54 $leftseq=$leftseq."$head"; | |
55 } | |
56 $leftseq=uc($leftseq); | |
57 close seq; | |
58 system("rm left.fa"); | |
59 | |
60 my $rl=$a[4]-150; | |
61 my $ru=$a[4]+150; | |
62 system("twoBitToFa $ARGV[1] -seq=$a[2] -start=$rl -end=$ru right.fa"); | |
63 open (seq, "<right.fa") or die "Can't open right.fa since $!\n"; | |
64 my $head=<seq>; | |
65 for my $k (0..5) { | |
66 $head=<seq>; | |
67 chomp($head); | |
68 $rightseq=$rightseq."$head"; | |
69 } | |
70 $rightseq=uc($rightseq); | |
71 close seq; | |
72 system("rm right.fa"); | |
73 | |
74 | |
75 open in,"temp.sam"; | |
76 my %pe=(); | |
77 while(<in>) | |
78 { | |
79 chomp; | |
80 my @f=split/\t/,$_,12; | |
81 ## read number 1 or 2 | |
82 my ($rnum)=$f[1]=~/(\d)$/; | |
83 | |
84 ## XT:A:* | |
85 my ($xt)=$f[11]=~/XT:A:(.)/; | |
86 | |
87 my $CIGAR=$f[5]; | |
88 $CIGAR =~ s/S//g; | |
89 if ($f[5]=~/S/) { | |
90 | |
91 ## Coordinate | |
92 my $coor=-10; | |
93 my $overcoor=-10; | |
94 my $strand=""; | |
95 my @z=split(/M/, $f[5]); | |
96 | |
97 if (($f[5]=~/S$/)&&($f[1]=~/r/)) | |
98 { | |
99 my (@cigar_m)=$f[5]=~/(\d+)M/g; | |
100 my (@cigar_d)=$f[5]=~/(\d+)D/g; | |
101 my (@cigar_s)=$f[5]=~/(\d+)S/g; | |
102 my (@cigar_i)=$f[5]=~/(\d+)I/g; | |
103 my $aln_ln=sum(@cigar_m,@cigar_d); | |
104 $coor=$f[3]+$aln_ln-1; | |
105 $strand="-"; | |
106 | |
107 my (@clipped)=$z[1]=~/(\d+)S/g; | |
108 my $cliplen=sum(@clipped); | |
109 # print "$f[0]\n"; | |
110 # print "$cliplen\t"; | |
111 if ($cliplen >= 10) { | |
112 my $clipseq=substr($f[9], length($f[9])-$cliplen, $cliplen); | |
113 $overcoor = index($rightseq, $clipseq); | |
114 # print "$clipseq\t$rightseq\t$overcoor\t"; | |
115 if ($overcoor > -1) {$overcoor += ($a[4] - 149);} | |
116 } | |
117 # print "\n"; | |
118 } | |
119 elsif (($f[1]=~/R/)&&($z[0]=~/S/)) { | |
120 $coor=$f[3]; $strand="+"; | |
121 my (@clipped)=$z[0]=~/(\d+)S/g; | |
122 my $cliplen=sum(@clipped); | |
123 # print "$f[0]\n"; | |
124 # print "$cliplen\t"; | |
125 if ($cliplen >= 10) { | |
126 my $clipseq=substr($f[9], 0, $cliplen); | |
127 $overcoor = index($leftseq, $clipseq); | |
128 # print "$clipseq\t$leftseq\t$overcoor\t"; | |
129 if ($overcoor > -1) {$overcoor += ($a[3] - 150 + $cliplen);} | |
130 } | |
131 # print "\n"; | |
132 } | |
133 | |
134 if ($coor > 0) { | |
135 my $final=""; | |
136 if ($overcoor > 0) { | |
137 if ($strand eq "-") {$final="$coor\-$overcoor"."\($strand\)";} | |
138 else {$final="$overcoor\-$coor"."\($strand\)";} | |
139 } | |
140 else {$final=$coor."\($strand\)";} | |
141 if (defined $pe{$final}) {$pe{$final}++;} | |
142 else {$pe{$final}=1;} | |
143 } | |
144 | |
145 } | |
146 } | |
147 close in; | |
148 | |
149 my $clip_site=""; | |
150 | |
151 foreach my $coor (keys %pe) | |
152 { | |
153 if ($pe{$coor} >= 2) { | |
154 $clip_site=$clip_site."$coor\:$pe{$coor}\;"; | |
155 } | |
156 } | |
157 | |
158 chop($clip_site); | |
159 print "$a[2]\t$a[3]\t$a[4]\t$a[5]\t$clip_site\n"; | |
160 system("rm temp.sam temp.bam"); | |
161 # last; | |
162 } | |
163 |