annotate precursors.pl @ 43:4c0b1a94b882 draft

Uploaded
author big-tiandm
date Tue, 28 Oct 2014 01:35:32 -0400
parents 1131b4008650
children ca05d68aca13
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
1 #!/usr/bin/perl -w
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
2 #Filename:
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
3 #Author: Tian Dongmei
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
4 #Email: tiandm@big.ac.cn
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
5 #Date: 2013/7/19
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
6 #Modified:
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
7 #Description:
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
8 my $version=1.00;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
9
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
10 use strict;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
11 use Getopt::Long;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
12 use RNA;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
13
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
14 my %opts;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
15 GetOptions(\%opts,"map=s","g=s","d:i","f:i","o=s","e:f","s=s","h");
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
16 if (!(defined $opts{map} and defined $opts{g} and defined $opts{o} and defined $opts{s} ) || defined $opts{h}) { #necessary arguments
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
17 &usage;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
18 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
19
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
20 my $filein=$opts{'map'};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
21 my $faout=$opts{'o'};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
22 my $strout=$opts{'s'};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
23 my $genome= $opts{'g'};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
24
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
25 my $maxd=defined $opts{'d'} ? $opts{'d'} : 200;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
26 my $flank=defined $opts{'f'}? $opts{'f'} : 10;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
27
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
28 my $MAX_ENERGY=-18;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
29 if (defined $opts{'e'}) {$MAX_ENERGY=$opts{'e'};}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
30 my $MAX_UNPAIR=5;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
31 my $MIN_PAIR=15;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
32 my $MAX_SIZEDIFF=4;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
33 my $MAX_BULGE=2;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
34 my $ASYMMETRY=5;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
35 my $MIN_UNPAIR=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
36 my $MIN_SPACE=5;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
37 my $MAX_SPACE=$maxd;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
38 my $FLANK=$flank;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
39
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
40 ######### load in genome sequences start ########
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
41 my %genome;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
42 my %lng;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
43 my $name;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
44 open IN,"<$genome";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
45 while (my $aline=<IN>) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
46 chomp $aline;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
47 next if($aline=~/^\#/);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
48 if ($aline=~/^>(\S+)/) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
49 $name=$1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
50 next;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
51 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
52 $genome{$name} .=$aline;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
53 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
54 close IN;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
55 foreach my $key (keys %genome) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
56 $lng{$key}=length($genome{$key});
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
57 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
58 ####### load in genome sequences end ##########
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
59
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
60 my %breaks; ### reads number bigger than 3
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
61 open IN,"<$filein"; #input file
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
62 while (my $aline=<IN>) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
63 chomp $aline;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
64 my @tmp=split/\t/,$aline;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
65 $tmp[0]=~/_x(\d+)$/;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
66 my $no=$1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
67 next if($no<3);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
68 #my $trand=&find_strand($tmp[9]);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
69 #my @pos=split/\.\./,$tmp[5];
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
70 my $end=$tmp[3]+length($tmp[4])-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
71 if($tmp[1] eq "-"){$tmp[4]=revcom($tmp[4]);}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
72 push @{$breaks{$tmp[2]}{$tmp[1]}},[$tmp[3],$end,$no,$tmp[4]]; ### 0 base
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
73 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
74 close IN;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
75
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
76 my %cites; ### peaks
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
77 foreach my $chr (keys %breaks) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
78 foreach my $strand (keys %{$breaks{$chr}}) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
79 my @array=@{$breaks{$chr}{$strand}};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
80 @array=sort{$a->[0]<=>$b->[0]} @array;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
81 for (my $i=0;$i<@array;$i++) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
82 my $start=$array[$i][0];my $end=$array[$i][1];
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
83 my @subarray=();
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
84 push @subarray,$array[$i];
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
85
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
86 for (my $j=$i+1;$j<@array;$j++) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
87 if ($start<$array[$j][1] && $end>$array[$j][0]) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
88 push @subarray,$array[$j];
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
89 ($start,$end)=&newpos($start,$end,$array[$j][0],$array[$j][1]);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
90 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
91 else{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
92 $i=$j;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
93 &find_cites(\@subarray,$chr,$strand);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
94 last;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
95 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
96 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
97 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
98 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
99 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
100
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
101 open FA,">$faout"; #output file
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
102 open STR,">$strout";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
103 foreach my $chr (keys %cites) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
104 foreach my $strand (keys %{$cites{$chr}}) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
105
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
106 my @array2=@{$cites{$chr}{$strand}};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
107 @array2=sort{$a->[0]<=>$b->[0]} @array2;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
108 &excise(\@array2,$chr,$strand);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
109 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
110 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
111 close FA;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
112 close STR;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
113 sub oneCiteDn{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
114 my ($array,$a,$chr,$strand)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
115
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
116 my $ss=$$array[$a][0]-$flank;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
117 $ss=0 if($ss<0);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
118 my $ee=$$array[$a][1]+$maxd+$flank;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
119 $ee=$lng{$chr} if($ee>$lng{$chr});
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
120
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
121 my $seq=substr($genome{$chr},$ss,$ee-$ss+1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
122 if($strand eq "-"){$seq=revcom($seq);}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
123
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
124 my $val=&ffw1($seq,$$array[$a][3],$chr,$strand,$ss,$ee);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
125 return $val;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
126 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
127 sub oneCiteUp{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
128 my ($array,$a,$chr,$strand)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
129
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
130 my $ss=$$array[$a][0]-$maxd-$flank;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
131 $ss=0 if($ss<0);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
132 my $ee=$$array[$a][1]+$flank;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
133 $ee=$lng{$chr} if($ee>$lng{$chr});
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
134
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
135 my $seq=substr($genome{$chr},$ss,$ee-$ss+1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
136 if($strand eq "-"){$seq=revcom($seq);}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
137
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
138 my $val=&ffw1($seq,$$array[$a][3],$chr,$strand,$ss,$ee);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
139 return $val;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
140
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
141 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
142
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
143 sub twoCites{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
144 my ($array,$a,$b,$chr,$strand)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
145
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
146 my $ss=$$array[$a][0]-$flank;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
147 $ss=0 if($ss<0);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
148 my $ee=$$array[$b][1]+$flank;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
149 $ee=$lng{$chr} if($ee>$lng{$chr});
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
150
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
151 my $seq=substr($genome{$chr},$ss,$ee-$ss+1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
152 if($strand eq "-"){$seq=revcom($seq);}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
153
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
154 # my( $str,$mfe)=RNA::fold($seq);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
155 # return 0 if($mfe>$MAX_ENERGY); ### minimum mfe
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
156 my $val=&ffw2($seq,$$array[$a][3],$$array[$b][3],$chr,$strand,$ss,$ee);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
157
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
158 return $val;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
159
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
160 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
161 sub excise{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
162 my ($cluster,$chr,$strand)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
163
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
164 my $last_pos=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
165 for (my $i=0;$i<@{$cluster};$i++) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
166 next if($$cluster[$i][0]<$last_pos);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
167 my $ok=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
168 for (my $j=$i+1;$j<@{$cluster} ;$j++) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
169 if($$cluster[$j][0]-$$cluster[$i][1]>$maxd){
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
170 $i=$j;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
171 last;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
172 }else{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
173 $ok=&twoCites($cluster,$i,$j,$chr,$strand);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
174 if($ok){ $last_pos=$$cluster[$j][1]+$flank; $i=$j; last;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
175 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
176 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
177 next if($ok);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
178
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
179 $ok=&oneCiteDn($cluster,$i,$chr,$strand);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
180 if($ok){$last_pos=$$cluster[$i][1]+$maxd+$flank; next;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
181 $ok=&oneCiteUp($cluster,$i,$chr,$strand);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
182 if($ok){$last_pos=$$cluster[$i][1]+$flank;next;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
183 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
184
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
185
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
186 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
187
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
188 sub ffw2{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
189 my ($seq,$tag1,$tag2,$chr,$strand,$ss,$ee)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
190
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
191 my $N_count=$seq=~tr/N//; ## precursor sequence has not more than 5 Ns
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
192 if ($N_count > 5) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
193 return 0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
194 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
195
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
196 my $seq_length=length $seq;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
197 # position tag1 and tag2
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
198 my $tag1_beg=index($seq,$tag1,0)+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
199 if ($tag1_beg < 1) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
200 warn "[ffw2] coordinate error.\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
201 # $fold->{reason}="coordinate error";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
202 return 0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
203 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
204 my $tag2_beg=index($seq,$tag2,0)+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
205 if ($tag2_beg < 1) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
206 warn "[ffw2] coordinate error.\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
207 # $fold->{reason}="coordinate error";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
208 return 0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
209 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
210 if ($tag2_beg < $tag1_beg) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
211 # swap tag1 and tag2
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
212 ($tag1,$tag2)=($tag2,$tag1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
213 ($tag1_beg,$tag2_beg)=($tag2_beg,$tag1_beg);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
214 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
215 my $tag1_end=$tag1_beg+length($tag1)-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
216 my $tag2_end=$tag2_beg+length($tag2)-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
217 # re-clipping
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
218 my $beg=$tag1_beg-$FLANK; $beg=1 if $beg < 1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
219 my $end=$tag2_end+$FLANK; $end=$seq_length if $end > $seq_length;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
220 $seq=substr($seq,$beg-1,$end-$beg+1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
221 $seq_length=length $seq;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
222 # re-reposition
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
223 $tag1_beg=index($seq,$tag1,0)+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
224 if ($tag1_beg < 1) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
225 warn "[ffw2] coordinate error.\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
226 # $fold->{reason}="coordinate error";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
227 return 0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
228 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
229
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
230 $tag2_beg=index($seq,$tag2,0)+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
231 if ($tag2_beg < 1) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
232 warn "[ffw2] coordinate error.\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
233 # $fold->{reason}="coordinate error";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
234 return 0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
235 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
236 $tag1_end=$tag1_beg+length($tag1)-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
237 $tag2_end=$tag2_beg+length($tag2)-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
238
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
239 # fold
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
240 my ($struct,$mfe)=RNA::fold($seq);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
241 $mfe=sprintf "%.2f", $mfe;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
242 if ($mfe > $MAX_ENERGY) {return 0;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
243
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
244 # tag1
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
245 my $tag1_length=$tag1_end-$tag1_beg+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
246 my $tag1_struct=substr($struct,$tag1_beg-1,$tag1_length);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
247 my $tag1_arm=which_arm($tag1_struct);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
248 my $tag1_unpair=$tag1_struct=~tr/.//;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
249 my $tag1_pair=$tag1_length-$tag1_unpair;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
250 my $tag1_max_bulge=biggest_bulge($tag1_struct);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
251 if ($tag1_arm ne "5p") { return 0;} # tag not in stem
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
252 # if ($tag1_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag1_unpair ($MAX_UNPAIR)"; return $pass}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
253 if ($tag1_pair < $MIN_PAIR) {return 0;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
254 if ($tag1_max_bulge > $MAX_BULGE) {return 0;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
255
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
256 # tag2
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
257 my $tag2_length=$tag2_end-$tag2_beg+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
258 my $tag2_struct=substr($struct,$tag2_beg-1,$tag2_length);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
259 my $tag2_arm=which_arm($tag2_struct);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
260 my $tag2_unpair=$tag2_struct=~tr/.//;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
261 my $tag2_pair=$tag2_length-$tag2_unpair;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
262 my $tag2_max_bulge=biggest_bulge($tag2_struct);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
263 if ($tag2_arm ne "3p") {return 0;} # star not in stem
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
264 # if ($tag2_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag2_unpair ($MAX_UNPAIR)"; return $pass}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
265 if ($tag2_pair < $MIN_PAIR) {return 0;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
266 if ($tag2_max_bulge > $MAX_BULGE) {return 0;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
267
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
268 # space size between miR and miR*
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
269 my $space=$tag2_beg-$tag1_end-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
270 if ($space < $MIN_SPACE) {return 0;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
271 if ($space > $MAX_SPACE) {return 0;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
272
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
273 # size diff of miR and miR*
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
274 my $size_diff=abs($tag1_length-$tag2_length);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
275 if ($size_diff > $MAX_SIZEDIFF) {return 0;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
276
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
277 # build base pairing table
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
278 my %pairtable;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
279 &parse_struct($struct,\%pairtable); # coords count from 1
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
280
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
281 my $asy1=get_asy(\%pairtable,$tag1_beg,$tag1_end);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
282 my $asy2=get_asy(\%pairtable,$tag2_beg,$tag2_end);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
283 my $asy=($asy1 < $asy2) ? $asy1 : $asy2;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
284 if ($asy > $ASYMMETRY) {return 0}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
285
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
286 # duplex fold, determine whether two matures like a miR/miR* ike duplex
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
287 my ($like_mir_duplex1,$duplex_pair,$overhang1,$overhang2)=likeMirDuplex1($tag1,$tag2);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
288 # parse hairpin, determine whether two matures form miR/miR* duplex in hairpin context
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
289 my ($like_mir_duplex2,$duplex_pair2,$overhang_b,$overhang_t)=likeMirDuplex2(\%pairtable,$tag1_beg,$tag1_end,$tag2_beg,$tag2_end);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
290 if ($like_mir_duplex1==0 && $like_mir_duplex2==0) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
291 return 0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
292 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
293
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
294 print FA ">$chr:$strand:$ss..$ee\n$seq\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
295 print STR ">$chr:$strand:$ss..$ee\n$seq\n$struct\t($mfe)\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
296
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
297 return 1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
298 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
299
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
300 sub ffw1{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
301 my ($seq,$tag,$chr,$strand,$ss,$ee)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
302 my $pass=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
303
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
304 my $N_count=$seq=~tr/N//;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
305 if ($N_count > 5) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
306 return 0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
307 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
308
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
309 my $seq_length=length $seq;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
310 my $tag_length=length $tag;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
311
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
312 # position
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
313 my $tag_beg=index($seq,$tag,0)+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
314 if ($tag_beg < 1) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
315 warn "[ffw1] coordinate error.\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
316 return $pass;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
317 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
318 my $tag_end=$tag_beg+length($tag)-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
319
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
320
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
321 # define candidate precursor by hybrid short arm to long arm, not solid enough
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
322 my($beg,$end)=define_precursor($seq,$tag);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
323 if (not defined $beg) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
324 return $pass;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
325 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
326 if (not defined $end) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
327 return $pass;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
328 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
329 $seq=substr($seq,$beg-1,$end-$beg+1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
330 $seq_length=length $seq;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
331
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
332
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
333 # fold
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
334 my ($struct,$mfe)=RNA::fold($seq);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
335 $mfe=sprintf "%.2f",$mfe;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
336 if ($mfe > $MAX_ENERGY) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
337 $pass=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
338 return $pass;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
339 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
340
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
341 # reposition
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
342 $tag_beg=index($seq,$tag,0)+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
343 if ($tag_beg < 1) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
344 warn "[ffw1] coordinate error.\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
345 return 0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
346 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
347 $tag_end=$tag_beg+length($tag)-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
348
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
349 my $tag_struct=substr($struct,$tag_beg-1,$tag_length);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
350 my $tag_arm=which_arm($tag_struct);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
351 my $tag_unpair=$tag_struct=~tr/.//;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
352 my $tag_pair=$tag_length-$tag_unpair;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
353 my $tag_max_bulge=biggest_bulge($tag_struct);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
354 if ($tag_arm eq "-") { return $pass;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
355 # if ($tag_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag_unpair ($MAX_UNPAIR)"; return $pass}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
356 if ($tag_pair < $MIN_PAIR) { return $pass;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
357 if ($tag_max_bulge > $MAX_BULGE) {return $pass;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
358
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
359 # build base pairing table
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
360 my %pairtable;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
361 &parse_struct($struct,\%pairtable); # coords count from 1
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
362
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
363 # get star
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
364 my ($star_beg,$star_end)=get_star(\%pairtable,$tag_beg,$tag_end);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
365 my $star=substr($seq,$star_beg-1,$star_end-$star_beg+1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
366 my $star_length=$star_end-$star_beg+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
367 my $star_struct=substr($struct,$star_beg-1,$star_end-$star_beg+1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
368 my $star_arm=which_arm($star_struct);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
369 my $star_unpair=$star_struct=~tr/.//;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
370 my $star_pair=$star_length-$star_unpair;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
371 my $star_max_bulge=biggest_bulge($star_struct);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
372 if ($star_arm eq "-") { return $pass;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
373 # if ($star_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$star_unpair ($MAX_UNPAIR)"; return $pass}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
374 if ($star_pair < $MIN_PAIR) {return $pass;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
375 if ($star_max_bulge > $MAX_BULGE) {return $pass;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
376
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
377 if ($tag_arm eq $star_arm) {return $pass;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
378
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
379 # space size between miR and miR*
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
380 my $space;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
381 if ($tag_beg < $star_beg) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
382 $space=$star_beg-$tag_end-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
383 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
384 else {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
385 $space=$tag_beg-$star_end-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
386 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
387 if ($space < $MIN_SPACE) { return $pass;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
388 if ($space > $MAX_SPACE) { return $pass;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
389
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
390 # size diff
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
391 my $size_diff=abs($tag_length-$star_length);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
392 if ($size_diff > $MAX_SIZEDIFF) { return $pass;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
393
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
394 # asymmetry
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
395 my $asy=get_asy(\%pairtable,$tag_beg,$tag_end);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
396 if ($asy > $ASYMMETRY) {return $pass;}
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
397
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
398 $pass=1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
399 print FA ">$chr:$strand:$ss..$ee\n$seq\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
400 print STR ">$chr:$strand:$ss..$ee\n$seq\n$struct\t($mfe)\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
401 return $pass;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
402
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
403 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
404 sub get_star {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
405 my($table,$beg,$end)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
406
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
407 my ($s1,$e1,$s2,$e2); # s1 pair to s2, e1 pair to e2
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
408 foreach my $i ($beg..$end) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
409 if (defined $table->{$i}) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
410 my $j=$table->{$i};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
411 $s1=$i;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
412 $s2=$j;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
413 last;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
414 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
415 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
416 foreach my $i (reverse ($beg..$end)) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
417 if (defined $table->{$i}) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
418 my $j=$table->{$i};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
419 $e1=$i;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
420 $e2=$j;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
421 last;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
422 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
423 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
424 # print "$s1,$e1 $s2,$e2\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
425
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
426 # correct terminus
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
427 my $off1=$s1-$beg;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
428 my $off2=$end-$e1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
429 $s2+=$off1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
430 $s2+=2; # 081009
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
431 $e2-=$off2; $e2=1 if $e2 < 1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
432 $e2+=2; $e2=1 if $e2 < 1; # 081009
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
433 ($s2,$e2)=($e2,$s2) if ($s2 > $e2);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
434 return ($s2,$e2);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
435 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
436
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
437 sub define_precursor {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
438 my $seq=shift;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
439 my $tag=shift;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
440
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
441 my $seq_length=length $seq;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
442 my $tag_length=length $tag;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
443 my $tag_beg=index($seq,$tag,0)+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
444 my $tag_end=$tag_beg+$tag_length-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
445
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
446 # split the candidate region into short arm and long arm
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
447 my $tag_arm;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
448 my ($larm,$larm_beg,$larm_end);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
449 my ($sarm,$sarm_beg,$sarm_end);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
450 if ($tag_beg-1 < $seq_length-$tag_end) { # on 5' arm
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
451 $sarm=substr($seq,0,$tag_end);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
452 $larm=substr($seq,$tag_end);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
453 $sarm_beg=1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
454 $sarm_end=$tag_end;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
455 $larm_beg=$tag_end+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
456 $larm_end=$seq_length;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
457 $tag_arm="5p";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
458 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
459 else {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
460 $larm=substr($seq,0,$tag_beg-1); # on 3' arm
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
461 $sarm=substr($seq,$tag_beg-1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
462 $larm_beg=1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
463 $larm_end=$tag_beg-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
464 $sarm_beg=$tag_beg;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
465 $sarm_end=$seq_length;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
466 $tag_arm="3p";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
467 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
468
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
469 # print "$sarm_beg,$sarm_end $sarm\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
470 # print "$larm_beg,$larm_end $larm\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
471
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
472 # clipping short arm
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
473 if ($tag_arm eq "5p") {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
474 $sarm_beg=$tag_beg-$flank; $sarm_beg=1 if $sarm_beg < 1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
475 $sarm=substr($seq,$sarm_beg-1,$sarm_end-$sarm_beg+1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
476 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
477 else {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
478 $sarm_end=$tag_end+$flank; $sarm_end=$seq_length if $sarm_end > $seq_length;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
479 $sarm=substr($seq,$sarm_beg-1,$sarm_end-$sarm_beg+1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
480 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
481 # print "$sarm_beg,$sarm_end $sarm\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
482 # print "$larm_beg,$larm_end $larm\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
483
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
484 # define the precursor by hybriding short arm to long arm
43
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
485 =cut #modify in 2014-10-28
17
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
486 my $duplex=RNA::duplexfold($sarm,$larm);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
487 my $struct=$duplex->{structure};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
488 my $energy=sprintf "%.2f", $duplex->{energy};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
489 my ($str1,$str2)=split(/&/,$struct);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
490 my $pair=$str1=~tr/(//;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
491 # print "pair=$pair\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
492 my $beg1=$duplex->{i}+1-length($str1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
493 my $end1=$duplex->{i};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
494 my $beg2=$duplex->{j};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
495 my $end2=$duplex->{j}+length($str2)-1;
43
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
496 =cut
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
497 ###### new codes begin
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
498 my $duplex=`perl -e 'print "$sarm\n$larm"' | RNAduplex`;
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
499 #(.(.(((.....(((.&))))))...).). 1,16 : 1,13 (-7.20)
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
500 my @tmpduplex=split/\s+/,$duplex;
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
501 my $struct=$tmpduplex[0];
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
502 $tmpduplex[-1]=~s/[(|)]//g;
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
503 my $energy=$tmpduplex[-1];
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
504 my ($str1,$str2)=split(/&/,$struct);
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
505 my $pair=$str1=~tr/(//;
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
506 my ($beg1,$end1)=split/,/,$tmpduplex[1];
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
507 my ($beg2,$end2)=split/,/,$tmpduplex[3];
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
508 ######## new codes end
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
509
17
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
510 # print "$beg1:$end1 $beg2:$end2\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
511 # transform coordinates
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
512 $beg1=$beg1+$sarm_beg-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
513 $end1=$end1+$sarm_beg-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
514 $beg2=$beg2+$larm_beg-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
515 $end2=$end2+$larm_beg-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
516 # print "$beg1:$end1 $beg2:$end2\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
517
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
518 my $off5p=$beg1-$sarm_beg;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
519 my $off3p=$sarm_end-$end1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
520 $beg2-=$off3p; $beg2=1 if $beg2 < 1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
521 $end2+=$off5p; $end2=$seq_length if $end2 > $seq_length;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
522
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
523 # print "$beg1:$end1 $beg2:$end2\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
524
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
525 my $beg=$sarm_beg < $beg2 ? $sarm_beg : $beg2;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
526 my $end=$sarm_end > $end2 ? $sarm_end : $end2;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
527
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
528 return if $pair < $MIN_PAIR;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
529 # print "$beg,$end\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
530 return ($beg,$end);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
531 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
532
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
533
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
534 # duplex fold, judge whether two short seqs like a miRNA/miRNA* duplex
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
535 sub likeMirDuplex1 {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
536 my $seq1=shift;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
537 my $seq2=shift;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
538 my $like_mir_duplex=1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
539
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
540 my $length1=length $seq1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
541 my $length2=length $seq2;
43
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
542 =cut
17
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
543 my $duplex=RNA::duplexfold($seq1, $seq2);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
544 my $duplex_struct=$duplex->{structure};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
545 my $duplex_energy=sprintf "%.2f", $duplex->{energy};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
546 my ($str1,$str2)=split(/&/,$duplex_struct);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
547 my $beg1=$duplex->{i}+1-length($str1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
548 my $end1=$duplex->{i};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
549 my $beg2=$duplex->{j};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
550 my $end2=$duplex->{j}+length($str2)-1;
43
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
551 =cut
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
552 my $duplex=`perl -e 'print "$seq1\n$seq2"' | RNAduplex`;
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
553 #(.(.(((.....(((.&))))))...).). 1,16 : 1,13 (-7.20)
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
554 my @tmpduplex=split/\s+/,$duplex;
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
555 my $duplex_struct=$tmpduplex[0];
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
556 $tmpduplex[-1]=~s/[(|)]//g;
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
557 my $duplex_energy=$tmpduplex[-1];
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
558 my ($str1,$str2)=split(/&/,$duplex_struct);
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
559 #my $pair=$str1=~tr/(//;
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
560 my ($beg1,$end1)=split/,/,$tmpduplex[1];
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
561 my ($beg2,$end2)=split/,/,$tmpduplex[3];
4c0b1a94b882 Uploaded
big-tiandm
parents: 17
diff changeset
562
17
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
563 # revise beg1, end1, beg2, end2
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
564 $str1=~/^(\.*)/;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
565 $beg1+=length($1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
566 $str1=~/(\.*)$/;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
567 $end1-=length($1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
568 $str2=~/^(\.*)/;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
569 $beg2+=length($1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
570 $str2=~/(\.*)$/;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
571 $end2-=length($1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
572
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
573 my $pair_num=$str1=~tr/(//;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
574 my $overhang1=($length2-$end2)-($beg1-1); # 3' overhang at hairpin bottom
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
575 my $overhang2=($length1-$end1)-($beg2-1); # 3' overhang at hairpin neck
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
576 # print $pair_num,"\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
577 # print $overhang1,"\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
578 # print $overhang2,"\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
579 if ($pair_num < 13) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
580 $like_mir_duplex=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
581 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
582 if ($overhang1 < 0 || $overhang2 < 0 ) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
583 $like_mir_duplex=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
584 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
585 if ($overhang1 > 4 || $overhang2 > 4) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
586 $like_mir_duplex=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
587 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
588 return ($like_mir_duplex,$pair_num,$overhang1,$overhang2);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
589 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
590
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
591 # judge whether two matures form miR/miR* duplex, in hairpin context
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
592 sub likeMirDuplex2 {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
593 my ($table,$beg1,$end1,$beg2,$end2)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
594 my $like_mir_duplex=1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
595
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
596 # s1 e1
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
597 # 5 ----------------------------3
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
598 # | | |||| ||| |
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
599 #3 -------------------------------5
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
600 # e2 s2
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
601
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
602 my $pair_num=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
603 my $overhang1=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
604 my $overhang2=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
605 my ($s1,$e1,$s2,$e2);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
606 foreach my $i ($beg1..$end1) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
607 if (defined $table->{$i}) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
608 my $j=$table->{$i};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
609 if ($j <= $end2 && $j >= $beg2) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
610 $s1=$i;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
611 $e2=$j;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
612 last;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
613 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
614 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
615 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
616 foreach my $i (reverse ($beg1..$end1)) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
617 if (defined $table->{$i}) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
618 my $j=$table->{$i};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
619 if ($j <= $end2 && $j >= $beg2) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
620 $e1=$i;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
621 $s2=$j;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
622 last;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
623 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
624 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
625 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
626
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
627 # print "$beg1,$end1 $s1,$e1\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
628 # print "$beg2,$end2 $s2,$e2\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
629
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
630 foreach my $i ($beg1..$end1) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
631 if (defined $table->{$i}) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
632 my $j=$table->{$i};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
633 if ($j <= $end2 && $j >= $beg2) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
634 ++$pair_num;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
635 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
636 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
637 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
638 if (defined $s1 && defined $e2) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
639 $overhang1=($end2-$e2)-($s1-$beg1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
640 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
641 if (defined $e1 && defined $s2) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
642 $overhang2=($end1-$e1)-($s2-$beg2);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
643 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
644
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
645 if ($pair_num < 13) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
646 $like_mir_duplex=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
647 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
648 if ($overhang1 < 0 && $overhang2 < 0) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
649 $like_mir_duplex=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
650 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
651 return ($like_mir_duplex,$pair_num,$overhang1,$overhang2);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
652 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
653 sub parse_struct {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
654 my $struct=shift;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
655 my $table=shift;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
656
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
657 my @t=split('',$struct);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
658 my @lbs; # left brackets
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
659 foreach my $k (0..$#t) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
660 if ($t[$k] eq "(") {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
661 push @lbs, $k+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
662 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
663 elsif ($t[$k] eq ")") {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
664 my $lb=pop @lbs;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
665 my $rb=$k+1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
666 $table->{$lb}=$rb;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
667 $table->{$rb}=$lb;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
668 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
669 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
670 if (@lbs) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
671 warn "unbalanced RNA struct.\n";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
672 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
673 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
674 sub which_arm {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
675 my $substruct=shift;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
676 my $arm;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
677 if ($substruct=~/\(/ && $substruct=~/\)/) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
678 $arm="-";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
679 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
680 elsif ($substruct=~/\(/) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
681 $arm="5p";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
682 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
683 else {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
684 $arm="3p";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
685 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
686 return $arm;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
687 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
688 sub biggest_bulge {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
689 my $struct=shift;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
690 my $bulge_size=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
691 my $max_bulge=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
692 while ($struct=~/(\.+)/g) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
693 $bulge_size=length $1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
694 if ($bulge_size > $max_bulge) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
695 $max_bulge=$bulge_size;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
696 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
697 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
698 return $max_bulge;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
699 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
700 sub get_asy {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
701 my($table,$a1,$a2)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
702 my ($pre_i,$pre_j);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
703 my $asymmetry=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
704 foreach my $i ($a1..$a2) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
705 if (defined $table->{$i}) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
706 my $j=$table->{$i};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
707 if (defined $pre_i && defined $pre_j) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
708 my $diff=($i-$pre_i)+($j-$pre_j);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
709 $asymmetry += abs($diff);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
710 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
711 $pre_i=$i;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
712 $pre_j=$j;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
713 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
714 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
715 return $asymmetry;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
716 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
717
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
718 sub peaks{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
719 my @cluster=@{$_[0]};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
720
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
721 return if(@cluster<1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
722
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
723 my $max=0; my $index=-1;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
724 for (my $i=0;$i<@cluster;$i++) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
725 if($cluster[$i][2]>$max){
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
726 $max=$cluster[$i][2];
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
727 $index=$i;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
728 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
729 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
730 # &excise(\@cluster,$index,$_[1],$_[2]);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
731 return($index);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
732 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
733
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
734 sub find_cites{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
735 my @tmp=@{$_[0]};
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
736 my $i=&peaks(\@tmp);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
737
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
738 my $start=$tmp[$i][0];
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
739 my $total=0; my $node5=0;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
740 for (my $j=0;$j<@tmp ;$j++) {
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
741 $total+=$tmp[$j][2];
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
742 $node5 +=$tmp[$j][2] if($tmp[$j][0]-$start<=2 && $tmp[$j][0]-$start>=-2);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
743 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
744 push @{$cites{$_[1]}{$_[2]}},$tmp[$i] if($node5/$total>0.80 && $tmp[$i][2]/$node5>0.5);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
745 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
746
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
747 sub newpos{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
748 my ($a,$b,$c,$d)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
749 my $s= $a>$c ? $c : $a;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
750 my $e=$b>$d ? $b : $d;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
751 return($s,$e);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
752 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
753
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
754 sub rev{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
755
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
756 my($sequence)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
757
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
758 my $rev=reverse $sequence;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
759
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
760 return $rev;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
761 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
762
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
763 sub com{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
764
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
765 my($sequence)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
766
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
767 $sequence=~tr/acgtuACGTU/TGCAATGCAA/;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
768
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
769 return $sequence;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
770 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
771
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
772 sub revcom{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
773
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
774 my($sequence)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
775
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
776 my $revcom=rev(com($sequence));
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
777
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
778 return $revcom;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
779 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
780
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
781 sub find_strand{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
782
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
783 #A subroutine to find the strand, parsing different blast formats
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
784 my($other)=@_;
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
785
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
786 my $strand="+";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
787
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
788 if($other=~/-/){
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
789 $strand="-";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
790 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
791
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
792 if($other=~/minus/i){
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
793 $strand="-";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
794 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
795
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
796 return($strand);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
797 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
798 sub usage{
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
799 print <<"USAGE";
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
800 Version $version
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
801 Usage:
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
802 $0 -map -g -d -f -o -s -e
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
803 options:
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
804 -map input file# align result # bst. format
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
805 -g input file # genome sequence fasta format
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
806 -d <int> Maximal space between miRNA and miRNA* (200)
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
807 -f <int> Flank sequence length of miRNA precursor (10)
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
808 -o output file# percursor fasta file
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
809 -s output file# precursor structure file
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
810 -e <folat> Maximal free energy allowed for a miRNA precursor (-18 kcal/mol)
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
811
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
812 -h help
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
813 USAGE
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
814 exit(1);
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
815 }
1131b4008650 Uploaded
big-tiandm
parents:
diff changeset
816