13
|
1 #!/usr/bin/perl -w
|
|
2 #Filename:
|
|
3 #Author: Tian Dongmei
|
|
4 #Email: tiandm@big.ac.cn
|
|
5 #Date: 2014-6-4
|
|
6 #Modified:
|
|
7 #Description: solexa miRNA express and sequence
|
|
8 my $version=1.00;
|
|
9
|
|
10 use strict;
|
|
11 use Getopt::Long;
|
|
12
|
|
13 my %opts;
|
|
14 GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h");
|
|
15 if (!(defined $opts{i} and defined $opts{list} and defined $opts{fa} and defined $opts{pre} and defined $opts{tag}) || defined $opts{h}) { #necessary arguments
|
|
16 &usage;
|
|
17 }
|
|
18
|
|
19 my $filein=$opts{'i'};
|
|
20 my $fileout=$opts{'list'};
|
|
21 my $out=$opts{'fa'};
|
|
22 my $preout=$opts{'pre'};
|
|
23
|
|
24 =cut
|
|
25 my %hash_pri;
|
|
26 open PRI,"<$opts{p}";
|
|
27 while (my $aline=<PRI>) {
|
|
28 chomp $aline;
|
|
29 if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;}
|
|
30 }
|
|
31 close PRI;
|
|
32 =cut
|
|
33
|
|
34 open IN,"<$filein"; #input file
|
|
35 open OUT,">$fileout"; #output file
|
|
36 open FA ,">$out";
|
|
37 open PRE,">$preout";
|
|
38
|
|
39 print OUT "#ID\tcoordinate\tpos1\tpos2";
|
|
40 my @marks=split/\,/,$opts{'tag'};
|
|
41 foreach (@marks) {
|
|
42 print OUT "\t",$_,"_matureExp";
|
|
43 }
|
|
44 foreach (@marks) {
|
|
45 print OUT "\t",$_,"_starExp";
|
|
46 }
|
|
47 foreach (@marks) {
|
|
48 print OUT "\t",$_,"_totalExp";
|
|
49 }
|
|
50
|
|
51 print OUT "\n";
|
|
52
|
|
53 my (%uniq_id,$novel);
|
|
54 while (my $aline=<IN>) {
|
|
55 chomp $aline;
|
|
56 until ($aline =~ /^score\s+[-\d\.]+/){
|
|
57 $aline = <IN>;
|
|
58 if (eof) {last;}
|
|
59 }
|
|
60 if (eof) {last;}
|
|
61 ########## miRNA ID ################
|
|
62 $novel++;
|
|
63 ########### annotate####################
|
|
64 do {$aline=<IN>;} until($aline=~/flank_first_end/) ;
|
|
65 chomp $aline;
|
|
66 my @flank1=split/\t/,$aline;
|
|
67 do {$aline=<IN>;} until($aline=~/flank_second_beg/) ;
|
|
68 chomp $aline;
|
|
69 my @flank2=split/\t/,$aline;
|
|
70 #
|
|
71 ########## mature start loop pre ####
|
|
72 do {$aline=<IN>;} until($aline=~/mature_beg/) ;
|
|
73 chomp $aline;
|
|
74 my @start=split/\t/,$aline;
|
|
75 # $start[1] -=$flank1[1];
|
|
76 do {$aline=<IN>;} until($aline=~/mature_end/) ;
|
|
77 chomp $aline;
|
|
78 my @end=split/\t/,$aline;
|
|
79 # $end[1] -=$flank1[1];
|
|
80 do {$aline=<IN>;} until($aline=~/mature_seq/) ;
|
|
81 chomp $aline;
|
|
82 my @arr1=split/\t/,$aline;
|
|
83 do {$aline=<IN>;} until($aline=~/pre_seq/) ;
|
|
84 chomp $aline;
|
|
85 my @arr2=split/\t/,$aline;
|
|
86 do {$aline=<IN>;} until($aline=~/pri_id/) ;
|
|
87 chomp $aline;
|
|
88 my @pri_id=split/\t/,$aline;
|
|
89 do {$aline=<IN>;} until($aline=~/pri_seq/) ;
|
|
90 chomp $aline;
|
|
91 my @pri_seq=split/\t/,$aline;
|
|
92 do {$aline=<IN>;} until($aline=~/star_beg/) ;
|
|
93 chomp $aline;
|
|
94 my @star_start=split/\t/,$aline;
|
|
95 # $star_start[1] -=$flank1[1];
|
|
96 do {$aline=<IN>;} until($aline=~/star_end/) ;
|
|
97 chomp $aline;
|
|
98 my @star_end=split/\t/,$aline;
|
|
99 # $star_end[1] -=$flank1[1];
|
|
100 do {$aline=<IN>;} until($aline=~/star_seq/) ;
|
|
101 chomp $aline;
|
|
102 my @arr3=split/\t/,$aline;
|
|
103 print OUT "miR-c-$novel\t$pri_id[1]\tmature:$start[1]:$end[1]\tstar:$star_start[1]:$star_end[1]\t";
|
|
104 #print OUT "$arr1[1]\t$arr3[1]\t$arr2[1]\t\/\t";
|
|
105 print FA ">miR-c-$novel\n$arr1[1]\n";
|
|
106 print PRE ">miR-c-$novel\n$pri_seq[1]\n";
|
|
107 ########## reads count #############
|
|
108 <IN>;
|
|
109 my @count1;my @count2;my @count3;my @count4;
|
|
110 $aline=<IN>;
|
|
111 do {
|
|
112 chomp $aline;
|
|
113 my @reads=split/\t/,$aline;
|
|
114 my @pos=();
|
|
115 $reads[5]=~/(\d+)\.\.(\d+)/;
|
|
116 # $pos[0] =$1-$flank1[1];
|
|
117 # $pos[1] =$2-$flank1[1];
|
|
118 $pos[0]=$1;
|
|
119 $pos[1]=$2;
|
|
120 $reads[0]=~/:([\d|_]+)_x(\d+)$/;
|
|
121 my @ss=split/_/,$1;
|
|
122 for (my $i=0;$i<@ss ;$i++) {
|
|
123 if (!(defined $count3[$i])) {
|
|
124 $count3[$i]=0;
|
|
125 }
|
|
126 if (!(defined $count4[$i])) {
|
|
127 $count4[$i]=0;
|
|
128 }
|
|
129 $count2[$i]+=$ss[$i];
|
|
130
|
|
131 }
|
|
132 # $count3 +=$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 );
|
|
133 # $count4 +=$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 );
|
|
134 # $count1 =$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 && $count1<$1);
|
|
135 # $count2 =$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 && $count2<$1);
|
|
136 if($end[1]-$pos[1]>=-5 && $end[1]-$pos[1]<=5 && $pos[0]-$start[1]>=-3 && $pos[0]-$start[1]<=3 )
|
|
137 {
|
|
138 for (my $i=0;$i<@ss;$i++) {
|
|
139 $count3[$i]+=$ss[$i];
|
|
140 }
|
|
141 }
|
|
142 if($star_end[1]-$pos[1]<=5 && $star_end[1]-$pos[1]>=-5 && $pos[0]-$star_start[1]>=-3 && $pos[0]-$star_start[1]<=3){
|
|
143 for (my $i=0;$i<@ss;$i++) {
|
|
144 $count4[$i]+=$ss[$i];
|
|
145 }
|
|
146 }
|
|
147 $aline=<IN>;
|
|
148 chomp $aline;
|
|
149 } until(length $aline < 1) ;
|
|
150 $"="\t";
|
|
151 print OUT "@count3\t@count4\t@count2\n";
|
|
152 $"=" ";
|
|
153 }
|
|
154
|
|
155 close IN;
|
|
156 close OUT;
|
|
157
|
|
158 sub usage{
|
|
159 print <<"USAGE";
|
|
160 Version $version
|
|
161 Usage:
|
|
162 $0 -i -list -fa -pre -tag
|
|
163 options:
|
|
164 -i input file,predictions file
|
|
165 -list output file miRNA list file
|
|
166 -fa output file ,miRNA sequence fasta file.
|
|
167 -pre output file, miRNA precursor fasta file.
|
|
168 -tag string, sample names# eg: samA,samB,samC
|
|
169 -h help
|
|
170 USAGE
|
|
171 exit(1);
|
|
172 }
|
|
173
|