47
|
1 #!/usr/bin/perl -w
|
|
2 #Filename:
|
|
3 #Author: Tian Dongmei
|
|
4 #Email: tiandm@big.ac.cn
|
|
5 #Date: 2014-5-29
|
|
6 #Modified:
|
|
7 #Description:
|
|
8 my $version=1.00;
|
|
9
|
|
10 use strict;
|
|
11 use Getopt::Long;
|
|
12 use File::Basename;
|
|
13
|
|
14 my %opts;
|
|
15 GetOptions(\%opts,"i=s","format=s","min=i","max=i","o=s","h");
|
|
16 if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments
|
|
17 &usage;
|
|
18 }
|
|
19 my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath);
|
|
20 my ($predir,$rfamdir,$knowndir,$genomedir,$noveldir);
|
|
21 open IN,"<$opts{i}";
|
|
22 $config=<IN>; chomp $config;
|
|
23 $prepath=<IN>; chomp $prepath;
|
|
24 $genomepath=<IN>; chomp $genomepath;
|
|
25 $rfampath=<IN>;
|
|
26 close IN;
|
|
27 my @tmp=split/\//,$prepath;
|
|
28 $predir=$tmp[-1];
|
|
29 @tmp=split/\//,$genomepath;
|
|
30 $genomedir=$tmp[-1];
|
|
31
|
|
32 my $dir=dirname($opts{'o'});
|
|
33
|
|
34 open OUT ,">$opts{'o'}";
|
|
35 print OUT "<HTML>\n <HEAD>\n <TITLE> Analysis Report </TITLE>\n </HEAD>
|
|
36 <BODY bgcolor=\"lightgray\">\n <h1 align=\"center\">\n <font face=\"ºÚÌå\">\n <b>Preprocess Report</b>\n </font>\n </h1>
|
|
37 <h2>1. Sequence No. and quality</h2>
|
|
38 <h3>1.1 Sequece No.</h3>
|
|
39 ";
|
|
40
|
|
41 ### raw data no
|
|
42 open IN,"<$config";
|
|
43 my @files;my @marks; my @rawNo;
|
|
44 while (my $aline=<IN>) {
|
|
45 chomp $aline;
|
|
46 my @tmp=split/\t/,$aline;
|
|
47 push @files,$tmp[0];
|
|
48
|
|
49 my $no=`less $tmp[0] |wc -l `;
|
|
50 chomp $no;
|
|
51 if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") {
|
|
52 $no=$no/4;
|
|
53 }
|
|
54 else{
|
|
55 $no=$no/2;
|
|
56 }
|
|
57 push @rawNo,$no;
|
|
58
|
|
59 push @marks,$tmp[1];
|
|
60 }
|
|
61 close IN;
|
|
62
|
|
63 ### preprocess
|
|
64 unless ($prepath=~/\/$/) {
|
|
65 $prepath .="/";
|
|
66 }
|
|
67
|
|
68 my @trimNo;my @collapse;
|
|
69 my $collapsefile=$prepath."collapse_reads.fa";
|
|
70 open IN,"<$collapsefile";
|
|
71 while (my $aline=<IN>) {
|
|
72 chomp $aline;
|
|
73 <IN>;
|
|
74 $aline=~/:([\d|_]+)_x(\d+)$/;
|
|
75 my @lng=split/_/,$1;
|
|
76 for (my $i=0;$i<@lng;$i++) {
|
|
77 if ($lng[$i]>0) {
|
|
78 $trimNo[$i] +=$lng[$i];
|
|
79 $collapse[$i] ++;
|
|
80 }
|
|
81 }
|
|
82 }
|
|
83 close IN;
|
|
84
|
|
85 my @cleanR;my @cleanT;
|
|
86 my $clean=$prepath."collapse_reads_$opts{min}_$opts{max}.fa";
|
|
87 open IN,"<$clean";
|
|
88 while (my $aline=<IN>) {
|
|
89 chomp $aline;
|
|
90 <IN>;
|
|
91 $aline=~/:([\d|_]+)_x(\d+)$/;
|
|
92 my @lng=split/_/,$1;
|
|
93 for (my $i=0;$i<@lng;$i++) {
|
|
94 if ($lng[$i]>0) {
|
|
95 $cleanR[$i] +=$lng[$i];
|
|
96 $cleanT[$i] ++;
|
|
97 }
|
|
98 }
|
|
99 }
|
|
100 close IN;
|
|
101
|
|
102 print OUT "<table border=\"1\">
|
|
103 <tr align=\"center\">
|
|
104 <th> </th>
|
|
105 ";
|
|
106 foreach (@marks) {
|
|
107 print OUT "<th> $_ </th>\n";
|
|
108 }
|
|
109 print OUT "</tr>
|
|
110 <tr align=\"center\">
|
|
111 <th align=\"left\">Raw Reads No. </th>
|
|
112 ";
|
|
113 foreach (@rawNo) {
|
|
114 print OUT "<td> $_ </td>\n";
|
|
115 }
|
|
116 print OUT "</tr>
|
|
117 <tr align=\"center\">
|
|
118 <th align=\"left\">Reads No. After Trimed 3\' adapter </th>
|
|
119 ";
|
|
120 foreach (@trimNo) {
|
|
121 print OUT "<td> $_ </td>\n";
|
|
122 }
|
|
123 print OUT "</tr>
|
|
124 <tr align=\"center\">
|
|
125 <th align=\"left\">Unique Tags No. </th>
|
|
126 ";
|
|
127 foreach (@collapse) {
|
|
128 print OUT "<td> $_ </td>\n";
|
|
129 }
|
|
130 print OUT "</tr>
|
|
131 <tr align=\"center\">
|
|
132 <th align=\"left\">Clean Reads No. </th>
|
|
133 ";
|
|
134 foreach (@cleanR) {
|
|
135 print OUT "<td> $_ </td>\n";
|
|
136 }
|
|
137 print OUT "</tr>
|
|
138 <tr align=\"center\">
|
|
139 <th align=\"left\">Clean Tags No. </th>
|
|
140 ";
|
|
141 foreach (@cleanT) {
|
|
142 print OUT "<td> $_ </td>\n";
|
|
143 }
|
|
144 print OUT "</tr>\n</table>";
|
|
145 print OUT "<p>
|
|
146 Note:<br />
|
|
147 The raw data file path is: <b>$files[0]</b><br />
|
|
148 ";
|
|
149 for (my $i=1;$i<@files;$i++) {
|
|
150 print OUT "          <b>$files[$i]</b><br />";
|
|
151 }
|
|
152 print OUT "The collapsed file path is: <b>$collapsefile</b><br />
|
|
153 The clean data file path is: <b>$clean</b><br />
|
|
154 </p>
|
|
155 <h2> 1. Sequence length count</h2>
|
|
156 <h3> 1.1 Reads length count </h3>
|
|
157 ";
|
|
158 print OUT "\n";
|
|
159
|
|
160 my (%length); my $key="Tags Length";
|
|
161 open IN,"<$prepath/reads_length_distribution.txt";
|
|
162 while (my $aline=<IN>) {
|
|
163 chomp $aline;
|
|
164 next if($aline=~/^\s*$/);
|
|
165 if ($aline=~/^Reads/) { $key="Reads Length";}
|
|
166 my @tmp=split/\t/,$aline;
|
|
167 my @array=split/\s/,$tmp[1];
|
|
168 push @{$length{$key}},[$tmp[0],@array];
|
|
169 }
|
|
170 close IN;
|
|
171
|
|
172 print OUT "<table border=\"1\">
|
|
173 <tr align=\"center\">";
|
|
174 my $hashkey="Reads Length";
|
|
175 foreach (@{$length{$hashkey}[0]}) {
|
|
176 print OUT "<th> $_ </th>\n";
|
|
177 }
|
|
178 print OUT "</tr>";
|
|
179
|
|
180 for (my $i=1;$i<@{$length{$hashkey}};$i++) {
|
|
181 print OUT "<tr align=\"center\">
|
|
182 <th >$length{$hashkey}[$i][0] </th>
|
|
183 ";
|
|
184 for(my $j=1;$j<@{$length{$hashkey}[$i]};$j++) {
|
|
185 print OUT "<td> $length{$hashkey}[$i][$j] </td>\n";
|
|
186 }
|
|
187 print OUT "</tr>\n";
|
|
188 }
|
|
189 print OUT "</table>\n";
|
|
190
|
|
191 print OUT "<h3> 1.2 Tags length count </h3>";
|
|
192
|
|
193 print OUT "<table border=\"1\">
|
|
194 <tr align=\"center\">";
|
|
195 $hashkey="Tags Length";
|
|
196 foreach (@{$length{$hashkey}[0]}) {
|
|
197 print OUT "<th> $_ </th>\n";
|
|
198 }
|
|
199 print OUT "</tr>";
|
|
200
|
|
201 for (my $i=1;$i<@{$length{$hashkey}};$i++) {
|
|
202 print OUT "<tr align=\"center\">
|
|
203 <th > $length{$hashkey}[$i][0] </th>
|
|
204 ";
|
|
205 for(my $j=1;$j<@{$length{$hashkey}[$i]};$j++) {
|
|
206 print OUT "<td> $length{$hashkey}[$i][$j] </td>\n";
|
|
207 }
|
|
208 print OUT "</tr>\n";
|
|
209 }
|
|
210
|
|
211 print OUT "</table>\n";
|
|
212
|
|
213 print OUT "<h2> 2. Sequence length distribution </h2>";
|
|
214 my $length=$prepath."length.html";
|
|
215 open IN,"<$length";
|
|
216 while (my $aline=<IN>) {
|
|
217 chomp $aline;
|
|
218 print OUT "$aline\n";
|
|
219 }
|
|
220
|
|
221 #print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a>
|
|
222 #</p>
|
|
223 #";
|
|
224
|
|
225
|
|
226
|
|
227
|
|
228 ####genome map
|
|
229 #unless ($genomedir=~/\/$/) {
|
|
230 # $genomedir .="/";
|
|
231 #}
|
|
232
|
|
233 print OUT "<h2>2. Genome Alignment Result</h2>
|
|
234 <h3>2.1 Mapping count</h3>
|
|
235 ";
|
|
236
|
|
237 open IN,"<$genomepath/genome_mapped.fa";
|
|
238 my (@gread,@gtag);
|
|
239 while (my $aline=<IN>) {
|
|
240 chomp $aline;
|
|
241 <IN>;
|
|
242 $aline=~/:([\d|_]+)_x(\d+)$/;
|
|
243 my @sss=split/_/,$1;
|
|
244 for (my $i=0;$i<@sss;$i++) {
|
|
245 if ($sss[$i]>0) {
|
|
246 $gread[$i] +=$sss[$i];
|
|
247 $gtag[$i] ++;
|
|
248 }
|
|
249 }
|
|
250 }
|
|
251 close IN;
|
|
252
|
|
253 print OUT "<table border=\"1\">
|
|
254 <tr align=\"center\">
|
|
255 <th> </th>
|
|
256 ";
|
|
257 foreach (@marks) {
|
|
258 print OUT "<th> $_ </th>\n";
|
|
259 }
|
|
260 print OUT "</tr>
|
|
261 <tr align=\"center\">
|
|
262 <th align=\"left\">Genome Mapped Reads No. </th>
|
|
263 ";
|
|
264 foreach (@gread) {
|
|
265 print OUT "<td> $_ </td>\n";
|
|
266 }
|
|
267 print OUT "</tr>
|
|
268 <tr align=\"center\">
|
|
269 <th align=\"left\">Genome Mapped Reads Percent </th>
|
|
270 ";
|
|
271
|
|
272 for (my $i=0;$i<@gread;$i++) {
|
|
273 my $per=sprintf ("%.2f",$gread[$i]/$cleanR[$i]*100);
|
|
274 print OUT "<td> $per\%</td>\n";
|
|
275 }
|
|
276
|
|
277 print OUT "</tr>
|
|
278 <tr align=\"center\">
|
|
279 <th align=\"left\">Genome Mapped Tags No. </th>
|
|
280 ";
|
|
281 foreach (@gtag) {
|
|
282 print OUT "<td> $_ </td>\n";
|
|
283 }
|
|
284 print OUT "</tr>
|
|
285 <tr align=\"center\">
|
|
286 <th align=\"left\">Genome Mapped Tags Percent </th>
|
|
287 ";
|
|
288
|
|
289 for (my $i=0;$i<@gtag;$i++) {
|
|
290 my $per=sprintf ("%.2f",$gtag[$i]/$cleanT[$i]*100);
|
|
291 print OUT "<td> $per\%</td>\n";
|
|
292 }
|
|
293 print OUT "</tr>\n</table>";
|
|
294 print OUT "<p>
|
|
295 Note:<br />
|
|
296 The genome mapped bwt file path is: <b>$genomedir/genome_mapped.bwt</b><br />
|
|
297 The genome mapped FASTA file path is: <b>$genomedir/genome_mapped.fa</b>
|
|
298 <br />
|
|
299 ";
|
|
300
|
|
301
|
|
302
|
|
303 #### rfam
|
|
304 if(defined $rfampath && $rfampath=~/rfam_match/){
|
|
305 chomp $rfampath;
|
|
306 @tmp=split/\//,$rfampath;
|
|
307 $rfamdir=$tmp[-1];
|
|
308
|
|
309 unless ($rfampath=~/\/$/) {
|
|
310 $rfampath .="/";
|
|
311 }
|
|
312 print OUT "<h2>3. Rfam non-miRNA annotation</h2>
|
|
313 <h3>3.1 Reads count</h3>
|
|
314 <table border=\"1\">
|
|
315 <tr align=\"center\">
|
|
316 ";
|
|
317
|
|
318 my @rfamR; my @rfamT;
|
|
319 my $tag=1;
|
|
320 open IN,"<$dir/rfam_non-miRNA_annotation.txt";
|
|
321 while (my $aline=<IN>) {
|
|
322 chomp $aline;
|
|
323 $tag=0 if($aline=~/tags\s+number/);
|
|
324 next if($aline=~/^\#/);
|
|
325 next if($aline=~/^\s*$/);
|
|
326 my @tmp=split/\s+/,$aline;
|
|
327 if($tag == 1){push @rfamR,[@tmp];}
|
|
328 else{push @rfamT,[@tmp];}
|
|
329 }
|
|
330 close IN;
|
|
331
|
|
332
|
|
333 print OUT "<th>RNA Name</th>\n";
|
|
334 foreach (@marks) {
|
|
335 print OUT "<th> $_ </th>\n";
|
|
336 }
|
|
337 for (my $i=0;$i<@rfamR;$i++) {
|
|
338 print OUT "</tr>
|
|
339 <tr align=\"center\">
|
|
340 <th align=\"left\">$rfamR[$i][0]</th>
|
|
341 ";
|
|
342 for (my $j=1;$j<@{$rfamR[$i]} ;$j++) {
|
|
343 print OUT "<td> $rfamR[$i][$j]</td>\n";
|
|
344 }
|
|
345 }
|
|
346
|
|
347 print OUT "</tr>\n</table>
|
|
348 <h3>3.2 Tags count</h3>
|
|
349 <table border=\"1\">
|
|
350 <tr align=\"center\">
|
|
351 <th>RNA Name</th>\n";
|
|
352 foreach (@marks) {
|
|
353 print OUT "<th> $_ </th>\n";
|
|
354 }
|
|
355 for (my $i=0;$i<@rfamT;$i++) {
|
|
356 print OUT "</tr>
|
|
357 <tr align=\"center\">
|
|
358 <th align=\"left\">$rfamT[$i][0]</th>
|
|
359 ";
|
|
360 for (my $j=1;$j<@{$rfamT[$i]} ;$j++) {
|
|
361 print OUT "<td> $rfamT[$i][$j]</td>\n";
|
|
362 }
|
|
363 }
|
|
364 print OUT "</tr>\n</table>
|
|
365 <p>Note:<br />The rfam mapping results is: <b>$rfampath</b>";
|
|
366 print OUT "<b>rfam_mapped.bwt</b></p>
|
|
367 ";
|
|
368 }
|
|
369
|
|
370
|
|
371 print OUT "
|
|
372 </BODY>
|
|
373 </HTML>
|
|
374 ";
|
|
375 close OUT;
|
|
376
|
|
377 sub usage{
|
|
378 print <<"USAGE";
|
|
379 Version $version
|
|
380 Usage:
|
|
381 $0 -o
|
|
382 options:
|
|
383 -o output file
|
|
384 -h help
|
|
385 USAGE
|
|
386 exit(1);
|
|
387 }
|
|
388
|