Mercurial > repos > bigrna > gpsrna
view html_preprocess.pl @ 0:87fe81de0931 draft default tip
Uploaded
author | bigrna |
---|---|
date | Sun, 04 Jan 2015 02:47:25 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl -w #Filename: #Author: Tian Dongmei #Email: tiandm@big.ac.cn #Date: 2014-5-29 #Modified: #Description: my $version=1.00; use strict; use Getopt::Long; use File::Basename; my %opts; GetOptions(\%opts,"i=s","format=s","min=i","max=i","o=s","h"); if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments &usage; } my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath); my ($predir,$rfamdir,$knowndir,$genomedir,$noveldir); open IN,"<$opts{i}"; $config=<IN>; chomp $config; $prepath=<IN>; chomp $prepath; $genomepath=<IN>; chomp $genomepath; $rfampath=<IN>; close IN; my @tmp=split/\//,$prepath; $predir=$tmp[-1]; @tmp=split/\//,$genomepath; $genomedir=$tmp[-1]; my $dir=dirname($opts{'o'}); open OUT ,">$opts{'o'}"; print OUT "<HTML>\n <HEAD>\n <TITLE> Analysis Report </TITLE>\n </HEAD> <BODY bgcolor=\"lightgray\">\n <h1 align=\"center\">\n <font face=\"ºÚÌå\">\n <b>Preprocess Report</b>\n </font>\n </h1> <h2>1. Sequence No. and quality</h2> <h3>1.1 Sequece No.</h3> "; ### raw data no open IN,"<$config"; my @files;my @marks; my @rawNo; while (my $aline=<IN>) { chomp $aline; my @tmp=split/\t/,$aline; push @files,$tmp[0]; my $no=`less $tmp[0] |wc -l `; chomp $no; if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") { $no=$no/4; } else{ $no=$no/2; } push @rawNo,$no; push @marks,$tmp[1]; } close IN; ### preprocess unless ($prepath=~/\/$/) { $prepath .="/"; } my @trimNo;my @collapse; my $collapsefile=$prepath."collapse_reads.fa"; open IN,"<$collapsefile"; while (my $aline=<IN>) { chomp $aline; <IN>; $aline=~/:([\d|_]+)_x(\d+)$/; my @lng=split/_/,$1; for (my $i=0;$i<@lng;$i++) { if ($lng[$i]>0) { $trimNo[$i] +=$lng[$i]; $collapse[$i] ++; } } } close IN; my @cleanR;my @cleanT; my $clean=$prepath."collapse_reads_$opts{min}_$opts{max}.fa"; open IN,"<$clean"; while (my $aline=<IN>) { chomp $aline; <IN>; $aline=~/:([\d|_]+)_x(\d+)$/; my @lng=split/_/,$1; for (my $i=0;$i<@lng;$i++) { if ($lng[$i]>0) { $cleanR[$i] +=$lng[$i]; $cleanT[$i] ++; } } } close IN; print OUT "<table border=\"1\"> <tr align=\"center\"> <th> </th> "; foreach (@marks) { print OUT "<th> $_ </th>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Raw Reads No. </th> "; foreach (@rawNo) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Reads No. After Trimed 3\' adapter </th> "; foreach (@trimNo) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Unique Tags No. </th> "; foreach (@collapse) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Clean Reads No. </th> "; foreach (@cleanR) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Clean Tags No. </th> "; foreach (@cleanT) { print OUT "<td> $_ </td>\n"; } print OUT "</tr>\n</table>"; print OUT "<p> Note:<br /> The raw data file path is: <b>$files[0]</b><br /> "; for (my $i=1;$i<@files;$i++) { print OUT "          <b>$files[$i]</b><br />"; } print OUT "The collapsed file path is: <b>$collapsefile</b><br /> The clean data file path is: <b>$clean</b><br /> </p> <h2> 1. Sequence length count</h2> <h3> 1.1 Reads length count </h3> "; print OUT "\n"; my (%length); my $key="Tags Length"; open IN,"<$prepath/reads_length_distribution.txt"; while (my $aline=<IN>) { chomp $aline; next if($aline=~/^\s*$/); if ($aline=~/^Reads/) { $key="Reads Length";} my @tmp=split/\t/,$aline; my @array=split/\s/,$tmp[1]; push @{$length{$key}},[$tmp[0],@array]; } close IN; print OUT "<table border=\"1\"> <tr align=\"center\">"; my $hashkey="Reads Length"; foreach (@{$length{$hashkey}[0]}) { print OUT "<th> $_ </th>\n"; } print OUT "</tr>"; for (my $i=1;$i<@{$length{$hashkey}};$i++) { print OUT "<tr align=\"center\"> <th >$length{$hashkey}[$i][0] </th> "; for(my $j=1;$j<@{$length{$hashkey}[$i]};$j++) { print OUT "<td> $length{$hashkey}[$i][$j] </td>\n"; } print OUT "</tr>\n"; } print OUT "</table>\n"; print OUT "<h3> 1.2 Tags length count </h3>"; print OUT "<table border=\"1\"> <tr align=\"center\">"; $hashkey="Tags Length"; foreach (@{$length{$hashkey}[0]}) { print OUT "<th> $_ </th>\n"; } print OUT "</tr>"; for (my $i=1;$i<@{$length{$hashkey}};$i++) { print OUT "<tr align=\"center\"> <th > $length{$hashkey}[$i][0] </th> "; for(my $j=1;$j<@{$length{$hashkey}[$i]};$j++) { print OUT "<td> $length{$hashkey}[$i][$j] </td>\n"; } print OUT "</tr>\n"; } print OUT "</table>\n"; print OUT "<h2> 2. Sequence length distribution </h2>"; my $length=$prepath."length.html"; open IN,"<$length"; while (my $aline=<IN>) { chomp $aline; print OUT "$aline\n"; } #print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a> #</p> #"; ####genome map #unless ($genomedir=~/\/$/) { # $genomedir .="/"; #} print OUT "<h2>2. Genome Alignment Result</h2> <h3>2.1 Mapping count</h3> "; open IN,"<$genomepath/genome_mapped.fa"; my (@gread,@gtag); while (my $aline=<IN>) { chomp $aline; <IN>; $aline=~/:([\d|_]+)_x(\d+)$/; my @sss=split/_/,$1; for (my $i=0;$i<@sss;$i++) { if ($sss[$i]>0) { $gread[$i] +=$sss[$i]; $gtag[$i] ++; } } } close IN; print OUT "<table border=\"1\"> <tr align=\"center\"> <th> </th> "; foreach (@marks) { print OUT "<th> $_ </th>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Genome Mapped Reads No. </th> "; foreach (@gread) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Genome Mapped Reads Percent </th> "; for (my $i=0;$i<@gread;$i++) { my $per=sprintf ("%.2f",$gread[$i]/$cleanR[$i]*100); print OUT "<td> $per\%</td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Genome Mapped Tags No. </th> "; foreach (@gtag) { print OUT "<td> $_ </td>\n"; } print OUT "</tr> <tr align=\"center\"> <th align=\"left\">Genome Mapped Tags Percent </th> "; for (my $i=0;$i<@gtag;$i++) { my $per=sprintf ("%.2f",$gtag[$i]/$cleanT[$i]*100); print OUT "<td> $per\%</td>\n"; } print OUT "</tr>\n</table>"; print OUT "<p> Note:<br /> The genome mapped bwt file path is: <b>$genomedir/genome_mapped.bwt</b><br /> The genome mapped FASTA file path is: <b>$genomedir/genome_mapped.fa</b> <br /> "; #### rfam if(defined $rfampath && $rfampath=~/rfam_match/){ chomp $rfampath; @tmp=split/\//,$rfampath; $rfamdir=$tmp[-1]; unless ($rfampath=~/\/$/) { $rfampath .="/"; } print OUT "<h2>3. Rfam non-miRNA annotation</h2> <h3>3.1 Reads count</h3> <table border=\"1\"> <tr align=\"center\"> "; my @rfamR; my @rfamT; my $tag=1; open IN,"<$dir/rfam_non-miRNA_annotation.txt"; while (my $aline=<IN>) { chomp $aline; $tag=0 if($aline=~/tags\s+number/); next if($aline=~/^\#/); next if($aline=~/^\s*$/); my @tmp=split/\s+/,$aline; if($tag == 1){push @rfamR,[@tmp];} else{push @rfamT,[@tmp];} } close IN; print OUT "<th>RNA Name</th>\n"; foreach (@marks) { print OUT "<th> $_ </th>\n"; } for (my $i=0;$i<@rfamR;$i++) { print OUT "</tr> <tr align=\"center\"> <th align=\"left\">$rfamR[$i][0]</th> "; for (my $j=1;$j<@{$rfamR[$i]} ;$j++) { print OUT "<td> $rfamR[$i][$j]</td>\n"; } } print OUT "</tr>\n</table> <h3>3.2 Tags count</h3> <table border=\"1\"> <tr align=\"center\"> <th>RNA Name</th>\n"; foreach (@marks) { print OUT "<th> $_ </th>\n"; } for (my $i=0;$i<@rfamT;$i++) { print OUT "</tr> <tr align=\"center\"> <th align=\"left\">$rfamT[$i][0]</th> "; for (my $j=1;$j<@{$rfamT[$i]} ;$j++) { print OUT "<td> $rfamT[$i][$j]</td>\n"; } } print OUT "</tr>\n</table> <p>Note:<br />The rfam mapping results is: <b>$rfampath</b>"; print OUT "<b>rfam_mapped.bwt</b></p> "; } print OUT " </BODY> </HTML> "; close OUT; sub usage{ print <<"USAGE"; Version $version Usage: $0 -o options: -o output file -h help USAGE exit(1); }