Mercurial > repos > big-tiandm > mirplant2

--- a/convert_bowtie_to_blast.pl	Thu Nov 13 22:43:35 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,126 +0,0 @@
-#!/usr/bin/perl
-
-
-use warnings;
-use strict;
-use Getopt::Std;
-
-######################################### USAGE ################################
-
-my $usage=
-"$0 file_bowtie_result file_solexa_seq file_chromosome
-
-This is a converter which changes Bowtie output into Blast format.
-The input includes three files: a Bowtie result file (default Bowtie
-output file), a fasta file consisting of small Reads and a chromosome
-fasta file. It outputs the alignments in blast_parsed format.
-
-file_bowtie_result likes:
-
-AtFlower100010_x2	+	MIR319c	508	AAGGAGATTCTTTCAGTCCAG	IIIIIIIIIIIIIIIIIIIII	0
-AtFlower1000188_x1	+	MIR2933a	421	TCGGAGAGGAAATTCGTCGGCG	IIIIIIIIIIIIIIIIIIIIII	0
-
-file_solexa_seq likes:
-
->AtFlower100010_x2
-AAGGAGATTCTTTCAGTCCAG
-
-file_chromosome contains chromosome seq in fasta format
-
-";
-
-
-####################################### INPUT FILES ############################
-
-my $file_bowtie_result=shift or die $usage;
-my $file_short_seq=shift or die $usage;
-my $file_chromosome_seq=shift or die $usage;
-
-
-##################################### GLOBAL VARIBALES #########################
-
-my %short_seq_length=();
-my %chromosome_length=();
-
-
-######################################### MAIN #################################
-
-#get the short sequence id and its length
-sequence_length($file_short_seq,\%short_seq_length);
-
-#get the chromosome sequence id and its length
-sequence_length($file_chromosome_seq,\%chromosome_length);
-
-#convert bowtie result format to blast format;
-change_format($file_bowtie_result);
-
-exit;
-
-
-##################################### SUBROUTINES ##############################
-
-sub sequence_length{
-    my ($file,$hash) = @_;
-    my ($id, $desc, $sequence, $seq_length) = ();
-
-    open (FASTA, "<$file") or die "can not open $$file\n";
-    while (<FASTA>)
-    {
-        chomp;
-        if (/^>(\S+)(.*)/)
-	{
-	    $id       = $1;
-	    $desc     = $2;
-	    $sequence = "";
-	    while (<FASTA>){
-                chomp;
-                if (/^>(\S+)(.*)/){
-		    $$hash{$id}  = length $sequence;
-		    $id         = $1;
-		    $desc       = $2;
-		    $sequence   = "";
-		    next;
-                }
-		$sequence .= $_;
-            }
-        }
-    }
-    $seq_length=length($sequence);
-    $$hash{$id} = $seq_length;
-    close FASTA;
-}
-
-
-
-
-
-sub change_format{
-	#Change Bowtie format into blast format
-	my $file=shift @_;
-	open(FILE,"<$file")||die"can not open the bowtie result file:$!\n";
-  #open(BLASTOUT,">blastout")||die"can not create the blastout file:$!\n";
-
-	while(<FILE>){
-		chomp;
-		my @tmp=split("\t",$_);
-	#Clean the reads ID
-		my @tmp1=split(" ",$tmp[0]);
-		print  "$tmp1[0]"."\t"."$short_seq_length{$tmp1[0]}"."\t"."1".'..'."$short_seq_length{$tmp1[0]}"."\t"."$tmp[2]"."\t"."$chromosome_length{$tmp[2]}"."\t";
-		if($tmp[1] eq "+"){
-			my $seq_end=$tmp[3] + $short_seq_length{$tmp1[0]};
-			my $seq_bg=$tmp[3] + 1;
-			print  "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Plus"."\n";
-			}
-		if($tmp[1] eq "-"){
-			my $seq_end=$chromosome_length{$tmp[2]} - $tmp[3];
-			my $seq_bg=$seq_end - $short_seq_length{$tmp1[0]} + 1;
-			print  "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Minus"."\n";
-			}
-		}
-
-#	close BLASTOUT;
-
-}
-
-
-
--- a/html.pl	Thu Nov 13 22:43:35 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,273 +0,0 @@
-#!/usr/bin/perl -w
-#Filename:
-#Author: Tian Dongmei
-#Email: tiandm@big.ac.cn
-#Date: 2014-5-29
-#Modified:
-#Description:
-my $version=1.00;
-
-use strict;
-use Getopt::Long;
-use File::Basename;
-
-my %opts;
-GetOptions(\%opts,"i=s","format=s","o=s","h");
-if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments
-&usage;
-}
-my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath);
-my ($predir,$rfamdir,$knowndir,$genomedir,$noveldir);
-open IN,"<$opts{i}";
-$config=<IN>; chomp $config;
-$prepath=<IN>; chomp $prepath;
-$rfampath=<IN>;chomp $rfampath;
-$knownpath=<IN>; chomp $knownpath;
-$genomepath=<IN>; chomp $genomepath;
-$novelpath=<IN>; chomp $novelpath;
-close IN;
-my @tmp=split/\//,$prepath;
-$predir=$tmp[-1];
-@tmp=split/\//,$rfampath;
-$rfamdir=$tmp[-1];
-@tmp=split/\//,$knownpath;
-$knowndir=$tmp[-1];
-@tmp=split/\//,$genomepath;
-$genomedir=$tmp[-1];
-@tmp=split/\//,$novelpath;
-$noveldir=$tmp[-1];
-
-my $dir=dirname($opts{'o'});
-
-open OUT ,">$opts{'o'}";
-print OUT "<HTML>\n  <HEAD>\n  <TITLE> Analysis Report </TITLE>\n </HEAD>
- <BODY bgcolor=\"lightgray\">\n  <h1 align=\"center\">\n    <font face=\"����\">\n	<b>Small RNA Analysis Report</b>\n  </font>\n  </h1>
-  <h2>1. Sequence No. and quality</h2>
-  <h3>1.1 Sequece No.</h3>
-";
-
-### raw data no
-open IN,"<$config";
-my @files;my @marks; my @rawNo;
-while (my $aline=<IN>) {
-	chomp $aline;
-	my @tmp=split/\t/,$aline;
-	push @files,$tmp[0];
-
-	my $no=`less $tmp[0] |wc -l `;
-	chomp $no;
-	if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") {
-		$no=$no/4;
-	}
-	else{
-		$no=$no/2;
-	}
-	push @rawNo,$no;
-
-	push @marks,$tmp[1];
-}
-close IN;
-
-### preprocess
-unless ($prepath=~/\/$/) {
-	$prepath .="/";
-}
-
-my @trimNo;my @collapse;
-my $collapsefile=$prepath."collapse_reads.fa";
-open IN,"<$collapsefile";
-while (my $aline=<IN>) {
-	chomp $aline;
-	<IN>;
-	$aline=~/:([\d|_]+)_x(\d+)$/;
-	my @lng=split/_/,$1;
-	for (my $i=0;$i<@lng;$i++) {
-		if ($lng[$i]>0) {
-			$trimNo[$i] +=$lng[$i];
-			$collapse[$i] ++;
-		}
-	}
-}
-close IN;
-
-my @cleanR;my @cleanT;
-my $clean=$prepath."collapse_reads_19_28.fa";
-open IN,"<$clean";
-while (my $aline=<IN>) {
-	chomp $aline;
-	<IN>;
-	$aline=~/:([\d|_]+)_x(\d+)$/;
-	my @lng=split/_/,$1;
-	for (my $i=0;$i<@lng;$i++) {
-		if ($lng[$i]>0) {
-			$cleanR[$i] +=$lng[$i];
-			$cleanT[$i] ++;
-		}
-	}
-}
-close IN;
-
-print OUT "<table border=\"1\">
-<tr align=\"center\">
-<th>&nbsp;</th>
-";
-foreach  (@marks) {
-	print OUT "<th> $_ </th>\n";
-}
-print  OUT "</tr>
-<tr align=\"center\">
-<th align=\"left\">Raw Reads No. </th>
-";
-foreach  (@rawNo) {
-	print OUT "<td> $_ </td>\n";
-}
-print OUT "</tr>
-<tr align=\"center\">
-<th align=\"left\">Reads No. After Trimed 3\' adapter </th>
-";
-foreach  (@trimNo) {
-	print OUT "<td> $_ </td>\n";
-}
-print OUT "</tr>
-<tr align=\"center\">
-<th align=\"left\">Unique Tags No. </th>
-";
-foreach  (@collapse) {
-	print OUT "<td> $_ </td>\n";
-}
-print OUT  "</tr>
-<tr align=\"center\">
-<th align=\"left\">Clean Reads No. </th>
-";
-foreach  (@cleanR) {
-	print OUT "<td> $_ </td>\n";
-}
-print OUT "</tr>
-<tr align=\"center\">
-<th align=\"left\">Clean Tags No. </th>
-";
-foreach  (@cleanT) {
-	print OUT "<td> $_ </td>\n";
-}
-print OUT "</tr>\n</table>";
-print OUT "<p>
-Note:<br />
-The raw data file path is: <b>$files[0]</b><br />
-";
-for (my $i=1;$i<@files;$i++) {
-	print OUT "&nbsp;&nbsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;<b>$files[$i]</b><br />";
-}
-print OUT "The collapsed file path is: <b>$collapsefile</b><br />
-The clean data file path is: <b>$clean</b><br />
-</p>
-<h2> 1. Sequence length count</h2>
-";
-print OUT "\n";
-
-my $length=$prepath."length.html";
-open IN,"<$length";
-while (my $aline=<IN>) {
-	chomp $aline;
-	print OUT "$aline\n";
-}
-
-print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a>
-</p>
-";
-
-#### rfam
-unless ($rfampath=~/\/$/) {
-	$rfampath .="/";
-}
-print OUT "<h2>2. Rfam non-miRNA annotation</h2>
-<h3>2.1 Reads count</h3>
-<table border=\"1\">
-<tr align=\"center\">
-";
-
-my @rfamR; my @rfamT;
-my $tag=1;
-open IN,"<$dir/rfam_non-miRNA_annotation.txt";
-while (my $aline=<IN>) {
-	chomp $aline;
-	$tag=0 if($aline=~/tags\s+number/);
-	next if($aline=~/^\#/);
-	next if($aline=~/^\s*$/);
-	my @tmp=split/\s+/,$aline;
-	if($tag == 1){push @rfamR,[@tmp];}
-	else{push @rfamT,[@tmp];}
-}
-close IN;
-
-
-print OUT "<th>RNA Name</th>\n";
-foreach  (@marks) {
-	print OUT "<th> $_ </th>\n";
-}
-for (my $i=0;$i<@rfamR;$i++) {
-	print  OUT "</tr>
-	<tr align=\"center\">
-	<th align=\"left\">$rfamR[$i][0]</th>
-	";
-	for (my $j=1;$j<@{$rfamR[$i]} ;$j++) {
-	print OUT "<td> $rfamR[$i][$j]</td>\n";
-	}
-}
-
-print OUT "</tr>\n</table>
-  <h3>2.2 Tags count</h3>
-  <table border=\"1\">
-   <tr align=\"center\">
-   <th>RNA Name</th>\n";
-foreach  (@marks) {
-	print OUT "<th> $_ </th>\n";
-}
-for (my $i=0;$i<@rfamT;$i++) {
-	print  OUT "</tr>
-	<tr align=\"center\">
-	<th align=\"left\">$rfamT[$i][0]</th>
-	";
-	for (my $j=1;$j<@{$rfamT[$i]} ;$j++) {
-	print OUT "<td> $rfamT[$i][$j]</td>\n";
-	}
-}
-print OUT "</tr>\n</table>
-<p>Note:<br />The rfam mapping results is: <b>$rfampath</b>";
-print OUT "<b>rfam_mapped.bwt</b></p>
-  <h2>3. MicroRNA result</h2>
-  <h3>3.1 known microRNA</h3>
-  <p>The known microRNA express list: <a href=\"./known_microRNA_express.txt\"> known_microRNA_express.txt</a><br/>
-  The known microRNA alngment file: <a href=\"./known_microRNA_express.aln\"> known_microRNA_express.aln</a><br/>
-  The known moRs file: <a href=\"./known_microRNA_express.moRs\"> known_microRNA_express.moRs</a><br/>
-  The known microRNA mature sequence file: <a href=\"./known_microRNA_mature.fa\"> known_microRNA_mature.fa</a><br/>
-  The knowm microRNA precursor sequence file: <a href=\"./known_microRNA_precursor.fa\"> known_microRNA_precursor.fa</a>
-  </p>
-
-  <h3>3.2 novel microRNA</h3>
-  <p>The novel microRNA prediction file:<a href=\"./microRNA_prediction.mrd\"> microRNA_prediction.mrd</a><br/>
-  The novel microRNA express list: <a href=\"./novel_microRNA_express.txt\"> novel_microRNA_express.txt</a><br/>
-  The novel microRNA mature sequence file: <a href=\"./novel_microRNA_mature.fa\"> novel_microRNA_mature.fa</a><br/>
-  The novel microRNA precursor sequence file: <a href=\"./novel_microRNA_precursor.fa\"> novel_microRNA_precursor.fa</a>
-  </p>
-";
-
-
-
-print OUT "
- </BODY>
-</HTML>
-";
-close OUT;
-
-sub usage{
-print <<"USAGE";
-Version $version
-Usage:
-$0 -o
-options:
--o output file
--h help
-USAGE
-exit(1);
-}
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/html_preprocess.pl	Wed Dec 03 01:54:29 2014 -0500
@@ -0,0 +1,388 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-5-29
+#Modified:
+#Description:
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+my %opts;
+GetOptions(\%opts,"i=s","format=s","min=i","max=i","o=s","h");
+if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath);
+my ($predir,$rfamdir,$knowndir,$genomedir,$noveldir);
+open IN,"<$opts{i}";
+$config=<IN>; chomp $config;
+$prepath=<IN>; chomp $prepath;
+$genomepath=<IN>; chomp $genomepath;
+$rfampath=<IN>;
+close IN;
+my @tmp=split/\//,$prepath;
+$predir=$tmp[-1];
+@tmp=split/\//,$genomepath;
+$genomedir=$tmp[-1];
+
+my $dir=dirname($opts{'o'});
+
+open OUT ,">$opts{'o'}";
+print OUT "<HTML>\n  <HEAD>\n  <TITLE> Analysis Report </TITLE>\n </HEAD>
+ <BODY bgcolor=\"lightgray\">\n  <h1 align=\"center\">\n    <font face=\"����\">\n	<b>Preprocess Report</b>\n  </font>\n  </h1>
+  <h2>1. Sequence No. and quality</h2>
+  <h3>1.1 Sequece No.</h3>
+";
+
+### raw data no
+open IN,"<$config";
+my @files;my @marks; my @rawNo;
+while (my $aline=<IN>) {
+	chomp $aline;
+	my @tmp=split/\t/,$aline;
+	push @files,$tmp[0];
+
+	my $no=`less $tmp[0] |wc -l `;
+	chomp $no;
+	if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") {
+		$no=$no/4;
+	}
+	else{
+		$no=$no/2;
+	}
+	push @rawNo,$no;
+
+	push @marks,$tmp[1];
+}
+close IN;
+
+### preprocess
+unless ($prepath=~/\/$/) {
+	$prepath .="/";
+}
+
+my @trimNo;my @collapse;
+my $collapsefile=$prepath."collapse_reads.fa";
+open IN,"<$collapsefile";
+while (my $aline=<IN>) {
+	chomp $aline;
+	<IN>;
+	$aline=~/:([\d|_]+)_x(\d+)$/;
+	my @lng=split/_/,$1;
+	for (my $i=0;$i<@lng;$i++) {
+		if ($lng[$i]>0) {
+			$trimNo[$i] +=$lng[$i];
+			$collapse[$i] ++;
+		}
+	}
+}
+close IN;
+
+my @cleanR;my @cleanT;
+my $clean=$prepath."collapse_reads_$opts{min}_$opts{max}.fa";
+open IN,"<$clean";
+while (my $aline=<IN>) {
+	chomp $aline;
+	<IN>;
+	$aline=~/:([\d|_]+)_x(\d+)$/;
+	my @lng=split/_/,$1;
+	for (my $i=0;$i<@lng;$i++) {
+		if ($lng[$i]>0) {
+			$cleanR[$i] +=$lng[$i];
+			$cleanT[$i] ++;
+		}
+	}
+}
+close IN;
+
+print OUT "<table border=\"1\">
+<tr align=\"center\">
+<th>&nbsp;</th>
+";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+print  OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Raw Reads No. </th>
+";
+foreach  (@rawNo) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Reads No. After Trimed 3\' adapter </th>
+";
+foreach  (@trimNo) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Unique Tags No. </th>
+";
+foreach  (@collapse) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT  "</tr>
+<tr align=\"center\">
+<th align=\"left\">Clean Reads No. </th>
+";
+foreach  (@cleanR) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Clean Tags No. </th>
+";
+foreach  (@cleanT) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>\n</table>";
+print OUT "<p>
+Note:<br />
+The raw data file path is: <b>$files[0]</b><br />
+";
+for (my $i=1;$i<@files;$i++) {
+	print OUT "&nbsp;&nbsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;<b>$files[$i]</b><br />";
+}
+print OUT "The collapsed file path is: <b>$collapsefile</b><br />
+The clean data file path is: <b>$clean</b><br />
+</p>
+<h2> 1. Sequence length count</h2>
+<h3> 1.1 Reads length count </h3>
+";
+print OUT "\n";
+
+my (%length); my $key="Tags Length";
+open IN,"<$prepath/reads_length_distribution.txt";
+while (my $aline=<IN>) {
+	chomp $aline;
+	next if($aline=~/^\s*$/);
+	if ($aline=~/^Reads/) { $key="Reads Length";}
+	my @tmp=split/\t/,$aline;
+	my @array=split/\s/,$tmp[1];
+	push @{$length{$key}},[$tmp[0],@array];
+}
+close IN;
+
+print OUT "<table border=\"1\">
+<tr align=\"center\">";
+my $hashkey="Reads Length";
+foreach  (@{$length{$hashkey}[0]}) {
+	print OUT "<th> $_ </th>\n";
+}
+print  OUT "</tr>";
+
+for (my $i=1;$i<@{$length{$hashkey}};$i++) {
+	print OUT "<tr align=\"center\">
+	<th >$length{$hashkey}[$i][0] </th>
+	";
+	for(my $j=1;$j<@{$length{$hashkey}[$i]};$j++) {
+		print OUT "<td> $length{$hashkey}[$i][$j] </td>\n";
+	}
+	print OUT "</tr>\n";
+}
+print OUT "</table>\n";
+
+print OUT "<h3> 1.2 Tags length count </h3>";
+
+print OUT "<table border=\"1\">
+<tr align=\"center\">";
+$hashkey="Tags Length";
+foreach  (@{$length{$hashkey}[0]}) {
+	print OUT "<th> $_ </th>\n";
+}
+print  OUT "</tr>";
+
+for (my $i=1;$i<@{$length{$hashkey}};$i++) {
+	print OUT "<tr align=\"center\">
+	<th > $length{$hashkey}[$i][0] </th>
+	";
+	for(my $j=1;$j<@{$length{$hashkey}[$i]};$j++) {
+		print OUT "<td> $length{$hashkey}[$i][$j] </td>\n";
+	}
+	print OUT "</tr>\n";
+}
+
+print OUT "</table>\n";
+
+print OUT "<h2> 2. Sequence length distribution </h2>";
+my $length=$prepath."length.html";
+open IN,"<$length";
+while (my $aline=<IN>) {
+	chomp $aline;
+	print OUT "$aline\n";
+}
+
+#print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a>
+#</p>
+#";
+
+
+
+
+####genome map
+#unless ($genomedir=~/\/$/) {
+#	$genomedir .="/";
+#}
+
+print OUT "<h2>2. Genome Alignment Result</h2>
+<h3>2.1 Mapping count</h3>
+";
+
+open IN,"<$genomepath/genome_mapped.fa";
+my (@gread,@gtag);
+while (my $aline=<IN>) {
+	chomp $aline;
+	<IN>;
+	$aline=~/:([\d|_]+)_x(\d+)$/;
+	my @sss=split/_/,$1;
+	for (my $i=0;$i<@sss;$i++) {
+		if ($sss[$i]>0) {
+			$gread[$i] +=$sss[$i];
+			$gtag[$i] ++;
+		}
+	}
+}
+close IN;
+
+print OUT "<table border=\"1\">
+<tr align=\"center\">
+<th>&nbsp;</th>
+";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+print  OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Genome Mapped Reads No. </th>
+";
+foreach  (@gread) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Genome Mapped Reads Percent </th>
+";
+
+for (my $i=0;$i<@gread;$i++) {
+	my $per=sprintf ("%.2f",$gread[$i]/$cleanR[$i]*100);
+	print OUT "<td> $per\%</td>\n";
+}
+
+print  OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Genome Mapped Tags No. </th>
+";
+foreach  (@gtag) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Genome Mapped Tags Percent </th>
+";
+
+for (my $i=0;$i<@gtag;$i++) {
+	my $per=sprintf ("%.2f",$gtag[$i]/$cleanT[$i]*100);
+	print OUT "<td> $per\%</td>\n";
+}
+print OUT "</tr>\n</table>";
+print OUT "<p>
+Note:<br />
+The genome mapped bwt file path is: <b>$genomedir/genome_mapped.bwt</b><br />
+The genome mapped FASTA file path is: <b>$genomedir/genome_mapped.fa</b>
+<br />
+";
+
+
+
+#### rfam
+if(defined $rfampath && $rfampath=~/rfam_match/){
+chomp $rfampath;
+@tmp=split/\//,$rfampath;
+$rfamdir=$tmp[-1];
+
+unless ($rfampath=~/\/$/) {
+	$rfampath .="/";
+}
+print OUT "<h2>3. Rfam non-miRNA annotation</h2>
+<h3>3.1 Reads count</h3>
+<table border=\"1\">
+<tr align=\"center\">
+";
+
+my @rfamR; my @rfamT;
+my $tag=1;
+open IN,"<$dir/rfam_non-miRNA_annotation.txt";
+while (my $aline=<IN>) {
+	chomp $aline;
+	$tag=0 if($aline=~/tags\s+number/);
+	next if($aline=~/^\#/);
+	next if($aline=~/^\s*$/);
+	my @tmp=split/\s+/,$aline;
+	if($tag == 1){push @rfamR,[@tmp];}
+	else{push @rfamT,[@tmp];}
+}
+close IN;
+
+
+print OUT "<th>RNA Name</th>\n";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+for (my $i=0;$i<@rfamR;$i++) {
+	print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">$rfamR[$i][0]</th>
+	";
+	for (my $j=1;$j<@{$rfamR[$i]} ;$j++) {
+	print OUT "<td> $rfamR[$i][$j]</td>\n";
+	}
+}
+
+print OUT "</tr>\n</table>
+  <h3>3.2 Tags count</h3>
+  <table border=\"1\">
+   <tr align=\"center\">
+   <th>RNA Name</th>\n";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+for (my $i=0;$i<@rfamT;$i++) {
+	print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">$rfamT[$i][0]</th>
+	";
+	for (my $j=1;$j<@{$rfamT[$i]} ;$j++) {
+	print OUT "<td> $rfamT[$i][$j]</td>\n";
+	}
+}
+print OUT "</tr>\n</table>
+<p>Note:<br />The rfam mapping results is: <b>$rfampath</b>";
+print OUT "<b>rfam_mapped.bwt</b></p>
+";
+}
+
+
+print OUT "
+ </BODY>
+</HTML>
+";
+close OUT;
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -o
+options:
+-o output file
+-h help
+USAGE
+exit(1);
+}
+
--- a/matching.pl	Thu Nov 13 22:43:35 2014 -0500
+++ b/matching.pl	Wed Dec 03 01:54:29 2014 -0500
@@ -11,7 +11,7 @@
 use Getopt::Long;

 my %opts;
-GetOptions(\%opts,"i=s","g=s","index:s","v:i","p:i","r:s","o=s","time:s","h");
+GetOptions(\%opts,"i=s","g=s","index:s","v:i","p:i","r:s","o=s","h");
 if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
 &usage;
 }
@@ -27,11 +27,7 @@


 my $time=&Time();
-if (defined $opts{'time'}) {
-	$time=$opts{'time'};
-}
-
-my $mapdir=$fileout."/genome_match_".$time;
+my $mapdir=$fileout."/genome_match";
 if(not -d $mapdir){
 	mkdir $mapdir;
 }
--- a/miRDeep_plant.pl	Thu Nov 13 22:43:35 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1622 +0,0 @@
-#!/usr/bin/perl
-
-use warnings;
-use strict;
-use Getopt::Std;
-#use RNA;
-
-
-################################# MIRDEEP #################################################
-
-################################## USAGE ##################################################
-
-
-my $usage=
-"$0 file_signature file_structure temp_out_directory
-
-This is the core algorithm of miRDeep. It takes as input a file in blastparsed format with
-information on the positions of reads aligned to potential precursor sequences (signature).
-It also takes as input an RNAfold output file, giving information on the sequence, structure
-and mimimum free energy of the potential precursor sequences.
-
-Extra arguments can be given. -s specifies a fastafile containing the known mature miRNA
-sequences that should be considered for conservation purposes. -t prints out the potential
-precursor sequences that do _not_ exceed the cut-off (default prints out the sequences that
-exceeds the cut-off). -u gives limited output, that is only the ids of the potential precursors
-that exceed the cut-off. -v varies the cut-off. -x is a sensitive option for Sanger sequences
-obtained through conventional cloning. -z consider the number of base pairings in the lower
-stems (this option is not well tested).
-
--h print this usage
--s fasta file with known miRNAs
-#-o temp directory ,maked befor running the program.
--t print filtered
--u limited output (only ids)
--v cut-off (default 1)
--x sensitive option for Sanger sequences
--y use Randfold
--z consider Drosha processing
-";
-
-
-
-
-
-############################################################################################
-
-################################### INPUT ##################################################
-
-
-#signature file in blast_parsed format
-my $file_blast_parsed=shift or die $usage;
-
-#structure file outputted from RNAfold
-my $file_struct=shift or die $usage;
-
-my $tmpdir=shift or die $usage;
-#options
-my %options=();
-getopts("hs:tuv:xyz",\%options);
-
-
-
-
-
-
-#############################################################################################
-
-############################# GLOBAL VARIABLES ##############################################
-
-
-#parameters
-my $nucleus_lng=11;
-
-my $score_star=3.9;
-my $score_star_not=-1.3;
-my $score_nucleus=7.63;
-my $score_nucleus_not=-1.17;
-my $score_randfold=1.37;
-my $score_randfold_not=-3.624;
-my $score_intercept=0.3;
-my @scores_stem=(-3.1,-2.3,-2.2,-1.6,-1.5,0.1,0.6,0.8,0.9,0.9,0);
-my $score_min=1;
-if($options{v}){$score_min=$options{v};}
-if($options{x}){$score_min=-5;}
-
-my $e=2.718281828;
-
-#hashes
-my %hash_desc;
-my %hash_seq;
-my %hash_struct;
-my %hash_mfe;
-my %hash_nuclei;
-my %hash_mirs;
-my %hash_query;
-my %hash_comp;
-my %hash_bp;
-
-#other variables
-my $subject_old;
-my $message_filter;
-my $message_score;
-my $lines;
-my $out_of_bound;
-
-
-
-##############################################################################################
-
-################################  MAIN  ######################################################
-
-
-#print help if that option is used
-if($options{h}){die $usage;}
-unless ($tmpdir=~/\/$/) {$tmpdir .="/";}
-if(!(-s $tmpdir)){mkdir $tmpdir;}
-$tmpdir .="TMP_DIR/";
-mkdir $tmpdir;
-
-#parse structure file outputted from RNAfold
-parse_file_struct($file_struct);
-
-#if conservation is scored, the fasta file of known miRNA sequences is parsed
-if($options{s}){create_hash_nuclei($options{s})};
-
-#parse signature file in blast_parsed format and resolve each potential precursor
-parse_file_blast_parsed($file_blast_parsed);
-#`rm -rf $tmpdir`;
-exit;
-
-
-
-
-##############################################################################################
-
-############################## SUBROUTINES ###################################################
-
-
-
-sub parse_file_blast_parsed{
-
-#    read through the signature blastparsed file, fills up a hash with information on queries
-#    (deep sequences) mapping to the current subject (potential precursor) and resolve each
-#    potential precursor in turn
-
-    my $file_blast_parsed=shift;
-
-    open (FILE_BLAST_PARSED, "<$file_blast_parsed") or die "can not open $file_blast_parsed\n";
-    while (my $line=<FILE_BLAST_PARSED>){
-		if($line=~/^(\S+)\s+(\S+)\s+(\d+)\.+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\.+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(.+)$/){
-			my $query=$1;
-			my $query_lng=$2;
-			my $query_beg=$3;
-			my $query_end=$4;
-			my $subject=$5;
-			my $subject_lng=$6;
-			my $subject_beg=$7;
-			my $subject_end=$8;
-			my $e_value=$9;
-			my $pid=$10;
-			my $bitscore=$11;
-			my $other=$12;
-
-			#if the new line concerns a new subject (potential precursor) then the old subject must be resolved
-			if($subject_old and $subject_old ne $subject){
-				resolve_potential_precursor();
-			}
-
-			#resolve the strand
-			my $strand=find_strand($other);
-
-			#resolve the number of reads that the deep sequence represents
-			my $freq=find_freq($query);
-
-			#read information of the query (deep sequence) into hash
-			$hash_query{$query}{"subject_beg"}=$subject_beg;
-			$hash_query{$query}{"subject_end"}=$subject_end;
-			$hash_query{$query}{"strand"}=$strand;
-			$hash_query{$query}{"freq"}=$freq;
-
-			#save the signature information
-			$lines.=$line;
-
-			$subject_old=$subject;
-		}
-    }
-    resolve_potential_precursor();
-}
-
-sub resolve_potential_precursor{
-
-#    dissects the potential precursor in parts by filling hashes, and tests if it passes the
-#    initial filter and the scoring filter
-
-#    binary variable whether the potential precursor is still viable
-    my $ret=1;
-#print STDERR ">$subject_old\n";
-
-    fill_structure();
-#print STDERR "\%hash_bp",scalar keys %hash_bp,"\n";
-    fill_pri();
-#print STDERR "\%hash_comp",scalar keys %hash_comp,"\n";
-
-    fill_mature();
-#print STDERR "\%hash_comp",scalar keys %hash_comp,"\n";
-
-    fill_star();
-#print STDERR "\%hash_comp",scalar keys %hash_comp,"\n";
-
-    fill_loop();
-#print STDERR "\%hash_comp",scalar keys %hash_comp,"\n";
-
-    fill_lower_flanks();
-#print STDERR "\%hash_comp",scalar keys %hash_comp,"\n";
-
-#    do_test_assemble();
-
-#    this is the actual classification
-    unless(pass_filtering_initial() and pass_threshold_score()){$ret=0;}
-
-    print_results($ret);
-
-    reset_variables();
-
-    return;
-
-}
-
-
-
-sub print_results{
-
-    my $ret=shift;
-
-#    print out if the precursor is accepted and accepted precursors should be printed out
-#    or if the potential precursor is discarded and discarded potential precursors should
-#    be printed out
-
-    if((!$options{t} and $ret) or ($options{t} and !$ret)){
-	#full output
-		unless($options{u}){
-			if($message_filter){print $message_filter;}
-			if($message_score){print $message_score;}
-			print_hash_comp();
-			print $lines,"\n\n";
-			return;
-		}
-		#limited output (only ids)
-		my $id=$hash_comp{"pri_id"};
-		print "$id\n";
-    }
-}
-
-
-
-
-
-
-
-sub pass_threshold_score{
-
-#    this is the scoring
-
-    #minimum free energy of the potential precursor
-#    my $score_mfe=score_mfe($hash_comp{"pri_mfe"});
-    my $score_mfe=score_mfe($hash_comp{"pri_mfe"},$hash_comp{"pri_end"});
-
-    #count of reads that map in accordance with Dicer processing
-    my $score_freq=score_freq($hash_comp{"freq"});
-#print STDERR "score_mfe: $score_mfe\nscore_freq: $score_freq\n";
-
-    #basic score
-    my $score=$score_mfe+$score_freq;
-
-    #scoring of conserved nucleus/seed (optional)
-    if($options{s}){
-
-	#if the nucleus is conserved
-	if(test_nucleus_conservation()){
-
-	    #nucleus from position 2-8
-	    my $nucleus=substr($hash_comp{"mature_seq"},1,$nucleus_lng);
-
-	    #resolve DNA/RNA ambiguities
-	    $nucleus=~tr/[T]/[U]/;
-
-	    #print score contribution
-	    score_s("score_nucleus\t$score_nucleus");
-
-	    #print the ids of known miRNAs with same nucleus
-	    score_s("$hash_mirs{$nucleus}");
-#print STDERR "score_nucleus\t$score_nucleus\n";
-
-	    #add to score
-	    $score+=$score_nucleus;
-
-	#if the nucleus is not conserved
-	}else{
-	    #print (negative) score contribution
-	    score_s("score_nucleus\t$score_nucleus_not");
-
-	    #add (negative) score contribution
-	    $score+=$score_nucleus_not;
-	}
-    }
-
-    #if the majority of potential star reads fall as expected from Dicer processing
-    if($hash_comp{"star_read"}){
-	score_s("score_star\t$score_star");
-#print STDERR "score_star\t$score_star\n";
-	$score+=$score_star;
-    }else{
-	score_s("score_star\t$score_star_not");
-#print STDERR "score_star_not\t$score_star_not\n";
-	$score+=$score_star_not;
-    }
-
-    #score lower stems for potential for Drosha recognition (highly optional)
-    if($options{z}){
-	my $stem_bp=$hash_comp{"stem_bp"};
-	my $score_stem=$scores_stem[$stem_bp];
-	$score+=$score_stem;
-	score_s("score_stem\t$score_stem");
-    }
-
-#print STDERR "score_intercept\t$score_intercept\n";
-
-    $score+=$score_intercept;
-
-    #score for randfold (optional)
-    if($options{y}){
-
-#	only calculate randfold value if it can make the difference between the potential precursor
-#	being accepted or discarded
-	if($score+$score_randfold>=$score_min and $score+$score_randfold_not<=$score_min){
-
-	    #randfold value<0.05
-	    if(test_randfold()){$score+=$score_randfold;score_s("score_randfold\t$score_randfold");}
-
-	    #randfold value>0.05
-	    else{$score+=$score_randfold_not;score_s("score_randfold\t$score_randfold_not");}
-	}
-    }
-
-    #round off values to one decimal
-    my $round_mfe=round($score_mfe*10)/10;
-    my $round_freq=round($score_freq*10)/10;
-    my $round=round($score*10)/10;
-
-    #print scores
-    score_s("score_mfe\t$round_mfe\nscore_freq\t$round_freq\nscore\t$round");
-
-    #return 1 if the potential precursor is accepted, return 0 if discarded
-    unless($score>=$score_min){return 0;}
-    return 1;
-}
-
-sub test_randfold{
-
-    #print sequence to temporary file, test randfold value, return 1 or 0
-
-#    print_file("pri_seq.fa",">pri_seq\n".$hash_comp{"pri_seq"});
-	#my $tmpfile=$tmpdir.$hash_comp{"pri_id"};
-	#open(FILE, ">$tmpfile");
-	#print FILE ">pri_seq\n",$hash_comp{"pri_seq"};
-	#close FILE;
-
-#	my $p_value=`randfold -s $tmpfile 999 | cut -f 3`;
-	#my $p1=`randfold -s $tmpfile 999 | cut -f 3`;
-	#my $p2=`randfold -s $tmpfile 999 | cut -f 3`;
-	my $p1=&randfold_pvalue($hash_comp{"pri_seq"},999);
-	my $p2=&randfold_pvalue($hash_comp{"pri_seq"},999);
-	my $p_value=($p1+$p2)/2;
-	wait;
-#    system "rm $tmpfile";
-
-    if($p_value<=0.05){return 1;}
-
-    return 0;
-}
-
-sub randfold_pvalue{
-	my $cpt_sup = 0;
-	my $cpt_inf = 0;
-	my $cpt_ega = 1;
-
-	my ($seq,$number_of_randomizations)=@_;
-	#my $str =$seq;
-	#my $mfe = RNA::fold($seq,$str);
-		my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
-		my @rawfolds=split/\s+/,$rnafold;
-		my $str=$rawfolds[1];
-		my $mfe=$rawfolds[-1];
-		$mfe=~s/\(//;
-		$mfe=~s/\)//;
-
-	for (my $i=0;$i<$number_of_randomizations;$i++) {
-		$seq = shuffle_sequence_dinucleotide($seq);
-		#$str = $seq;
-
-		#my $rand_mfe = RNA::fold($str,$str);
-		$rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
-		my @rawfolds=split/\s+/,$rnafold;
-		my $str=$rawfolds[1];
-		my $rand_mfe=$rawfolds[-1];
-		$rand_mfe=~s/\(//;
-		$rand_mfe=~s/\)//;
-
-		if ($rand_mfe < $mfe) {
-			$cpt_inf++;
-		}
-		if ($rand_mfe == $mfe) {
-			$cpt_ega++;
-		}
-		if ($rand_mfe > $mfe) {
-			$cpt_sup++;
-		}
-	}
-
-	my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1);
-
-	#print "$name\t$mfe\t$proba\n";
-	return $proba;
-}
-
-sub shuffle_sequence_dinucleotide {
-
-	my ($str) = @_;
-
-	# upper case and convert to ATGC
-	$str = uc($str);
-	$str =~ s/U/T/g;
-
-	my @nuc = ('A','T','G','C');
-	my $count_swap = 0;
-	# set maximum number of permutations
-	my $stop = length($str) * 10;
-
-	while($count_swap < $stop) {
-
-		my @pos;
-
-		# look start and end letters
-		my $firstnuc = $nuc[int(rand 4)];
-		my $thirdnuc = $nuc[int(rand 4)];
-
-		# get positions for matching nucleotides
-		for (my $i=0;$i<(length($str)-2);$i++) {
-			if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) {
-				push (@pos,($i+1));
-				$i++;
-			}
-		}
-
-		# swap at random trinucleotides
-		my $max = scalar(@pos);
-		for (my $i=0;$i<$max;$i++) {
-			my $swap = int(rand($max));
-			if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) {
-				$count_swap++;
-				my $w1 = substr($str,$pos[$i],1);
-				my $w2 = substr($str,$pos[$swap],1);
-				substr($str,$pos[$i],1,$w2);
-				substr($str,$pos[$swap],1,$w1);
-			}
-		}
-	}
-	return($str);
-}
-
-sub test_nucleus_conservation{
-
-    #test if nucleus is identical to nucleus from known miRNA, return 1 or 0
-
-    my $nucleus=substr($hash_comp{"mature_seq"},1,$nucleus_lng);
-    $nucleus=~tr/[T]/[U]/;
-    if($hash_nuclei{$nucleus}){return 1;}
-
-    return 0;
-}
-
-
-
-sub pass_filtering_initial{
-
-    #test if the structure forms a plausible hairpin
-    unless(pass_filtering_structure()){filter_p("structure problem"); return 0;}
-
-    #test if >90% of reads map to the hairpin in consistence with Dicer processing
-    unless(pass_filtering_signature()){filter_p("signature problem");return 0;}
-
-    return 1;
-
-}
-
-
-sub pass_filtering_signature{
-
-    #number of reads that map in consistence with Dicer processing
-    my $consistent=0;
-
-    #number of reads that map inconsistent with Dicer processing
-    my $inconsistent=0;
-
-#   number of potential star reads map in good consistence with Drosha/Dicer processing
-#   (3' overhangs relative to mature product)
-    my $star_perfect=0;
-
-#   number of potential star reads that do not map in good consistence with 3' overhang
-    my $star_fuzzy=0;
-
-
-    #sort queries (deep sequences) by their position on the hairpin
-    my @queries=sort {$hash_query{$a}{"subject_beg"} <=> $hash_query{$b}{"subject_beg"}} keys %hash_query;
-
-    foreach my $query(@queries){
-
-		#number of reads that the deep sequence represents
-		unless(defined($hash_query{$query}{"freq"})){next;}
-		my $query_freq=$hash_query{$query}{"freq"};
-
-		#test which Dicer product (if any) the deep sequence corresponds to
-		my $product=test_query($query);
-
-		#if the deep sequence corresponds to a Dicer product, add to the 'consistent' variable
-		if($product){$consistent+=$query_freq;}
-
-		#if the deep sequence do not correspond to a Dicer product, add to the 'inconsistent' variable
-		else{$inconsistent+=$query_freq;}
-
-		#test a potential star sequence has good 3' overhang
-		if($product eq "star"){
-			if(test_star($query)){$star_perfect+=$query_freq;}
-			else{$star_fuzzy+=$query_freq;}
-		}
-    }
-
-#   if the majority of potential star sequences map in good accordance with 3' overhang
-#    score for the presence of star evidence
-    if($star_perfect>$star_fuzzy){$hash_comp{"star_read"}=1;}
-
-    #total number of reads mapping to the hairpin
-    my $freq=$consistent+$inconsistent;
-    $hash_comp{"freq"}=$freq;
-    unless($freq>0){filter_s("read frequency too low"); return 0;}
-
-    #unless >90% of the reads map in consistence with Dicer processing, the hairpin is discarded
-    my $inconsistent_fraction=$inconsistent/($inconsistent+$consistent);
-    unless($inconsistent_fraction<=0.1){filter_p("inconsistent\t$inconsistent\nconsistent\t$consistent"); return 0;}
-
-    #the hairpin is retained
-    return 1;
-}
-
-sub test_star{
-
-    #test if a deep sequence maps in good consistence with 3' overhang
-
-    my $query=shift;
-
-    #5' begin and 3' end positions
-    my $beg=$hash_query{$query}{"subject_beg"};
-    my $end=$hash_query{$query}{"subject_end"};
-
-    #the difference between observed and expected begin positions must be 0 or 1
-    my $offset=$beg-$hash_comp{"star_beg"};
-    if($offset==0 or $offset==1 or $offset==-1){return 1;}
-
-    return 0;
-}
-
-
-
-sub test_query{
-
-    #test if deep sequence maps in consistence with Dicer processing
-
-    my $query=shift;
-
-    #begin, end, strand and read count
-    my $beg=$hash_query{$query}{"subject_beg"};
-    my $end=$hash_query{$query}{"subject_end"};
-    my $strand=$hash_query{$query}{"strand"};
-    my $freq=$hash_query{$query}{"freq"};
-
-    #should not be on the minus strand (although this has in fact anecdotally been observed for known miRNAs)
-    if($strand eq '-'){return 0;}
-
-    #the deep sequence is allowed to stretch 2 nt beyond the expected 5' end
-    my $fuzz_beg=2;
-    #the deep sequence is allowed to stretch 5 nt beyond the expected 3' end
-    my $fuzz_end=2;
-
-    #if in accordance with Dicer processing, return the type of Dicer product
-    if(contained($beg,$end,$hash_comp{"mature_beg"}-$fuzz_beg,$hash_comp{"mature_end"}+$fuzz_end)){return "mature";}
-    if(contained($beg,$end,$hash_comp{"star_beg"}-$fuzz_beg,$hash_comp{"star_end"}+$fuzz_end)){return "star";}
-    if(contained($beg,$end,$hash_comp{"loop_beg"}-$fuzz_beg,$hash_comp{"loop_end"}+$fuzz_end)){return "loop";}
-
-    #if not in accordance, return 0
-    return 0;
-}
-
-
-sub pass_filtering_structure{
-
-    #The potential precursor must form a hairpin with miRNA precursor-like characteristics
-
-    #return value
-    my $ret=1;
-
-    #potential mature, star, loop and lower flank parts must be identifiable
-    unless(test_components()){return 0;}
-
-    #no bifurcations
-    unless(no_bifurcations_precursor()){$ret=0;}
-
-    #minimum 14 base pairings in duplex
-    unless(bp_duplex()>=15){$ret=0;filter_s("too few pairings in duplex");}
-
-    #not more than 6 nt difference between mature and star length
-    unless(-6<diff_lng() and diff_lng()<6){$ret=0; filter_s("too big difference between mature and star length") }
-
-    return $ret;
-}
-
-
-
-
-
-
-sub test_components{
-
-    #tests whether potential mature, star, loop and lower flank parts are identifiable
-
-    unless($hash_comp{"mature_struct"}){
-	filter_s("no mature");
-#	print STDERR "no mature\n";
-	return 0;
-    }
-
-    unless($hash_comp{"star_struct"}){
-	filter_s("no star");
-#	print STDERR "no star\n";
-	return 0;
-    }
-
-    unless($hash_comp{"loop_struct"}){
-	filter_s("no loop");
-#	print STDERR "no loop\n";
-   	return 0;
-    }
-
-    unless($hash_comp{"flank_first_struct"}){
-	filter_s("no flanks");
-#print STDERR "no flanks_first_struct\n";
-   	return 0;
-    }
-
-    unless($hash_comp{"flank_second_struct"}){
-	filter_s("no flanks");
-#	print STDERR "no flanks_second_struct\n";
-    	return 0;
-    }
-    return 1;
-}
-
-
-
-
-
-sub no_bifurcations_precursor{
-
-    #tests whether there are bifurcations in the hairpin
-
-    #assembles the potential precursor sequence and structure from the expected Dicer products
-    #this is the expected biological precursor, in contrast with 'pri_seq' that includes
-    #some genomic flanks on both sides
-
-    my $pre_struct;
-    my $pre_seq;
-    if($hash_comp{"mature_arm"} eq "first"){
-	$pre_struct.=$hash_comp{"mature_struct"}.$hash_comp{"loop_struct"}.$hash_comp{"star_struct"};
-	$pre_seq.=$hash_comp{"mature_seq"}.$hash_comp{"loop_seq"}.$hash_comp{"star_seq"};
-    }else{
-	$pre_struct.=$hash_comp{"star_struct"}.$hash_comp{"loop_struct"}.$hash_comp{"mature_struct"};
-	$pre_seq.=$hash_comp{"star_seq"}.$hash_comp{"loop_seq"}.$hash_comp{"mature_seq"};
-    }
-
-    #read into hash
-    $hash_comp{"pre_struct"}=$pre_struct;
-    $hash_comp{"pre_seq"}=$pre_seq;
-
-    #simple pattern matching checks for bifurcations
-    unless($pre_struct=~/^((\.|\()+..(\.|\))+)$/){
-	filter_s("bifurcation in precursor");
-#	print STDERR "bifurcation in precursor\n";
-	return 0;
-    }
-
-    return 1;
-}
-
-sub bp_precursor{
-
-    #total number of bps in the precursor
-
-    my $pre_struct=$hash_comp{"pre_struct"};
-
-    #simple pattern matching
-    my $pre_bps=0;
-    while($pre_struct=~/\(/g){
-	$pre_bps++;
-    }
-    return $pre_bps;
-}
-
-
-sub bp_duplex{
-
-    #total number of bps in the duplex
-
-    my $duplex_bps=0;
-    my $mature_struct=$hash_comp{"mature_struct"};
-
-    #simple pattern matching
-    while($mature_struct=~/(\(|\))/g){
-	$duplex_bps++;
-    }
-    return $duplex_bps;
-}
-
-sub diff_lng{
-
-    #find difference between mature and star lengths
-
-    my $mature_lng=length $hash_comp{"mature_struct"};
-    my $star_lng=length $hash_comp{"star_struct"};
-    my $diff_lng=$mature_lng-$star_lng;
-    return $diff_lng;
-}
-
-
-
-sub do_test_assemble{
-
-#    not currently used, tests if the 'pri_struct' as assembled from the parts (Dicer products, lower flanks)
-#    is identical to 'pri_struct' before disassembly into parts
-
-    my $assemble_struct;
-
-    if($hash_comp{"flank_first_struct"} and $hash_comp{"mature_struct"} and $hash_comp{"loop_struct"} and $hash_comp{"star_struct"} and $hash_comp{"flank_second_struct"}){
-	if($hash_comp{"mature_arm"} eq "first"){
-	    $assemble_struct.=$hash_comp{"flank_first_struct"}.$hash_comp{"mature_struct"}.$hash_comp{"loop_struct"}.$hash_comp{"star_struct"}.$hash_comp{"flank_second_struct"};
-	}else{
-	    $assemble_struct.=$hash_comp{"flank_first_struct"}.$hash_comp{"star_struct"}.$hash_comp{"loop_struct"}.$hash_comp{"mature_struct"}.$hash_comp{"flank_second_struct"};
-	}
-	unless($assemble_struct eq $hash_comp{"pri_struct"}){
-	    $hash_comp{"test_assemble"}=$assemble_struct;
-	    print_hash_comp();
-	}
-    }
-    return;
- }
-
-
-
-sub fill_structure{
-
-    #reads the dot bracket structure into the 'bp' hash where each key and value are basepaired
-
-    my $struct=$hash_struct{$subject_old};
-    my $lng=length $struct;
-
-    #local stack for keeping track of basepairings
-    my @bps;
-
-    for(my $pos=1;$pos<=$lng;$pos++){
-		my $struct_pos=excise_struct($struct,$pos,$pos,"+");
-
-		if($struct_pos eq "("){
-			push(@bps,$pos);
-		}
-
-		if($struct_pos eq ")"){
-			my $pos_prev=pop(@bps);
-			$hash_bp{$pos_prev}=$pos;
-			$hash_bp{$pos}=$pos_prev;
-		}
-    }
-    return;
-}
-
-
-
-sub fill_star{
-
-    #fills specifics on the expected star strand into 'comp' hash ('component' hash)
-
-    #if the mature sequence is not plausible, don't look for the star arm
-    my $mature_arm=$hash_comp{"mature_arm"};
-    unless($mature_arm){$hash_comp{"star_arm"}=0; return;}
-
-    #if the star sequence is not plausible, don't fill into the hash
-    my($star_beg,$star_end)=find_star();
-    my $star_arm=arm_star($star_beg,$star_end);
-    unless($star_arm){return;}
-
-    #excise expected star sequence and structure
-    my $star_seq=excise_seq($hash_comp{"pri_seq"},$star_beg,$star_end,"+");
-    my $star_struct=excise_seq($hash_comp{"pri_struct"},$star_beg,$star_end,"+");
-
-    #fill into hash
-    $hash_comp{"star_beg"}=$star_beg;
-    $hash_comp{"star_end"}=$star_end;
-    $hash_comp{"star_seq"}=$star_seq;
-    $hash_comp{"star_struct"}=$star_struct;
-    $hash_comp{"star_arm"}=$star_arm;
-
-    return;
-}
-
-
-sub find_star{
-
-    #uses the 'bp' hash to find the expected star begin and end positions from the mature positions
-
-    #the -2 is for the overhang
-    my $mature_beg=$hash_comp{"mature_beg"};
-    my $mature_end=$hash_comp{"mature_end"}-2;
-    my $mature_lng=$mature_end-$mature_beg+1;
-
-    #in some cases, the last nucleotide of the mature sequence does not form a base pair,
-    #and therefore does not basepair with the first nucleotide of the star sequence.
-    #In this case, the algorithm searches for the last nucleotide of the mature sequence
-    #to form a base pair. The offset is the number of nucleotides searched through.
-    my $offset_star_beg=0;
-    my $offset_beg=0;
-
-    #the offset should not be longer than the length of the mature sequence, then it
-    #means that the mature sequence does not form any base pairs
-    while(!$offset_star_beg and $offset_beg<$mature_lng){
-		if($hash_bp{$mature_end-$offset_beg}){
-			$offset_star_beg=$hash_bp{$mature_end-$offset_beg};
-		}else{
-			$offset_beg++;
-		}
-    }
-    #when defining the beginning of the star sequence, compensate for the offset
-    my $star_beg=$offset_star_beg-$offset_beg;
-
-    #same as above
-    my $offset_star_end=0;
-    my $offset_end=0;
-    while(!$offset_star_end and $offset_end<$mature_lng){
-		if($hash_bp{$mature_beg+$offset_end}){
-			$offset_star_end=$hash_bp{$mature_beg+$offset_end};
-		}else{
-			$offset_end++;
-		}
-    }
-    #the +2 is for the overhang
-    my $star_end=$offset_star_end+$offset_end+2;
-
-    return($star_beg,$star_end);
-}
-
-
-sub fill_pri{
-
-    #fills basic specifics on the precursor into the 'comp' hash
-
-    my $seq=$hash_seq{$subject_old};
-    my $struct=$hash_struct{$subject_old};
-    my $mfe=$hash_mfe{$subject_old};
-    my $length=length $seq;
-
-    $hash_comp{"pri_id"}=$subject_old;
-    $hash_comp{"pri_seq"}=$seq;
-    $hash_comp{"pri_struct"}=$struct;
-    $hash_comp{"pri_mfe"}=$mfe;
-    $hash_comp{"pri_beg"}=1;
-    $hash_comp{"pri_end"}=$length;
-
-    return;
-}
-
-
-sub fill_mature{
-
-    #fills specifics on the mature sequence into the 'comp' hash
-
-    my $mature_query=find_mature_query();
-    my($mature_beg,$mature_end)=find_positions_query($mature_query);
-    my $mature_strand=find_strand_query($mature_query);
-    my $mature_seq=excise_seq($hash_comp{"pri_seq"},$mature_beg,$mature_end,$mature_strand);
-    my $mature_struct=excise_struct($hash_comp{"pri_struct"},$mature_beg,$mature_end,$mature_strand);
-    my $mature_arm=arm_mature($mature_beg,$mature_end,$mature_strand);
-
-    $hash_comp{"mature_query"}=$mature_query;
-    $hash_comp{"mature_beg"}=$mature_beg;
-    $hash_comp{"mature_end"}=$mature_end;
-    $hash_comp{"mature_strand"}=$mature_strand;
-    $hash_comp{"mature_struct"}=$mature_struct;
-    $hash_comp{"mature_seq"}=$mature_seq;
-    $hash_comp{"mature_arm"}=$mature_arm;
-
-    return;
-}
-
-
-
-sub fill_loop{
-
-    #fills specifics on the loop sequence into the 'comp' hash
-
-    #unless both mature and star sequences are plausible, do not look for the loop
-    unless($hash_comp{"mature_arm"} and $hash_comp{"star_arm"}){return;}
-
-    my $loop_beg;
-    my $loop_end;
-
-    #defining the begin and end positions of the loop from the mature and star positions
-    #excision depends on whether the mature or star sequence is 5' of the loop ('first')
-    if($hash_comp{"mature_arm"} eq "first"){
-	$loop_beg=$hash_comp{"mature_end"}+1;
-    }else{
-	$loop_end=$hash_comp{"mature_beg"}-1;
-    }
-
-    if($hash_comp{"star_arm"} eq "first"){
-	$loop_beg=$hash_comp{"star_end"}+1;
-    }else{
-	$loop_end=$hash_comp{"star_beg"}-1;
-    }
-
-    #unless the positions are plausible, do not fill into hash
-    unless(test_loop($loop_beg,$loop_end)){return;}
-
-    my $loop_seq=excise_seq($hash_comp{"pri_seq"},$loop_beg,$loop_end,"+");
-    my $loop_struct=excise_struct($hash_comp{"pri_struct"},$loop_beg,$loop_end,"+");
-
-    $hash_comp{"loop_beg"}=$loop_beg;
-    $hash_comp{"loop_end"}=$loop_end;
-    $hash_comp{"loop_seq"}=$loop_seq;
-    $hash_comp{"loop_struct"}=$loop_struct;
-
-    return;
-}
-
-
-sub fill_lower_flanks{
-
-    #fills specifics on the lower flanks and unpaired strands into the 'comp' hash
-
-    #unless both mature and star sequences are plausible, do not look for the flanks
-    unless($hash_comp{"mature_arm"} and $hash_comp{"star_arm"}){return;}
-
-    my $flank_first_end;
-    my $flank_second_beg;
-
-    #defining the begin and end positions of the flanks from the mature and star positions
-    #excision depends on whether the mature or star sequence is 5' in the potenitial precursor ('first')
-    if($hash_comp{"mature_arm"} eq "first"){
-	$flank_first_end=$hash_comp{"mature_beg"}-1;
-    }else{
-	$flank_second_beg=$hash_comp{"mature_end"}+1;
-    }
-
-    if($hash_comp{"star_arm"} eq "first"){
-	$flank_first_end=$hash_comp{"star_beg"}-1;
-    }else{
-	$flank_second_beg=$hash_comp{"star_end"}+1;
-    }
-
-    #unless the positions are plausible, do not fill into hash
-    unless(test_flanks($flank_first_end,$flank_second_beg)){return;}
-
-    $hash_comp{"flank_first_end"}=$flank_first_end;
-    $hash_comp{"flank_second_beg"}=$flank_second_beg;
-    $hash_comp{"flank_first_seq"}=excise_seq($hash_comp{"pri_seq"},$hash_comp{"pri_beg"},$hash_comp{"flank_first_end"},"+");
-    $hash_comp{"flank_second_seq"}=excise_seq($hash_comp{"pri_seq"},$hash_comp{"flank_second_beg"},$hash_comp{"pri_end"},"+");
-    $hash_comp{"flank_first_struct"}=excise_struct($hash_comp{"pri_struct"},$hash_comp{"pri_beg"},$hash_comp{"flank_first_end"},"+");
-    $hash_comp{"flank_second_struct"}=excise_struct($hash_comp{"pri_struct"},$hash_comp{"flank_second_beg"},$hash_comp{"pri_end"},"+");
-
-    if($options{z}){
-	fill_stems_drosha();
-    }
-
-    return;
-}
-
-
-sub fill_stems_drosha{
-
-    #scores the number of base pairings formed by the first ten nt of the lower stems
-    #in general, the more stems, the higher the score contribution
-    #warning: this options has not been thoroughly tested
-
-    my $flank_first_struct=$hash_comp{"flank_first_struct"};
-    my $flank_second_struct=$hash_comp{"flank_second_struct"};
-
-    my $stem_first=substr($flank_first_struct,-10);
-    my $stem_second=substr($flank_second_struct,0,10);
-
-    my $stem_bp_first=0;
-    my $stem_bp_second=0;
-
-    #find base pairings by simple pattern matching
-    while($stem_first=~/\(/g){
-	$stem_bp_first++;
-    }
-
-    while($stem_second=~/\)/g){
-	$stem_bp_second++;
-    }
-
-    my $stem_bp=min2($stem_bp_first,$stem_bp_second);
-
-    $hash_comp{"stem_first"}=$stem_first;
-    $hash_comp{"stem_second"}=$stem_second;
-    $hash_comp{"stem_bp_first"}=$stem_bp_first;
-    $hash_comp{"stem_bp_second"}=$stem_bp_second;
-    $hash_comp{"stem_bp"}=$stem_bp;
-
-    return;
-}
-
-
-
-
-sub arm_mature{
-
-    #tests whether the mature sequence is in the 5' ('first') or 3' ('second') arm of the potential precursor
-
-    my ($beg,$end,$strand)=@_;
-
-    #mature and star sequences should alway be on plus strand
-    if($strand eq "-"){return 0;}
-
-    #there should be no bifurcations and minimum one base pairing
-    my $struct=excise_seq($hash_comp{"pri_struct"},$beg,$end,$strand);
-    if(defined($struct) and $struct=~/^(\(|\.)+$/ and $struct=~/\(/){
-	return "first";
-    }elsif(defined($struct) and $struct=~/^(\)|\.)+$/ and $struct=~/\)/){
-	return "second";
-    }
-    return 0;
-}
-
-
-sub arm_star{
-
-    #tests whether the star sequence is in the 5' ('first') or 3' ('second') arm of the potential precursor
-
-    my ($beg,$end)=@_;
-
-    #unless the begin and end positions are plausible, test negative
-    unless($beg>0 and $beg<=$hash_comp{"pri_end"} and $end>0 and $end<=$hash_comp{"pri_end"} and $beg<=$end){return 0;}
-
-    #no overlap between the mature and the star sequence
-    if($hash_comp{"mature_arm"} eq "first"){
-	($hash_comp{"mature_end"}<$beg) or return 0;
-    }elsif($hash_comp{"mature_arm"} eq "second"){
-	($end<$hash_comp{"mature_beg"}) or return 0;
-    }
-
-    #there should be no bifurcations and minimum one base pairing
-    my $struct=excise_seq($hash_comp{"pri_struct"},$beg,$end,"+");
-    if($struct=~/^(\(|\.)+$/ and $struct=~/\(/){
-	return "first";
-    }elsif($struct=~/^(\)|\.)+$/ and $struct=~/\)/){
-	return "second";
-    }
-    return 0;
-}
-
-
-sub test_loop{
-
-    #tests the loop positions
-
-    my ($beg,$end)=@_;
-
-    #unless the begin and end positions are plausible, test negative
-    unless($beg>0 and $beg<=$hash_comp{"pri_end"} and $end>0 and $end<=$hash_comp{"pri_end"} and $beg<=$end){return 0;}
-
-    return 1;
-}
-
-
-sub test_flanks{
-
-    #tests the positions of the lower flanks
-
-    my ($beg,$end)=@_;
-
-    #unless the begin and end positions are plausible, test negative
-    unless($beg>0 and $beg<=$hash_comp{"pri_end"} and $end>0 and $end<=$hash_comp{"pri_end"} and $beg<=$end){return 0;}
-
-    return 1;
-}
-
-
-sub comp{
-
-    #subroutine to retrive from the 'comp' hash
-
-    my $type=shift;
-    my $component=$hash_comp{$type};
-    return $component;
-}
-
-
-sub find_strand_query{
-
-    #subroutine to find the strand for a given query
-
-    my $query=shift;
-    my $strand=$hash_query{$query}{"strand"};
-    return $strand;
-}
-
-
-sub find_positions_query{
-
-    #subroutine to find the begin and end positions for a given query
-
-    my $query=shift;
-    my $beg=$hash_query{$query}{"subject_beg"};
-    my $end=$hash_query{$query}{"subject_end"};
-    return ($beg,$end);
-}
-
-
-
-sub find_mature_query{
-
-    #finds the query with the highest frequency of reads and returns it
-    #is used to determine the positions of the potential mature sequence
-
-    my @queries=sort {$hash_query{$b}{"freq"} <=> $hash_query{$a}{"freq"}} keys %hash_query;
-    my $mature_query=$queries[0];
-    return $mature_query;
-}
-
-
-
-
-sub reset_variables{
-
-    #resets the hashes for the next potential precursor
-
-#    %hash_query=();
-#    %hash_comp=();
-#    %hash_bp=();
-	foreach my $key (keys %hash_query) {delete($hash_query{$key});}
-	foreach my $key (keys %hash_comp) {delete($hash_comp{$key});}
-	foreach my $key (keys %hash_bp) {delete($hash_bp{$key});}
-
-#    $message_filter=();
-#    $message_score=();
-#    $lines=();
-	undef($message_filter);
-	undef($message_score);
-	undef($lines);
-    return;
-}
-
-
-
-sub excise_seq{
-
-    #excise sub sequence from the potential precursor
-
-    my($seq,$beg,$end,$strand)=@_;
-
-    #begin can be equal to end if only one nucleotide is excised
-    unless($beg<=$end){print STDERR "begin can not be smaller than end for $subject_old\n";exit;}
-
-    #rarely, permuted combinations of signature and structure cause out of bound excision errors.
-    #this happens once appr. every two thousand combinations
-    unless($beg<=length($seq)){$out_of_bound++;return 0;}
-
-    #if on the minus strand, the reverse complement should be excised
-    if($strand eq "-"){$seq=revcom($seq);}
-
-    #the blast parsed format is 1-indexed, substr is 0-indexed
-    my $sub_seq=substr($seq,$beg-1,$end-$beg+1);
-
-    return $sub_seq;
-
-}
-
-sub excise_struct{
-
-    #excise sub structure
-
-    my($struct,$beg,$end,$strand)=@_;
-    my $lng=length $struct;
-
-    #begin can be equal to end if only one nucleotide is excised
-    unless($beg<=$end){print STDERR "begin can not be smaller than end for $subject_old\n";exit;}
-
-    #rarely, permuted combinations of signature and structure cause out of bound excision errors.
-    #this happens once appr. every two thousand combinations
-    unless($beg<=length($struct)){return 0;}
-
-    #if excising relative to minus strand, positions are reversed
-    if($strand eq "-"){($beg,$end)=rev_pos($beg,$end,$lng);}
-
-    #the blast parsed format is 1-indexed, substr is 0-indexed
-    my $sub_struct=substr($struct,$beg-1,$end-$beg+1);
-
-    return $sub_struct;
-}
-
-
-sub create_hash_nuclei{
-    #parses a fasta file with sequences of known miRNAs considered for conservation purposes
-    #reads the nuclei into a hash
-
-    my ($file) = @_;
-    my ($id, $desc, $sequence, $nucleus) = ();
-
-    open (FASTA, "<$file") or die "can not open $file\n";
-    while (<FASTA>)
-    {
-        chomp;
-        if (/^>(\S+)(.*)/)
-	{
-	    $id       = $1;
-	    $desc     = $2;
-	    $sequence = "";
-	    $nucleus  = "";
-	    while (<FASTA>){
-                chomp;
-                if (/^>(\S+)(.*)/){
-		    $nucleus                = substr($sequence,1,$nucleus_lng);
-		    $nucleus                =~ tr/[T]/[U]/;
-		    $hash_mirs{$nucleus}   .="$id\t";
-		    $hash_nuclei{$nucleus} += 1;
-
-		    $id               = $1;
-		    $desc             = $2;
-		    $sequence         = "";
-		    $nucleus          = "";
-		    next;
-                }
-		$sequence .= $_;
-            }
-        }
-    }
-    $nucleus                = substr($sequence,1,$nucleus_lng);
-    $nucleus                =~ tr/[T]/[U]/;
-    $hash_mirs{$nucleus}   .="$id\t";
-    $hash_nuclei{$nucleus} += 1;
-    close FASTA;
-}
-
-
-sub parse_file_struct{
-	#parses the output from RNAfoldand reads it into hashes
-	my($file) = @_;
-	my($id,$desc,$seq,$struct,$mfe) = ();
-	open (FILE_STRUCT, "<$file") or die "can not open $file\n";
-	while (<FILE_STRUCT>){
-		chomp;
-		if (/^>(\S+)\s*(.*)/){
-			$id= $1;
-			$desc= $2;
-			$seq= "";
-			$struct= "";
-			$mfe= "";
-			while (<FILE_STRUCT>){
-				chomp;
-				if (/^>(\S+)\s*(.*)/){
-					$hash_desc{$id}   = $desc;
-					$hash_seq{$id}    = $seq;
-					$hash_struct{$id} = $struct;
-					$hash_mfe{$id}    = $mfe;
-					$id          = $1;
-					$desc        = $2;
-					$seq         = "";
-					$struct      = "";
-					$mfe         = "";
-					next;
-				}
-				if(/^\w/){
-					tr/uU/tT/;
-					$seq .= $_;
-					next;
-				}
-				if(/((\.|\(|\))+)/){$struct .=$1;}
-				if(/\((\s*-\d+\.\d+)\)/){$mfe = $1;}
-			}
-        }
-    }
-    $hash_desc{$id}        = $desc;
-    $hash_seq{$id}         = $seq;
-    $hash_struct{$id}      = $struct;
-    $hash_mfe{$id}         = $mfe;
-    close FILE_STRUCT;
-    return;
-}
-
-
-sub score_s{
-
-    #this score message is appended to the end of the string of score messages outputted for the potential precursor
-
-    my $message=shift;
-    $message_score.=$message."\n";;
-    return;
-}
-
-
-
-sub score_p{
-
-   #this score message is appended to the beginning of the string of score messages outputted for the potential precursor
-
-    my $message=shift;
-    $message_score=$message."\n".$message_score;
-    return;
-}
-
-
-
-sub filter_s{
-
-    #this filtering message is appended to the end of the string of filtering messages outputted for the potential precursor
-
-    my $message=shift;
-    $message_filter.=$message."\n";
-    return;
-}
-
-
-sub filter_p{
-
-    #this filtering message is appended to the beginning of the string of filtering messages outputted for the potential precursor
-
-    my $message=shift;
-    if(defined $message_filter){$message_filter=$message."\n".$message_filter;}
-    else{$message_filter=$message."\n";}
-    return;
-}
-
-
-sub find_freq{
-
-    #finds the frequency of a given read query from its id.
-
-    my($query)=@_;
-
-    if($query=~/x(\d+)/i){
-	my $freq=$1;
-	return $freq;
-    }else{
-	#print STDERR "Problem with read format\n";
-	return 0;
-    }
-}
-
-
-sub print_hash_comp{
-
-    #prints the 'comp' hash
-
-    my @keys=sort keys %hash_comp;
-    foreach my $key(@keys){
-	my $value=$hash_comp{$key};
-	print "$key  \t$value\n";
-    }
-}
-
-
-
-sub print_hash_bp{
-
-    #prints the 'bp' hash
-
-    my @keys=sort {$a<=>$b} keys %hash_bp;
-    foreach my $key(@keys){
-	my $value=$hash_bp{$key};
-	print "$key\t$value\n";
-    }
-    print "\n";
-}
-
-
-
-sub find_strand{
-
-    #A subroutine to find the strand, parsing different blast formats
-
-    my($other)=@_;
-
-    my $strand="+";
-
-    if($other=~/-/){
-	$strand="-";
-    }
-
-    if($other=~/minus/i){
-	$strand="-";
-    }
-    return($strand);
-}
-
-
-sub contained{
-
-    #Is the stretch defined by the first positions contained in the stretch defined by the second?
-
-    my($beg1,$end1,$beg2,$end2)=@_;
-
-    testbeginend($beg1,$end1,$beg2,$end2);
-
-    if($beg2<=$beg1 and $end1<=$end2){
-	return 1;
-    }else{
-	return 0;
-    }
-}
-
-
-sub testbeginend{
-
-    #Are the beginposition numerically smaller than the endposition for each pair?
-
-    my($begin1,$end1,$begin2,$end2)=@_;
-
-    unless($begin1<=$end1 and $begin2<=$end2){
-	print STDERR "beg can not be larger than end for $subject_old\n";
-	exit;
-    }
-}
-
-
-sub rev_pos{
-
-#   The blast_parsed format always uses positions that are relative to the 5' of the given strand
-#   This means that for a sequence of length n, the first nucleotide on the minus strand base pairs with
-#   the n't nucleotide on the plus strand
-
-#   This subroutine reverses the begin and end positions of positions of the minus strand so that they
-#   are relative to the 5' end of the plus strand
-
-    my($beg,$end,$lng)=@_;
-
-    my $new_end=$lng-$beg+1;
-    my $new_beg=$lng-$end+1;
-
-    return($new_beg,$new_end);
-}
-
-sub round {
-
-    #rounds to nearest integer
-
-    my($number) = shift;
-    return int($number + .5);
-
-}
-
-
-sub rev{
-
-    #reverses the order of nucleotides in a sequence
-
-    my($sequence)=@_;
-
-    my $rev=reverse $sequence;
-
-    return $rev;
-}
-
-sub com{
-
-    #the complementary of a sequence
-
-    my($sequence)=@_;
-
-    $sequence=~tr/acgtuACGTU/TGCAATGCAA/;
-
-    return $sequence;
-}
-
-sub revcom{
-
-    #reverse complement
-
-    my($sequence)=@_;
-
-    my $revcom=rev(com($sequence));
-
-    return $revcom;
-}
-
-
-sub max2 {
-
-    #max of two numbers
-
-    my($a, $b) = @_;
-    return ($a>$b ? $a : $b);
-}
-
-sub min2  {
-
-    #min of two numbers
-
-    my($a, $b) = @_;
-    return ($a<$b ? $a : $b);
-}
-
-
-
-sub score_freq{
-
-#   scores the count of reads that map to the potential precursor
-#   Assumes geometric distribution as described in methods section of manuscript
-
-    my $freq=shift;
-
-    #parameters of known precursors and background hairpins
-    my $parameter_test=0.999;
-    my $parameter_control=0.6;
-
-    #log_odds calculated directly to avoid underflow
-    my $intercept=log((1-$parameter_test)/(1-$parameter_control));
-    my $slope=log($parameter_test/$parameter_control);
-    my $log_odds=$slope*$freq+$intercept;
-
-    #if no strong evidence for 3' overhangs, limit the score contribution to 0
-    unless($options{x} or $hash_comp{"star_read"}){$log_odds=min2($log_odds,0);}
-
-    return $log_odds;
-}
-
-
-
-##sub score_mfe{
-
-#   scores the minimum free energy in kCal/mol of the potential precursor
-#   Assumes Gumbel distribution as described in methods section of manuscript
-
-##    my $mfe=shift;
-
-    #numerical value, minimum 1
-##    my $mfe_adj=max2(1,-$mfe);
-
-    #parameters of known precursors and background hairpins, scale and location
-##    my $prob_test=prob_gumbel_discretized($mfe_adj,5.5,32);
-##    my $prob_background=prob_gumbel_discretized($mfe_adj,4.8,23);
-
-##    my $odds=$prob_test/$prob_background;
-##    my $log_odds=log($odds);
-
-##    return $log_odds;
-##}
-
-sub score_mfe{
-#	use bignum;
-
-#   scores the minimum free energy in kCal/mol of the potential precursor
-#   Assumes Gumbel distribution as described in methods section of manuscript
-
-    my ($mfe,$mlng)=@_;
-
-    #numerical value, minimum 1
-    my $mfe_adj=max2(1,-$mfe);
-my $mfe_adj1=$mfe/$mlng;
-    #parameters of known precursors and background hairpins, scale and location
-	my $a=1.339e-12;my $b=2.778e-13;my $c=45.834;
-	my $ev=$e**($mfe_adj1*$c);
-	#print STDERR "\n***",$ev,"**\t",$ev+$b,"\t";
-	my $log_odds=($a/($b+$ev));
-
-
-	my $prob_test=prob_gumbel_discretized($mfe_adj,5.5,32);
-    my $prob_background=prob_gumbel_discretized($mfe_adj,4.8,23);
-
-    my $odds=$prob_test/$prob_background;
-    my $log_odds_2=log($odds);
-	#print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n";
-   return $log_odds;
-}
-
-
-
-sub prob_gumbel_discretized{
-
-#   discretized Gumbel distribution, probabilities within windows of 1 kCal/mol
-#   uses the subroutine that calculates the cdf to find the probabilities
-
-    my ($var,$scale,$location)=@_;
-
-    my $bound_lower=$var-0.5;
-    my $bound_upper=$var+0.5;
-
-    my $cdf_lower=cdf_gumbel($bound_lower,$scale,$location);
-    my $cdf_upper=cdf_gumbel($bound_upper,$scale,$location);
-
-    my $prob=$cdf_upper-$cdf_lower;
-
-    return $prob;
-}
-
-
-sub cdf_gumbel{
-
-#   calculates the cumulative distribution function of the Gumbel distribution
-
-    my ($var,$scale,$location)=@_;
-
-    my $cdf=$e**(-($e**(-($var-$location)/$scale)));
-
-    return $cdf;
-}
-
--- a/miRNA_Express_and_sequence.pl	Thu Nov 13 22:43:35 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,173 +0,0 @@
-#!/usr/bin/perl -w
-#Filename:
-#Author: Tian Dongmei
-#Email: tiandm@big.ac.cn
-#Date: 2014-6-4
-#Modified:
-#Description: solexa miRNA express and sequence
-my $version=1.00;
-
-use strict;
-use Getopt::Long;
-
-my %opts;
-GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h");
-if (!(defined $opts{i} and defined $opts{list} and defined $opts{fa} and defined $opts{pre} and defined $opts{tag}) || defined $opts{h}) { #necessary arguments
-&usage;
-}
-
-my $filein=$opts{'i'};
-my $fileout=$opts{'list'};
-my $out=$opts{'fa'};
-my $preout=$opts{'pre'};
-
-=cut
-my %hash_pri;
-open PRI,"<$opts{p}";
-while (my $aline=<PRI>) {
-	chomp $aline;
-	if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;}
-}
-close PRI;
-=cut
-
-open IN,"<$filein"; #input file
-open OUT,">$fileout"; #output file
-open FA ,">$out";
-open PRE,">$preout";
-
-print OUT "#ID\tcoordinate\tpos1\tpos2";
-my @marks=split/\,/,$opts{'tag'};
-foreach  (@marks) {
-	print OUT "\t",$_,"_matureExp";
-}
-foreach  (@marks) {
-	print OUT "\t",$_,"_starExp";
-}
-foreach  (@marks) {
-	print OUT "\t",$_,"_totalExp";
-}
-
-print OUT "\n";
-
-my (%uniq_id,$novel);
-while (my $aline=<IN>) {
-	chomp $aline;
-	until ($aline =~ /^score\s+[-\d\.]+/){
-		$aline = <IN>;
-		if (eof) {last;}
-	}
-	if (eof) {last;}
-########## miRNA ID ################
-	$novel++;
-########### annotate####################
-	do {$aline=<IN>;} until($aline=~/flank_first_end/) ;
-	chomp $aline;
-	my @flank1=split/\t/,$aline;
-	do {$aline=<IN>;} until($aline=~/flank_second_beg/) ;
-	chomp $aline;
-	my @flank2=split/\t/,$aline;
-#
-########## mature start loop pre ####
-	do {$aline=<IN>;} until($aline=~/mature_beg/) ;
-	chomp $aline;
-	my @start=split/\t/,$aline;
-#	$start[1] -=$flank1[1];
-	do {$aline=<IN>;} until($aline=~/mature_end/) ;
-	chomp $aline;
-	my @end=split/\t/,$aline;
-#	$end[1] -=$flank1[1];
-	do {$aline=<IN>;} until($aline=~/mature_seq/) ;
-	chomp $aline;
-	my @arr1=split/\t/,$aline;
-	do {$aline=<IN>;} until($aline=~/pre_seq/) ;
-	chomp $aline;
-	my @arr2=split/\t/,$aline;
-	do {$aline=<IN>;} until($aline=~/pri_id/) ;
-	chomp $aline;
-	my @pri_id=split/\t/,$aline;
-	do {$aline=<IN>;} until($aline=~/pri_seq/) ;
-	chomp $aline;
-	my @pri_seq=split/\t/,$aline;
-	do {$aline=<IN>;} until($aline=~/star_beg/) ;
-	chomp $aline;
-	my @star_start=split/\t/,$aline;
-#	$star_start[1] -=$flank1[1];
-	do {$aline=<IN>;} until($aline=~/star_end/) ;
-	chomp $aline;
-	my @star_end=split/\t/,$aline;
-#	$star_end[1] -=$flank1[1];
-	do {$aline=<IN>;} until($aline=~/star_seq/) ;
-	chomp $aline;
-	my @arr3=split/\t/,$aline;
-	print OUT "miR-c-$novel\t$pri_id[1]\tmature:$start[1]:$end[1]\tstar:$star_start[1]:$star_end[1]\t";
-	#print OUT "$arr1[1]\t$arr3[1]\t$arr2[1]\t\/\t";
-	print FA ">miR-c-$novel\n$arr1[1]\n";
-	print PRE ">miR-c-$novel\n$pri_seq[1]\n";
-########## reads count #############
-	<IN>;
-	my @count1;my @count2;my @count3;my @count4;
-	$aline=<IN>;
-	do {
-		chomp $aline;
-		my @reads=split/\t/,$aline;
-		my @pos=();
-		$reads[5]=~/(\d+)\.\.(\d+)/;
-#		$pos[0] =$1-$flank1[1];
-#		$pos[1] =$2-$flank1[1];
-		$pos[0]=$1;
-		$pos[1]=$2;
-		$reads[0]=~/:([\d|_]+)_x(\d+)$/;
-		my @ss=split/_/,$1;
-		for (my $i=0;$i<@ss ;$i++) {
-			if (!(defined $count3[$i])) {
-				$count3[$i]=0;
-			}
-			if (!(defined $count4[$i])) {
-				$count4[$i]=0;
-			}
-			$count2[$i]+=$ss[$i];
-
-		}
-#		$count3 +=$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 );
-#		$count4 +=$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 );
-#		$count1 =$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 && $count1<$1);
-#		$count2 =$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 && $count2<$1);
-		if($end[1]-$pos[1]>=-5 && $end[1]-$pos[1]<=5 && $pos[0]-$start[1]>=-3 && $pos[0]-$start[1]<=3 )
-		{
-			for (my $i=0;$i<@ss;$i++) {
-				$count3[$i]+=$ss[$i];
-			}
-		}
-		if($star_end[1]-$pos[1]<=5 && $star_end[1]-$pos[1]>=-5 && $pos[0]-$star_start[1]>=-3 && $pos[0]-$star_start[1]<=3){
-			for (my $i=0;$i<@ss;$i++) {
-				$count4[$i]+=$ss[$i];
-			}
-		}
-		$aline=<IN>;
-		chomp $aline;
-	} until(length $aline < 1) ;
-	$"="\t";
-	print OUT "@count3\t@count4\t@count2\n";
-	$"=" ";
-}
-
-close IN;
-close OUT;
-
-sub usage{
-print <<"USAGE";
-Version $version
-Usage:
-$0 -i -list -fa -pre -tag
-options:
--i input file,predictions file
--list output file miRNA list file
--fa output file ,miRNA sequence fasta file.
--pre output file, miRNA precursor fasta file.
--tag string, sample names# eg: samA,samB,samC
--h help
-USAGE
-exit(1);
-}
-
--- a/miRPlant.pl	Thu Nov 13 22:43:35 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,506 +0,0 @@
-#!/usr/bin/perl -w
-#Filename:
-#Author: Tian Dongmei
-#Email: tiandm@big.ac.cn
-#Date: 2014-4-22
-#Modified:
-#Description: plant microRNA prediction
-my $version=1.00;
-
-use strict;
-use Getopt::Long;
-use threads;
-#use threads::shared;
-use File::Path;
-use File::Basename;
-#use RNA;
-#use Term::ANSIColor;
-
-my %opts;
-GetOptions(\%opts,"i:s@","tag:s@","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h");
-if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} and defined $opts{pre} and defined $opts{mat}) || defined $opts{h}) { #necessary arguments
-&usage;
-}
-
-my $time=&Time();
-print "miPlant program start:\n   The time is $time!\n";
-print "Command line:\n   $0 @ARGV\n";
-
-my $format=$opts{'format'};
-if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") {
-	#&printErr();
-	die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
-}
-
-my $phred_qv=64;
-
-
-my @inputfiles=@{$opts{'i'}};
-my @inputtags=@{$opts{'tag'}};
-
-my  $mypath=`pwd`;
-chomp $mypath;
-
-my $dir=defined $opts{'o'} ? $opts{'o'} : "$mypath/miRPlant_out/";
-
-
-unless ($dir=~/\/$/) {$dir.="/";}
-if (not -d $dir) {
-	mkdir $dir;
-}
-my $config=$dir."/input_config";
-open CONFIG,">$config";
-	for (my $i=0;$i<@inputfiles;$i++) {
-		print CONFIG $inputfiles[$i],"\t",$inputtags[$i],"\n";
-	}
-close CONFIG;
-
-my $scipt_path=defined $opts{'path'} ? $opts{'path'} : "/Users/big/galaxy-dist/tools/myTools/";
-
-my $a="ATCTCGTATG";  #adapter
-if (defined $opts{'a'}) {$a=$opts{'a'};}
-
-my $m=6;  #adapter minimum mapped nt
-if (defined $opts{'M'}) {$m=$opts{'M'};}
-
-my $t=1; #threads number
-if (defined $opts{'t'}) {$t=$opts{'t'};}
-
-my $min_nt=19; # minimum reads length
-if (defined $opts{'min'}) {$min_nt=$opts{'min'};}
-
-my $max_nt=28; #maximum reads length
-if (defined $opts{'max'}) {$max_nt=$opts{'max'};}
-
-my $mis=0; #mismatch number for microRNA
-if (defined $opts{'mis'}) {$mis=$opts{'mis'};}
-
-my $mis_rfam=0;# mismatch number for rfam
-if (defined $opts{'v'}) {$mis_rfam=$opts{'v'};}
-
-my $hit=25; # maximum reads mapping hits in genome
-if (defined $opts{'r'}) {$hit=$opts{'r'};}
-
-my $upstream = 2; # microRNA 5' extension
-$upstream = $opts{'e'} if(defined $opts{'e'});
-
-my $downstream = 5;# microRNA 3' extension
-$downstream = $opts{'f'} if(defined $opts{'f'});
-
-my $maxd=defined $opts{'dis'} ? $opts{'dis'} : 200;
-my $flank=defined $opts{'flank'} ? $opts{'flank'} :10;
-my $mfe=defined $opts{'mfe'} ? $opts{'mfe'} : -20;
-
-$time=&Time();
-print "$time, Checking input file!\n";
-
-my (@filein,@mark,@clean);
-#&read_config();
-@filein=@inputfiles;
-@mark=@inputtags;
-
-&checkfa($opts{pre});
-&checkfa($opts{mat});
-&checkfa($opts{gfa});
-
-
-##### clip adpter --> clean data  start
-$time=&Time();
-print "$time, Preprocess:\n   trim adapter, reads collapse and filter reads by length.\n";
-
-$time=~s/:/-/g;
-$time=~s/ /-/g;
-my $preprocess=$dir."preProcess_${time}/";
-mkdir $preprocess;
-my $can_use_threads = eval 'use threads; 1';
-if ($can_use_threads) {
-# Do processing using threads
-	print "Do processing using threads\n";
-	my @filein1=@filein; my @mark1=@mark;
-	while (@filein1>0) {
-		my @thrs; my @res;
-		for (my $i=0;$i<$t ;$i++) {
-			last if(@filein1==0);
-			my $in=shift @filein1;
-			my $out=shift @mark1;
-			push @clean,$preprocess.$out."_clips_adapter.fq";
-			$thrs[$i]=threads->create(\&clips,$in,$out);
-		}
-		for (my $i=0;$i<@thrs;$i++) {
-			$res[$i]=$thrs[$i]->join();
-		}
-	}
-} else {
-# Do not processing using threads
-	print "Do not processing using threads\n";
-	for (my $i=0;$i<@filein ;$i++) {
-		my $in=$filein[$i];
-		my $out=$mark[$i];
-		push @clean,$preprocess.$out."_clips_adapter.fq";
-		&clips($in,$out);
-	}
-}
-
-##### clip adpter --> clean data  end
-
-my $collapsed=$preprocess."collapse_reads.fa";
-my $data=$preprocess."collapse_reads_${min_nt}_${max_nt}.fa"; ## raw clean data
-my $data2;   ### mirbase not mapped reads
-my $data3;   ### rfam not mapped reads
-&collapse(\@clean,$collapsed);   #collapse reads to tags
-
-&filterbylength();  # filter <$min_nt  && >$max_nt
-
-print "The final clean data file is $data, only contains reads which length is among $min_nt\~$max_nt\n\n";
-
-$time=Time();
-print "$time: known microRNA quantify!\n\n";
-
-chdir $dir;
-
-$time=~s/:/-/g;
-$time=~s/ /-/g;
-my $known_result=$dir."miRNA_Express_${time}/";
-&quantify(); ### known microRAN quantify
-
-
-#my $miR_exp_dir=&search($known_result,"miRNA_Express_");
-$data2=$known_result."/mirbase_not_mapped.fa";
-
-my $pathfile="$dir/path.txt";
-open PA,">$pathfile";
-print PA "$config\n";
-print PA "$preprocess\n";
-print PA "$known_result\n";
-
-if (defined $opts{'rfam'}) {              #rfam mapping and analysis
-	$time=Time();
-	print "$time: RNA annotate!\n\n";
-	$time=~s/:/-/g;
-	$time=~s/ /-/g;
-	my $rfam_exp_dir=$dir."rfam_match_${time}";
-	&rfam();
-	#my $rfam_exp_dir=&search($dir,"rfam_match_");
-	$data3=$rfam_exp_dir."/rfam_not_mapped.fa";
-print PA "$rfam_exp_dir\n";
-
-	my $tag=join "\\;" ,@mark;
-	system("perl $scipt_path/count_rfam_express.pl -i $rfam_exp_dir/rfam_mapped.bwt -tag $tag -o rfam_non-miRNA_annotation.txt");
-}
-
-my $data4=$data;
-if (defined $opts{'D'}) {                 #genome mapping
-	$data4=$data3;
-}else{
-	$data4=$data2;
-}
-
-$time=Time();
-print "$time: Genome alignment!\n\n";
-$time=~s/:/-/g;
-$time=~s/ /-/g;
-my $genome_map=$dir."genome_match_${time}";
-&genome($data4);
-print PA "$genome_map\n";
-#my $genome_map=&search($dir,"genome_match_");
-my $mapfile=$genome_map."/genome_mapped.bwt";
-my $mapfa=$genome_map."/genome_mapped.fa";
-my $unmap=$genome_map."/genome_not_mapped.fa";
-
-#$time=Time();
-#print "$time: Novel microRNA prediction!\n\n";
-
-&predict($mapfa);
-
-close PA;
-system("perl $scipt_path/html.pl -i $pathfile -format $format -o $dir/result.html");
-
-$time=Time();
-print "$time: Program end!!\n";
-
-############################## sub programs ###################################
-sub predict{
-	my ($file)=@_;
-	$time=&Time();
-	print "$time: Novel microRNA prediction!\n\n";
-	$time=~s/:/-/g;
-	$time=~s/ /-/g;
-	my $predict=$dir."miRNA_predict_${time}";
-print PA "$predict\n";
-	mkdir $predict;
-	chdir $predict;
-	system("perl $scipt_path/precursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe");
-#	print "\nprecursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe\n";
-
-	system("bowtie-build -f excised_precursor.fa excised_precursor");
-#	print "\nbowtie-build -f excised_precursor.fa excised_precursor\n";
-
-	system("bowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt 2> run.log");
-#	print "\nbowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt\n";
-
-	system("perl $scipt_path/convert_bowtie_to_blast.pl  precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst");
-#	print "\nconvert_bowtie_to_blast.pl  precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst\n";
-
-	system("sort -k 4  precursor_mapped.bst  > signatures.bst");
-#	print "\nsort +3 -25  precursor_mapped.bst  > ../signatures.bst\n";
-
-	chdir $dir;
-	system("perl $scipt_path/miRDeep_plant.pl $predict/signatures.bst $predict/excised_precursor_struc.txt novel_tmp_dir -y > microRNA_prediction.mrd");
-#	print "\nmiRDeep_plant.pl $dir/signatures.bst $predict/excised_precursor_struc.txt tmp_dir -y > microRNA_prediction.txt\n";
-	#system("rm novel_tmp_dir -rf");
-	my $tag=join "," ,@mark;
-	system("perl $scipt_path/miRNA_Express_and_sequence.pl -i microRNA_prediction.mrd -list novel_microRNA_express.txt -fa novel_microRNA_mature.fa -pre novel_microRNA_precursor.fa -tag $tag");
-}
-
-sub genome{
-	my ($file)=@_;
-	if(defined $opts{'idx'}){
-		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time") ;
-#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time\n";
-	}else{
-		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time") ;
-#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time\n";
-	}
-}
-sub rfam{
-	if (defined $opts{'idx2'}) {
-		system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time");
-#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time\n";
-	}else{
-		system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time");
-#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time\n";
-	}
-}
-sub quantify{
-	my $tag=join "\\;" ,@mark;
-	system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag");
-	print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n";
-}
-sub filterbylength{
-	my $tmpmark=join ",", @mark;
-	system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark");
-	system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt  -o $preprocess/length.html");
-#	print "\nfilterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark\n";
-
-}
-sub collapse{
-	my ($ins,$data)=@_;
-		my $str="";
-	for (my $i=0;$i<@{$ins};$i++) {
-		$str .="-i $$ins[$i] ";
-	}
-	system ("perl $scipt_path/collapseReads2Tags.pl $str -mark seq -o $data -format $format");
-#	print "\ncollapseReads2Tags.pl $str -mark seq -o $data -format $format\n";
-}
-
-sub clips{
-	my ($in,$out)=@_;
-	my $adapter=$preprocess.$out."_clips_adapter.fq";
-	if($format eq "fq" || $format eq "fastq"){
-		system("fastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter") ;
-		print "\nfastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter\n";
-	}
-	if($format eq "fa" || $format eq "fasta"){
-		system("fastx_clipper -a $a -M $m -i $in -o $adapter") ;
-        #		print "\nfastx_clipper -a $a -M $m -i $in -o $adapter\n";
-	}
-	#my $clean=$preprocess.$out."_clean.fq";
-	#system("filterReadsByLength.pl -i $adapter -o $clean -min $min_nt -max $max_nt ");
-
-	return;
-}
-
-sub read_config{
-	open CON,"<$config";
-	while (my $aline=<CON>) {
-		chomp $aline;
-		my @tmp=split/\t/,$aline;
-		push @filein,$tmp[0];
-		push @mark,$tmp[1];
-		&check_rawdata($tmp[0]);
-	}
-	close CON;
-	if (@filein != @mark) {
-		#&printErr();
-		die "Maybe config file have some wrong!!!\n";
-	}
-}
-sub check_rawdata{
-	my ($fileforcheck)=@_;
-	if (!(-s $fileforcheck)) {
-		#&printErr();
-		die "Can not find $fileforcheck, or file is empty!!!\n";
-	}
-	if ($format eq "fasta" || $format eq "fa") {
-		&checkfa($fileforcheck);
-	}
-	if ($format eq "fastq" || $format eq "fq") {
-		&checkfq($fileforcheck);
-	}
-}
-sub checkfa{
-	my ($file_reads)=@_;
-	open N,"<$file_reads";
-	my $line=<N>;
-	chomp $line;
-    if($line !~ /^>\S+/){
-        #printErr();
-        die "The first line of file $file_reads does not start with '>identifier'
-Reads file $file_reads is not a valid fasta file\n\n";
-    }
-    if(<N> !~ /^[ACGTNacgtn]*$/){
-        #printErr();
-        die "File $file_reads contains not allowed characters in sequences
-Allowed characters are ACGTN
-Reads file $file_reads is not a fasta file\n\n";
-    }
-	close N;
-}
-sub checkfq{
-	my ($file_reads)=@_;
-
-	open N,"<$file_reads";
-	for (my $i=0;$i<10;$i++) {
-		my $a=<N>;
-		my $b=<N>;
-		my $c=<N>;
-		my $d=<N>;
-		chomp $a;
-		chomp $b;
-		chomp $c;
-		chomp $d;
-		if($a!~/^\@/){
-			#&printErr();
-			die "$file_reads is not a fastq file\n\n";
-		}
-		if($b!~ /^[ACGTNacgtn]*$/){
-			#&printErr();
-			die "File $file_reads contains not allowed characters in sequences
-Allowed characters are ACGTN
-Reads file $file_reads is not a fasta file\n\n";
-		}
-		if ($c!~/^\@/ && $c!~/^\+/) {
-			#&printErr();
-			die "$file_reads is not a fastq file\n\n";
-		}
-		if ((length $b) != (length $d)) {
-			#&printErr();
-			die "$file_reads is not a fastq file\n\n";
-		}
-		my @qv=split //,$d;
-		for (my $j=0;$j<@qv ;$j++) {
-			my $q=ord($qv[$j])-64;
-			if($q<0){$phred_qv=33;}
-		}
-	}
-	close N;
-}
-
-sub search{
-	my ($dir,$str)=@_;
-	opendir I,$dir;
-	my @ret;
-	while (my $file=readdir I) {
-		if ($file=~/$str/) {
-			push @ret, $file;
-		}
-	}
-	closedir I;
-	if (@ret != 1) {
-		#&printErr();
-
-		die "Can not find directory or file which name has string: $str !!!\n";
-	}
-	return $ret[0];
-}
-
-=cut
-
-sub printErr{
-    print STDERR color 'bold red';
-    print STDERR "Error: ";
-    print STDERR color 'reset';
-}
-sub Time{
-        my $time=time();
-        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
-        $month++;
-        $year+=1900;
-        if (length($sec) == 1) {$sec = "0"."$sec";}
-        if (length($min) == 1) {$min = "0"."$min";}
-        if (length($hour) == 1) {$hour = "0"."$hour";}
-        if (length($day) == 1) {$day = "0"."$day";}
-        if (length($month) == 1) {$month = "0"."$month";}
-        #print "$year-$month-$day $hour:$min:$sec\n";
-        return("$year-$month-$day-$hour-$min-$sec");
-}
-=cut
-sub Time{
-        my $time=time();
-        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
-        $month++;
-        $year+=1900;
-        if (length($sec) == 1) {$sec = "0"."$sec";}
-        if (length($min) == 1) {$min = "0"."$min";}
-        if (length($hour) == 1) {$hour = "0"."$hour";}
-        if (length($day) == 1) {$day = "0"."$day";}
-        if (length($month) == 1) {$month = "0"."$month";}
-        #print "$year-$month-$day $hour:$min:$sec\n";
-        return("$year-$month-$day $hour:$min:$sec");
-}
-
-
-sub usage{
-print <<"USAGE";
-Version $version
-Usage:
-
-$0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t  -o  -path
-options:
--i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ...
--tag string # raw data file names, -tag xxx -tag xxx
-
--format string,#specific input rawdata file format : fastq|fq|fasta|fa
-
--path scirpt path
-
--gfa string,  input file # genome fasta. sequence file
--idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
-                string must be the prefix of the bowtie index. For instance, if
-                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
-                the prefix is 'h_sapiens_37_asm'.##can be null
-
--pre string,  input file #species specific microRNA precursor sequences
--mat string,  input file #species specific microRNA mature sequences
-
--rfam string,  input file# rfam database file, microRNAs must not be contained in this file## if not define, rfam small RNA will not be count.
--idx2 string,  rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter
-                string must be the prefix of the bowtie index. For instance, if
-                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
-                the prefix is 'h_sapiens_37_asm'.##can be null
-
--D      If [-D] is specified,will discard rfam mapped reads(nead -rfam).
-
--a string,  ADAPTER string. default is ATCTCGTATG.
--M int,    require minimum adapter alignment length of N. If less than N nucleotides aligned with the adapter - don't clip it.
--min int,  reads min length,default is 19.
--max int,  reads max length,default is 28.
-
--mis [int]     number of allowed mismatches when mapping reads to precursors, default 0
--e [int]     number of nucleotides upstream of the mature sequence to consider, default 2
--f [int]     number of nucleotides downstream of the mature sequence to consider, default 5
--v <int>     report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment
--r int       a read is allowed to map up to this number of positions in the genome,default is 25
-
--dis <int>   Maximal space between miRNA and miRNA* (200)
--flank <int>   Flank sequence length of miRNA precursor (10)
--mfe <folat> Maximal free energy allowed for a miRNA precursor (-20)
-
--t int,    number of threads [1]
-
--o output directory# absolute path
--h help
-USAGE
-exit(1);
-}
-
--- a/miRPlant.xml	Thu Nov 13 22:43:35 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-<tool id="plant_microRNA_v1" name="miRPlant" veision="1.0.0">
-  <description>tool for plant microRNA analisis</description>
-
-  <requirements>
-	<requirement type="package" version="0.0.13">fastx_toolkit </requirement>
-    <requirement type="package" version="0.12.7">bowtie</requirement>
-    <requirement type="set_environment">SCRIPT_PATH</requirement>
-    <!--requirement type="package" version="3.0.1">R</requirement!-->
-	<requirement type="package" version="2.59">SVG</requirement>
-	<requirement type="package" version="2.1.8">ViennaRNA</requirement>
-  </requirements>
-
-  <!--command interpreter="perl">miPlant.pl -i $input -format $format -gfa $gfa -idx $index -pre $pre -mat $mat -rfam $rfam -idx2 $idx2 -D $D -a $a -M $M -min $min -max $max -mis $mis -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe -t $t -o $output</command-->
-
-  <command interpreter="perl">miRPlant.pl
-   ## Change this to accommodate the number of threads you have available.
-        -t \${GALAXY_SLOTS:-4}
-   ## Do or not delet rfam mapped tags
-    #if $params.delet_rfam == "yes":
-	-D
-	#end if
-	-path \$SCRIPT_PATH
-
-    #for $j, $s in enumerate( $series )
-    ##rank_of_series=$j
-    -i ${s.input}
-    -tag ${s.tag}
-    #end for
-
-    -format $format -gfa $gfa -pre $pre -mat $mat -rfam $rfam  -a $a -M $mapnt -min $min -max $max -mis $mismatch -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe > run.log
-  </command>
-
-  <inputs>
-
-   <repeat name="series" title="Series">
-     <param name="input" type="data" label="Raw data"/>
-     <param name="tag" type="text" data_ref="input" label="Sample name of raw data"/>
-   </repeat>
-
-	<conditional name="params">
-		<param name="delet_rfam" type="select" label="delet rfam mapped reads">
-		  <option value="yes" selected="true">yes</option>
-		  <option value="no">no</option>
-		 </param>
-    </conditional> <!-- params -->
-
-	<!--param name="input" format="tabular"  type="data" label="input config file" /-->
-
-	<param name="format" type="select" lable="raw data format" multiple="false">
-	  <option value="fastq">Raw data is fastq. format</option>
-	  <option value="fasta">Raw data is fasta. format</option>
-	</param>
-
-	<param name="gfa"  type="data" label="genome sequence fasta file"/>
-	<!--param type="data" name="index" label="genome sequence bowtie index"/-->
-	<param name="mat" type="data" label="mature microRNA sequence file" />
-	<param name="pre" type="data" label="precursor microRNA sequence fie" />
-	<param name="rfam" type="data" label="rfam sequence file" />
-	<!--param type="data" name="idx2" label="rfam sequence bowtie index " -->
-	<param name="a" type="text" value="ATCTCGTATG" label="3' adapter sequence" />
-	<param name="mapnt" type="integer" value="8" label="minimum adapter map nts" />
-	<param name="min" type="integer" value="19" label="minimum microRNA length" />
-	<param name="max" type="integer" value="28" label="maximum microRNA length" />
-	<param name="mismatch" type="integer" value="0" label="number of allowed mismatches when mapping reads to precursors" />
-	<param name="e" type="integer" value="2" label="number of nucleotides upstream of the mature sequence to consider" />
-	<param name="f" type="integer" value="5" label="number of nucleotides downstream of the mature sequence to consider" />
-	<param name="v" type="integer" value="0" label="report end-to-end hits less than v mismatches"/>
-	<param name="r" type="integer" value="25" label="a read is allowed to map up to this number of positions in the genome" />
-	<param name="dis" type="integer" value="200" label="Maximal space between miRNA and miRNA*" />
-	<param name="flank" type="integer" value="10" label="Flank sequence length of miRNA precursor" />
-	<param name="mfe" type="float" value="-30" label="Maximal free energy allowed for a miRNA precursor" />
-  </inputs>
-
-  <outputs>
-   <data format="txt" name="known microRNA express list" from_work_dir="miRPlant_out/known_microRNA_express.txt" label="${tool.name} on ${on_string}: known microRNA express list"/>
-   <data format="txt" name="known microRNA express alignment" from_work_dir="miRPlant_out/known_microRNA_express.aln" label="${tool.name} on ${on_string}: known microRNA express alignment"/>
-   <data format="txt" name="known microRNA moRs result" from_work_dir="miRPlant_out/known_microRNA_express.moRs" label="${tool.name} on ${on_string}: known microRNA moRs result"/>
-   <data format="txt" name="known microRNA precursor file" from_work_dir="miRPlant_out/known_microRNA_precursor.fa" label="${tool.name} on ${on_string}: known microRNA precursor file"/>
-   <data format="txt" name="known microRNA mature file" from_work_dir="miRPlant_out/known_microRNA_mature.fa" label="${tool.name} on ${on_string}: known microRNA mature file"/>
-   <data format="txt" name="novel microRNA express list" from_work_dir="miRPlant_out/novel_microRNA_express.txt" label="${tool.name} on ${on_string}: novel microRNA express list"/>
-   <data format="txt" name="novel microRNA precursor file" from_work_dir="miRPlant_out/novel_microRNA_precursor.fa" label="${tool.name} on ${on_string}: novel microRNA precursor file"/>
-   <data format="txt" name="novel microRNA mature sequence file" from_work_dir="miRPlant_out/novel_microRNA_mature.fa" label="${tool.name} on ${on_string}: novel microRNA mature sequence file"/>
-   <data format="html" name="analysis result" from_work_dir="miRPlant_out/result.html" label="${tool.name} on ${on_string}: analysis result"/>
-  </outputs>
-
- <help>
-
- </help>
- </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preProcess.pl	Wed Dec 03 01:54:29 2014 -0500
@@ -0,0 +1,383 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-12-2
+#Modified:
+#Description: RNA-seq data pre-process
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use threads;
+#use threads::shared;
+use File::Path;
+use File::Basename;
+#use RNA;
+#use Term::ANSIColor;
+
+my %opts;
+GetOptions(\%opts,"i:s@","tag:s@","format=s","phred:i","gfa=s","rfam:s","idx:s","idx2:s","mis:i","v:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","h");
+if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $time=&Time();
+print "miPlant program start:\n   The time is $time!\n";
+print "Command line:\n   $0 @ARGV\n";
+
+my $format=$opts{'format'};
+if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") {
+	#&printErr();
+	die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
+}
+
+my $phred_qv=64;
+if (defined $opts{'phred'}) {$phred_qv=$opts{'phred'};}
+
+my @inputfiles=@{$opts{'i'}};
+my @inputtags=@{$opts{'tag'}};
+
+my  $mypath=`pwd`;
+chomp $mypath;
+
+my $dir=defined $opts{'o'} ? $opts{'o'} : "$mypath/preProcess/";
+
+
+unless ($dir=~/\/$/) {$dir.="/";}
+if (not -d $dir) {
+	mkdir $dir;
+}
+my $config=$dir."/input_config";
+open CONFIG,">$config";
+	for (my $i=0;$i<@inputfiles;$i++) {
+		print CONFIG $inputfiles[$i],"\t",$inputtags[$i],"\n";
+	}
+close CONFIG;
+
+my $scipt_path=defined $opts{'path'} ? $opts{'path'} : "/Users/big/galaxy-dist/tools/myTools/";
+
+my $a="ATCTCGTATG";  #adapter
+if (defined $opts{'a'}) {$a=$opts{'a'};}
+
+my $m=6;  #adapter minimum mapped nt
+if (defined $opts{'M'}) {$m=$opts{'M'};}
+
+my $t=1; #threads number
+if (defined $opts{'t'}) {$t=$opts{'t'};}
+
+my $min_nt=19; # minimum reads length
+if (defined $opts{'min'}) {$min_nt=$opts{'min'};}
+
+my $max_nt=28; #maximum reads length
+if (defined $opts{'max'}) {$max_nt=$opts{'max'};}
+
+my $mis=0; #mismatch number for microRNA
+if (defined $opts{'mis'}) {$mis=$opts{'mis'};}
+
+my $mis_rfam=0;# mismatch number for rfam
+if (defined $opts{'v'}) {$mis_rfam=$opts{'v'};}
+
+my (@filein,@mark,@clean);
+#&read_config();
+@filein=@inputfiles;
+@mark=@inputtags;
+
+&checkfa($opts{gfa});
+
+
+##### clip adpter --> clean data  start
+my $preprocess=$dir."preProcess_clean/";
+mkdir $preprocess;
+my $can_use_threads = eval 'use threads; 1';
+if ($can_use_threads) {
+# Do processing using threads
+	print "Do processing using threads\n";
+	my @filein1=@filein; my @mark1=@mark;
+	while (@filein1>0) {
+		my @thrs; my @res;
+		for (my $i=0;$i<$t ;$i++) {
+			last if(@filein1==0);
+			my $in=shift @filein1;
+			my $out=shift @mark1;
+			push @clean,$preprocess.$out."_clips_adapter.fq";
+			$thrs[$i]=threads->create(\&clips,$in,$out);
+		}
+		for (my $i=0;$i<@thrs;$i++) {
+			$res[$i]=$thrs[$i]->join();
+		}
+	}
+} else {
+# Do not processing using threads
+	print "Do not processing using threads\n";
+	for (my $i=0;$i<@filein ;$i++) {
+		my $in=$filein[$i];
+		my $out=$mark[$i];
+		push @clean,$preprocess.$out."_clips_adapter.fq";
+		&clips($in,$out);
+	}
+}
+
+##### clip adpter --> clean data  end
+
+my $collapsed=$preprocess."collapse_reads.fa";
+my $data=$preprocess."collapse_reads_${min_nt}_${max_nt}.fa"; ## raw clean data
+&collapse(\@clean,$collapsed);   #collapse reads to tags
+
+&filterbylength();  # filter <$min_nt  && >$max_nt
+
+print "The final clean data file is $data, only contains reads which length is among $min_nt\~$max_nt\n\n";
+
+
+$time=Time();
+print "$time: Genome alignment!\n\n";
+my $genome_map=$dir."genome_match";
+&genome($data);
+#my $genome_map=&search($dir,"genome_match_");
+my $mapfile=$genome_map."/genome_mapped.bwt";
+my $mapfa=$genome_map."/genome_mapped.fa";
+my $unmap=$genome_map."/genome_not_mapped.fa";
+
+chdir $dir;
+my $pathfile="$dir/path.txt";
+open PA,">$pathfile";
+print PA "$config\n";
+print PA "$preprocess\n";
+print PA "$genome_map\n";
+
+if (defined $opts{'rfam'}) {              #rfam mapping and analysis
+	$time=Time();
+	print "$time: RNA annotate!\n\n";
+	$time=~s/:/-/g;
+	$time=~s/ /-/g;
+	my $rfam_exp_dir=$dir."rfam_match";
+	&rfam();
+	#my $rfam_exp_dir=&search($dir,"rfam_match_");
+print PA "$rfam_exp_dir\n";
+
+	my $tag=join "\\;" ,@mark;
+	system("perl $scipt_path/count_rfam_express.pl -i $rfam_exp_dir/rfam_mapped.bwt -tag $tag -o rfam_non-miRNA_annotation.txt");
+}
+
+
+close PA;
+system("perl $scipt_path/html_preprocess.pl -i $pathfile -format $format -min $min_nt -max $max_nt -o $dir/preprocessResult.html");
+
+$time=Time();
+print "$time: Program end!!\n";
+
+############################## sub programs ###################################
+sub genome{
+	my ($file)=@_;
+	if(defined $opts{'idx'}){
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -r 1000 -v $mis -p $t -o $dir -index $opts{idx}") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time\n";
+	}else{
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -r 1000 -v $mis -p $t -o $dir") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time\n";
+	}
+}
+sub rfam{
+	if (defined $opts{'idx2'}) {
+		system("perl $scipt_path/rfam.pl -i $mapfa -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} ");
+#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time\n";
+	}else{
+		system("perl $scipt_path/rfam.pl -i $mapfa -ref $opts{rfam} -v $mis_rfam -p $t -o $dir ");
+#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time\n";
+	}
+}
+sub filterbylength{
+	my $tmpmark=join ",", @mark;
+	system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark");
+	system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt  -o $preprocess/length.html");
+#	print "\nfilterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark\n";
+
+}
+sub collapse{
+	my ($ins,$data)=@_;
+		my $str="";
+	for (my $i=0;$i<@{$ins};$i++) {
+		$str .="-i $$ins[$i] ";
+	}
+	system ("perl $scipt_path/collapseReads2Tags.pl $str -mark seq -o $data -format $format");
+#	print "\ncollapseReads2Tags.pl $str -mark seq -o $data -format $format\n";
+}
+
+sub clips{
+	my ($in,$out)=@_;
+	my $adapter=$preprocess.$out."_clips_adapter.fq";
+	if($format eq "fq" || $format eq "fastq"){
+		system("fastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter") ;
+#		print "\nfastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter\n";
+	}
+	if($format eq "fa" || $format eq "fasta"){
+		system("fastx_clipper -a $a -M $m -i $in -o $adapter") ;
+        #		print "\nfastx_clipper -a $a -M $m -i $in -o $adapter\n";
+	}
+	#my $clean=$preprocess.$out."_clean.fq";
+	#system("filterReadsByLength.pl -i $adapter -o $clean -min $min_nt -max $max_nt ");
+
+	return;
+}
+
+sub read_config{
+	open CON,"<$config";
+	while (my $aline=<CON>) {
+		chomp $aline;
+		my @tmp=split/\t/,$aline;
+		push @filein,$tmp[0];
+		push @mark,$tmp[1];
+		&check_rawdata($tmp[0]);
+	}
+	close CON;
+	if (@filein != @mark) {
+		#&printErr();
+		die "Maybe config file have some wrong!!!\n";
+	}
+}
+sub check_rawdata{
+	my ($fileforcheck)=@_;
+	if (!(-s $fileforcheck)) {
+		#&printErr();
+		die "Can not find $fileforcheck, or file is empty!!!\n";
+	}
+	if ($format eq "fasta" || $format eq "fa") {
+		&checkfa($fileforcheck);
+	}
+	if ($format eq "fastq" || $format eq "fq") {
+		&checkfq($fileforcheck);
+	}
+}
+sub checkfa{
+	my ($file_reads)=@_;
+	open N,"<$file_reads";
+	my $line=<N>;
+	chomp $line;
+    if($line !~ /^>\S+/){
+        #printErr();
+        die "The first line of file $file_reads does not start with '>identifier'
+Reads file $file_reads is not a valid fasta file\n\n";
+    }
+    if(<N> !~ /^[ACGTNacgtn]*$/){
+        #printErr();
+        die "File $file_reads contains not allowed characters in sequences
+Allowed characters are ACGTN
+Reads file $file_reads is not a fasta file\n\n";
+    }
+	close N;
+}
+sub checkfq{
+	my ($file_reads)=@_;
+
+	open N,"<$file_reads";
+	for (my $i=0;$i<10;$i++) {
+		my $a=<N>;
+		my $b=<N>;
+		my $c=<N>;
+		my $d=<N>;
+		chomp $a;
+		chomp $b;
+		chomp $c;
+		chomp $d;
+		if($a!~/^\@/){
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		if($b!~ /^[ACGTNacgtn]*$/){
+			#&printErr();
+			die "File $file_reads contains not allowed characters in sequences
+Allowed characters are ACGTN
+Reads file $file_reads is not a fasta file\n\n";
+		}
+		if ($c!~/^\@/ && $c!~/^\+/) {
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		if ((length $b) != (length $d)) {
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		my @qv=split //,$d;
+		for (my $j=0;$j<@qv ;$j++) {
+			my $q=ord($qv[$j])-64;
+			if($q<0){$phred_qv=33;}
+		}
+	}
+	close N;
+}
+
+sub search{
+	my ($dir,$str)=@_;
+	opendir I,$dir;
+	my @ret;
+	while (my $file=readdir I) {
+		if ($file=~/$str/) {
+			push @ret, $file;
+		}
+	}
+	closedir I;
+	if (@ret != 1) {
+		#&printErr();
+
+		die "Can not find directory or file which name has string: $str !!!\n";
+	}
+	return $ret[0];
+}
+
+sub Time{
+        my $time=time();
+        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+        $month++;
+        $year+=1900;
+        if (length($sec) == 1) {$sec = "0"."$sec";}
+        if (length($min) == 1) {$min = "0"."$min";}
+        if (length($hour) == 1) {$hour = "0"."$hour";}
+        if (length($day) == 1) {$day = "0"."$day";}
+        if (length($month) == 1) {$month = "0"."$month";}
+        #print "$year-$month-$day $hour:$min:$sec\n";
+        return("$year-$month-$day $hour:$min:$sec");
+}
+
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -format -gfa -index -rfam -a -M -min -max -mis -v -t  -o  -path
+options:
+-i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ...
+-tag string # raw data file names, -tag xxx -tag xxx
+
+-format string,#specific input rawdata file format : fastq|fq|fasta|fa
+-phred  int # phred quality number, default is 64
+
+-path scirpt path
+
+-gfa string,  input file # genome fasta. sequence file
+-idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+
+-rfam string,  input file# rfam database file, microRNAs must not be contained in this file## if not define, rfam small RNA will not be count.
+-idx2 string,  rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+
+-a string,  ADAPTER string. default is ATCTCGTATG.
+-M int,    require minimum adapter alignment length of N. If less than N nucleotides aligned with the adapter - don't clip it.
+-min int,  reads min length,default is 19.
+-max int,  reads max length,default is 28.
+
+-mis [int]     number of allowed mismatches when mapping reads to genome, default 0
+-v <int>     report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment
+
+-t int,    number of threads [1]
+
+-o output directory# absolute path
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preProcess.xml	Wed Dec 03 01:54:29 2014 -0500
@@ -0,0 +1,120 @@
+<tool id="preprocess" name="preProcess" veision="1.0.0">
+  <description>tool for Raw data preprocess analisis, including 3' adapter triming, reads collaping, genome mapping and  rfam non-miRNA analysis </description>
+
+  <requirements>
+	<requirement type="package" version="0.0.13">fastx_toolkit </requirement>
+    <requirement type="package" version="0.12.7">bowtie</requirement>
+    <requirement type="set_environment">SCRIPT_PATH</requirement>
+    <!--requirement type="package" version="3.0.1">R</requirement!-->
+	<requirement type="package" version="2.59">SVG</requirement>
+	<requirement type="package" version="2.1.8">ViennaRNA</requirement>
+  </requirements>
+
+  <!--command interpreter="perl">miPlant.pl -i $input -format $format -gfa $gfa -idx $index -pre $pre -mat $mat -rfam $rfam -idx2 $idx2 -D $D -a $a -M $M -min $min -max $max -mis $mis -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe -t $t -o $output</command-->
+
+  <command interpreter="perl">miRPlant.pl
+   ## Change this to accommodate the number of threads you have available.
+        -t \${GALAXY_SLOTS:-4}
+	-path \$SCRIPT_PATH
+
+    #for $j, $s in enumerate( $series )
+    ##rank_of_series=$j
+    -i ${s.input}
+    -tag ${s.tag}
+    #end for
+
+   ## Do or not annotate rfam non-miRNA RNAs
+    #if $params.annotate_rfam == "yes":
+	-rfam $rfam
+	#end if
+
+        ## prepare bowtie index
+        #set index_path = ''
+        #if str($reference_genome.source) == "history":
+            bowtie-build "$reference_genome.own_file" genome; ln -s "$reference_genome.own_file" genome.fa;
+            #set index_path = 'genome'
+        #else:
+            #set index_path = $reference_genome.index.fields.path
+		#end if
+
+    -format $format -gfa ${index_path}.fa -idx $index_path -a $a -M $mapnt -min $min -max $max -mis $mismatch -v $v > run.log
+  </command>
+
+  <inputs>
+
+   <repeat name="series" title="Series">
+     <param name="input" type="data" label="Raw data"/>
+     <param name="tag" type="text" data_ref="input" label="Sample name of raw data"/>
+   </repeat>
+
+	<!--param name="input" format="tabular"  type="data" label="input config file" /-->
+
+	<param name="format" type="select" lable="raw data format" multiple="false">
+	  <option value="fastq">Raw data is fastq. format</option>
+	  <option value="fasta">Raw data is fasta. format</option>
+	</param>
+
+	        <!-- reference genome -->
+        <conditional name="reference_genome">
+          <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in index</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+              <options from_data_table="bowtie_indexes">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+            <param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
+          </when>
+        </conditional>
+
+	<!--param name="gfa"  type="data" label="genome sequence fasta file"/-->
+	<!--param type="data" name="index" label="genome sequence bowtie index"/-->
+	<param name="a" type="text" value="ATCTCGTATG" label="3' adapter sequence" />
+	<param name="mapnt" type="integer" value="8" label="minimum adapter map nts" />
+	<param name="min" type="integer" value="19" label="minimum microRNA length" />
+	<param name="max" type="integer" value="28" label="maximum microRNA length" />
+	<param name="mismatch" type="integer" value="0" label="number of allowed mismatches when mapping reads to genome" />
+
+	<conditional name="params">
+		<param name="annotate_rfam" type="select" label="annotate rfam nocoding RNAs(excluding miRNA)">
+		  <option value="yes" selected="true">yes</option>
+		  <option value="no">no</option>
+		 </param>
+		 <when value="yes">
+			<param name="rfam" type="data" label="rfam sequence file" />
+			<!--param type="data" name="idx2" label="rfam sequence bowtie index " -->
+			<param name="v" type="integer" value="0" label="report end-to-end hits less than v mismatches for rfam mapping"/>
+		 </when>
+    </conditional> <!-- params -->
+
+  </inputs>
+
+  <outputs>
+   <data format="html" name="preprocess result" from_work_dir="preProcess/preprocessResult.html" label="${tool.name} on ${on_string}: preprocess result"/>
+
+   <data format="txt" name="clean FASTA data" from_work_dir="preProcess/preProcess_clean/collapse_reads_$min_$max.fa" label="${tool.name} on ${on_string}: clean FASTA data"/>
+
+   <data format="txt" name="genome mapping result" from_work_dir="preProcess/genome_match/genome_mapped.bwt" label="${tool.name} on ${on_string}: genome mapping result"/>
+   <data format="txt" name="genome mapped FASTA reads" from_work_dir="preProcess/genome_match/genome_mapped.fa" label="${tool.name} on ${on_string}: genome mapped FASTA reads"/>
+
+   <data format="txt" name="Rfam mapping result" from_work_dir="preProcess/rfam_match/rfam_mapped.bwt" label="${tool.name} on ${on_string}: Rfam mapping result">
+   <filter>(params['annotate_rfam'] == 'Yes')</filter>
+   </data>
+   <data format="txt" name="Rfam mapped FASTA file" from_work_dir="preProcess/rfam_match/rfam_mapped.fa" label="${tool.name} on ${on_string}: Rfam mapped FASTA file">
+   <filter>(params['annotate_rfam'] == 'Yes')</filter>
+   </data>
+   <data format="txt" name="Rfam not mapped FASTA file" from_work_dir="preProcess/rfam_match/rfam_not_mapped.fa" label="${tool.name} on ${on_string}: Rfam not mapped FASTA file">
+   <filter>(params['annotate_rfam'] == 'Yes')</filter>
+   </data>
+  </outputs>
+
+ <help>
+
+ </help>
+ </tool>
--- a/precursors.pl	Thu Nov 13 22:43:35 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,829 +0,0 @@
-#!/usr/bin/perl -w
-#Filename:
-#Author: Tian Dongmei
-#Email: tiandm@big.ac.cn
-#Date: 2013/7/19
-#Modified:
-#Description:
-my $version=1.00;
-
-use strict;
-use Getopt::Long;
-#use RNA;
-
-my %opts;
-GetOptions(\%opts,"map=s","g=s","d:i","f:i","o=s","e:f","s=s","h");
-if (!(defined $opts{map} and defined $opts{g} and defined $opts{o} and defined $opts{s} ) || defined $opts{h}) { #necessary arguments
-&usage;
-}
-
-my $checkno=1;
-my $filein=$opts{'map'};
-my $faout=$opts{'o'};
-my $strout=$opts{'s'};
-my $genome= $opts{'g'};
-
-my $maxd=defined $opts{'d'} ? $opts{'d'} : 200;
-my $flank=defined $opts{'f'}? $opts{'f'} : 10;
-
-my $MAX_ENERGY=-18;
-if (defined $opts{'e'}) {$MAX_ENERGY=$opts{'e'};}
-my $MAX_UNPAIR=5;
-my $MIN_PAIR=15;
-my $MAX_SIZEDIFF=4;
-my $MAX_BULGE=2;
-my $ASYMMETRY=5;
-my $MIN_UNPAIR=0;
-my $MIN_SPACE=5;
-my $MAX_SPACE=$maxd;
-my $FLANK=$flank;
-
-######### load in genome sequences start ########
-my %genome;
-my %lng;
-my $name;
-open IN,"<$genome";
-while (my $aline=<IN>) {
-	chomp $aline;
-	next if($aline=~/^\#/);
-	if ($aline=~/^>(\S+)/) {
-		$name=$1;
-		next;
-	}
-	$genome{$name} .=$aline;
-}
-close IN;
-foreach my $key (keys %genome) {
-	$lng{$key}=length($genome{$key});
-}
-####### load in genome sequences end ##########
-
-my %breaks; ### reads number bigger than 3
-open IN,"<$filein"; #input file
-while (my $aline=<IN>) {
-	chomp $aline;
-	my @tmp=split/\t/,$aline;
-	$tmp[0]=~/_x(\d+)$/;
-	my $no=$1;
-	next if($no<3);
-	#my $trand=&find_strand($tmp[9]);
-	#my @pos=split/\.\./,$tmp[5];
-	my $end=$tmp[3]+length($tmp[4])-1;
-	if($tmp[1] eq "-"){$tmp[4]=revcom($tmp[4]);}
-	push @{$breaks{$tmp[2]}{$tmp[1]}},[$tmp[3],$end,$no,$tmp[4]]; ### 0 base
-}
-close IN;
-
-my %cites; ### peaks
-foreach my $chr (keys %breaks) {
-	foreach my $strand (keys %{$breaks{$chr}}) {
-		my @array=@{$breaks{$chr}{$strand}};
-		@array=sort{$a->[0]<=>$b->[0]} @array;
-		for (my $i=0;$i<@array;$i++) {
-			my $start=$array[$i][0];my $end=$array[$i][1];
-			my @subarray=();
-			push @subarray,$array[$i];
-
-			for (my $j=$i+1;$j<@array;$j++) {
-				if ($start<$array[$j][1] && $end>$array[$j][0]) {
-					push @subarray,$array[$j];
-					($start,$end)=&newpos($start,$end,$array[$j][0],$array[$j][1]);
-				}
-				else{
-					$i=$j;
-					&find_cites(\@subarray,$chr,$strand);
-					last;
-				}
-			}
-		}
-	}
-}
-
-open FA,">$faout"; #output file
-open STR,">$strout";
-foreach my $chr (keys %cites) {
-	foreach my $strand (keys %{$cites{$chr}}) {
-
-		my @array2=@{$cites{$chr}{$strand}};
-		@array2=sort{$a->[0]<=>$b->[0]} @array2;
-		&excise(\@array2,$chr,$strand);
-	}
-}
-close FA;
-close STR;
-sub oneCiteDn{
-	my ($array,$a,$chr,$strand)=@_;
-
-	my $ss=$$array[$a][0]-$flank;
-	$ss=0 if($ss<0);
-	my $ee=$$array[$a][1]+$maxd+$flank;
-	$ee=$lng{$chr} if($ee>$lng{$chr});
-
-	my $seq=substr($genome{$chr},$ss,$ee-$ss+1);
-	if($strand eq "-"){$seq=revcom($seq);}
-
-	my $val=&ffw1($seq,$$array[$a][3],$chr,$strand,$ss,$ee);
-	return $val;
-}
-sub oneCiteUp{
-	my ($array,$a,$chr,$strand)=@_;
-
-	my $ss=$$array[$a][0]-$maxd-$flank;
-	$ss=0 if($ss<0);
-	my $ee=$$array[$a][1]+$flank;
-	$ee=$lng{$chr} if($ee>$lng{$chr});
-
-	my $seq=substr($genome{$chr},$ss,$ee-$ss+1);
-	if($strand eq "-"){$seq=revcom($seq);}
-
-	my $val=&ffw1($seq,$$array[$a][3],$chr,$strand,$ss,$ee);
-	return $val;
-
-}
-
-sub twoCites{
-	my ($array,$a,$b,$chr,$strand)=@_;
-
-	my $ss=$$array[$a][0]-$flank;
-	$ss=0 if($ss<0);
-	my $ee=$$array[$b][1]+$flank;
-	$ee=$lng{$chr} if($ee>$lng{$chr});
-
-	my $seq=substr($genome{$chr},$ss,$ee-$ss+1);
-	if($strand eq "-"){$seq=revcom($seq);}
-
-#	my( $str,$mfe)=RNA::fold($seq);
-#	return 0 if($mfe>$MAX_ENERGY); ### minimum mfe
-	my $val=&ffw2($seq,$$array[$a][3],$$array[$b][3],$chr,$strand,$ss,$ee);
-
-	return $val;
-
-}
-sub excise{
-	my ($cluster,$chr,$strand)=@_;
-
-	my $last_pos=0;
-	for (my $i=0;$i<@{$cluster};$i++) {
-		next if($$cluster[$i][0]<$last_pos);
-		my $ok=0;
-		for (my $j=$i+1;$j<@{$cluster} ;$j++) {
-			if($$cluster[$j][0]-$$cluster[$i][1]>$maxd){
-				$i=$j;
-				last;
-			}else{
-				$ok=&twoCites($cluster,$i,$j,$chr,$strand);
-				if($ok){ $last_pos=$$cluster[$j][1]+$flank; $i=$j; last;}
-			}
-		}
-		next if($ok);
-
-		$ok=&oneCiteDn($cluster,$i,$chr,$strand);
-		if($ok){$last_pos=$$cluster[$i][1]+$maxd+$flank; next;}
-		$ok=&oneCiteUp($cluster,$i,$chr,$strand);
-		if($ok){$last_pos=$$cluster[$i][1]+$flank;next;}
-	}
-
-
-}
-
-sub ffw2{
-	my ($seq,$tag1,$tag2,$chr,$strand,$ss,$ee)=@_;
-
-	my $N_count=$seq=~tr/N//;  ## precursor sequence has not more than 5 Ns
-	if ($N_count > 5) {
-		return 0;
-	}
-
-	my $seq_length=length $seq;
-	# position tag1 and tag2
-	my $tag1_beg=index($seq,$tag1,0)+1;
-	if ($tag1_beg < 1) {
-		warn "[ffw2] coordinate error.\n";
-#		$fold->{reason}="coordinate error";
-		return 0;
-	}
-	my $tag2_beg=index($seq,$tag2,0)+1;
-	if ($tag2_beg < 1) {
-		warn "[ffw2] coordinate error.\n";
-#		$fold->{reason}="coordinate error";
-		return 0;
-	}
-	if ($tag2_beg < $tag1_beg) {
-		# swap tag1 and tag2
-		($tag1,$tag2)=($tag2,$tag1);
-		($tag1_beg,$tag2_beg)=($tag2_beg,$tag1_beg);
-	}
-	my $tag1_end=$tag1_beg+length($tag1)-1;
-	my $tag2_end=$tag2_beg+length($tag2)-1;
-	# re-clipping
-	my $beg=$tag1_beg-$FLANK; $beg=1 if $beg < 1;
-	my $end=$tag2_end+$FLANK; $end=$seq_length if $end > $seq_length;
-	$seq=substr($seq,$beg-1,$end-$beg+1);
-	$seq_length=length $seq;
-	# re-reposition
-	$tag1_beg=index($seq,$tag1,0)+1;
-	if ($tag1_beg < 1) {
-		warn "[ffw2] coordinate error.\n";
-#		$fold->{reason}="coordinate error";
-		return 0;
-	}
-
-	$tag2_beg=index($seq,$tag2,0)+1;
-	if ($tag2_beg < 1) {
-		warn "[ffw2] coordinate error.\n";
-#		$fold->{reason}="coordinate error";
-		return 0;
-	}
-	$tag1_end=$tag1_beg+length($tag1)-1;
-	$tag2_end=$tag2_beg+length($tag2)-1;
-
-	# fold
-	#my ($struct,$mfe)=RNA::fold($seq);
-		my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
-		my @rawfolds=split/\s+/,$rnafold;
-		my $struct=$rawfolds[1];
-		my $mfe=$rawfolds[-1];
-		$mfe=~s/\(//;
-		$mfe=~s/\)//;
-	#$mfe=sprintf "%.2f", $mfe;
-	if ($mfe > $MAX_ENERGY) {return 0;}
-
-	# tag1
-	my $tag1_length=$tag1_end-$tag1_beg+1;
-	my $tag1_struct=substr($struct,$tag1_beg-1,$tag1_length);
-	my $tag1_arm=which_arm($tag1_struct);
-	my $tag1_unpair=$tag1_struct=~tr/.//;
-	my $tag1_pair=$tag1_length-$tag1_unpair;
-	my $tag1_max_bulge=biggest_bulge($tag1_struct);
-	if ($tag1_arm ne "5p") { return 0;} # tag not in stem
-#	if ($tag1_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag1_unpair ($MAX_UNPAIR)"; return $pass}
-	if ($tag1_pair < $MIN_PAIR) {return 0;}
-	if ($tag1_max_bulge > $MAX_BULGE) {return 0;}
-
-	# tag2
-	my $tag2_length=$tag2_end-$tag2_beg+1;
-	my $tag2_struct=substr($struct,$tag2_beg-1,$tag2_length);
-	my $tag2_arm=which_arm($tag2_struct);
-	my $tag2_unpair=$tag2_struct=~tr/.//;
-	my $tag2_pair=$tag2_length-$tag2_unpair;
-	my $tag2_max_bulge=biggest_bulge($tag2_struct);
-	if ($tag2_arm ne "3p") {return 0;} # star not in stem
-#	if ($tag2_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag2_unpair ($MAX_UNPAIR)"; return $pass}
-	if ($tag2_pair < $MIN_PAIR) {return 0;}
-	if ($tag2_max_bulge > $MAX_BULGE) {return 0;}
-
-	# space size between miR and miR*
-	my $space=$tag2_beg-$tag1_end-1;
-	if ($space < $MIN_SPACE) {return 0;}
-	if ($space > $MAX_SPACE) {return 0;}
-
-	# size diff of miR and miR*
-	my $size_diff=abs($tag1_length-$tag2_length);
-	if ($size_diff > $MAX_SIZEDIFF) {return 0;}
-
-	# build base pairing table
-	my %pairtable;
-	&parse_struct($struct,\%pairtable); # coords count from 1
-
-	my $asy1=get_asy(\%pairtable,$tag1_beg,$tag1_end);
-	my $asy2=get_asy(\%pairtable,$tag2_beg,$tag2_end);
-	my $asy=($asy1 < $asy2) ? $asy1 : $asy2;
-	if ($asy > $ASYMMETRY) {return 0}
-
-	# duplex fold, determine whether two matures like a miR/miR* ike duplex
-	my ($like_mir_duplex1,$duplex_pair,$overhang1,$overhang2)=likeMirDuplex1($tag1,$tag2);
-	# parse hairpin, determine whether two matures form miR/miR* duplex in hairpin context
-	my ($like_mir_duplex2,$duplex_pair2,$overhang_b,$overhang_t)=likeMirDuplex2(\%pairtable,$tag1_beg,$tag1_end,$tag2_beg,$tag2_end);
-	if ($like_mir_duplex1==0 && $like_mir_duplex2==0) {
-		return 0;
-	}
-
-	print FA ">$chr:$strand:$ss..$ee\n$seq\n";
-	print STR ">$chr:$strand:$ss..$ee\n$seq\n$struct\t($mfe)\n";
-
-	return 1;
-}
-
-sub ffw1{
-	my ($seq,$tag,$chr,$strand,$ss,$ee)=@_;
-	my $pass=0;
-
-	my $N_count=$seq=~tr/N//;
-	if ($N_count > 5) {
-		return 0;
-	}
-
-	my $seq_length=length $seq;
-	my $tag_length=length $tag;
-
-	# position
-	my $tag_beg=index($seq,$tag,0)+1;
-	if ($tag_beg < 1) {
-		 warn "[ffw1] coordinate error.\n";
-		 return $pass;
-	}
-	my $tag_end=$tag_beg+length($tag)-1;
-
-
-	# define candidate precursor by hybrid short arm to long arm, not solid enough
-	my($beg,$end)=define_precursor($seq,$tag);
-	if (not defined $beg) {
-		return $pass;
-	}
-	if (not defined $end) {
-		return $pass;
-	}
-	$seq=substr($seq,$beg-1,$end-$beg+1);
-	$seq_length=length $seq;
-
-
-	# fold
-	#my ($struct,$mfe)=RNA::fold($seq);
-		my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
-		my @rawfolds=split/\s+/,$rnafold;
-		my $struct=$rawfolds[1];
-		my $mfe=$rawfolds[-1];
-		$mfe=~s/\(//;
-		$mfe=~s/\)//;
-
-	if ($mfe > $MAX_ENERGY) {
-		$pass=0;
-		return $pass;
-	}
-
-	# reposition
-	$tag_beg=index($seq,$tag,0)+1;
-	if ($tag_beg < 1) {
-		 warn "[ffw1] coordinate error.\n";
-		 return 0;
-	}
-	$tag_end=$tag_beg+length($tag)-1;
-
-	my $tag_struct=substr($struct,$tag_beg-1,$tag_length);
-	my $tag_arm=which_arm($tag_struct);
-	my $tag_unpair=$tag_struct=~tr/.//;
-	my $tag_pair=$tag_length-$tag_unpair;
-	my $tag_max_bulge=biggest_bulge($tag_struct);
-	if ($tag_arm eq "-") { return $pass;}
-#	if ($tag_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag_unpair ($MAX_UNPAIR)"; return $pass}
-	if ($tag_pair < $MIN_PAIR) { return $pass;}
-	if ($tag_max_bulge > $MAX_BULGE) {return $pass;}
-
-	# build base pairing table
-	my %pairtable;
-	&parse_struct($struct,\%pairtable); # coords count from 1
-
-	# get star
-	my ($star_beg,$star_end)=get_star(\%pairtable,$tag_beg,$tag_end);
-	my $star=substr($seq,$star_beg-1,$star_end-$star_beg+1);
-	my $star_length=$star_end-$star_beg+1;
-	my $star_struct=substr($struct,$star_beg-1,$star_end-$star_beg+1);
-	my $star_arm=which_arm($star_struct);
-	my $star_unpair=$star_struct=~tr/.//;
-	my $star_pair=$star_length-$star_unpair;
-	my $star_max_bulge=biggest_bulge($star_struct);
-	if ($star_arm eq "-") { return $pass;}
-#	if ($star_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$star_unpair ($MAX_UNPAIR)"; return $pass}
-	if ($star_pair < $MIN_PAIR) {return $pass;}
-	if ($star_max_bulge > $MAX_BULGE) {return $pass;}
-
-	if ($tag_arm eq $star_arm) {return $pass;}
-
-	# space size between miR and miR*
-	my $space;
-	if ($tag_beg < $star_beg) {
-		$space=$star_beg-$tag_end-1;
-	}
-	else {
-		$space=$tag_beg-$star_end-1;
-	}
-	if ($space < $MIN_SPACE) { return $pass;}
-	if ($space > $MAX_SPACE) { return $pass;}
-
-	# size diff
-	my $size_diff=abs($tag_length-$star_length);
-	if ($size_diff > $MAX_SIZEDIFF) { return $pass;}
-
-	# asymmetry
-	my $asy=get_asy(\%pairtable,$tag_beg,$tag_end);
-	if ($asy > $ASYMMETRY) {return $pass;}
-
-	$pass=1;
-	print FA ">$chr:$strand:$ss..$ee\n$seq\n";
-	print STR ">$chr:$strand:$ss..$ee\n$seq\n$struct\t($mfe)\n";
-	return $pass;
-
-}
-sub get_star {
-	my($table,$beg,$end)=@_;
-
-	my ($s1,$e1,$s2,$e2); # s1 pair to s2, e1 pair to e2
-	foreach my $i ($beg..$end) {
-		if (defined $table->{$i}) {
-			my $j=$table->{$i};
-			$s1=$i;
-			$s2=$j;
-			last;
-		}
-	}
-	foreach my $i (reverse ($beg..$end)) {
-		if (defined $table->{$i}) {
-			my $j=$table->{$i};
-			$e1=$i;
-			$e2=$j;
-			last;
-		}
-	}
-#	print "$s1,$e1 $s2,$e2\n";
-
-	# correct terminus
-	my $off1=$s1-$beg;
-	my $off2=$end-$e1;
-	$s2+=$off1;
-	$s2+=2; # 081009
-	$e2-=$off2; $e2=1 if $e2 < 1;
-	$e2+=2; $e2=1 if $e2 < 1; # 081009
-	($s2,$e2)=($e2,$s2) if ($s2 > $e2);
-	return ($s2,$e2);
-}
-
-sub define_precursor {
-	my $seq=shift;
-	my $tag=shift;
-
-	my $seq_length=length $seq;
-	my $tag_length=length $tag;
-	my $tag_beg=index($seq,$tag,0)+1;
-	my $tag_end=$tag_beg+$tag_length-1;
-
-	# split the candidate region into short arm and long arm
-	my $tag_arm;
-        my ($larm,$larm_beg,$larm_end);
-	my ($sarm,$sarm_beg,$sarm_end);
-        if ($tag_beg-1 < $seq_length-$tag_end) { # on 5' arm
-                $sarm=substr($seq,0,$tag_end);
-		$larm=substr($seq,$tag_end);
-		$sarm_beg=1;
-		$sarm_end=$tag_end;
-		$larm_beg=$tag_end+1;
-		$larm_end=$seq_length;
-		$tag_arm="5p";
-        }
-        else {
-            $larm=substr($seq,0,$tag_beg-1); # on 3' arm
-			$sarm=substr($seq,$tag_beg-1);
-			$larm_beg=1;
-			$larm_end=$tag_beg-1;
-			$sarm_beg=$tag_beg;
-			$sarm_end=$seq_length;
-			$tag_arm="3p";
-        }
-
-#	print "$sarm_beg,$sarm_end $sarm\n";
-#	print "$larm_beg,$larm_end $larm\n";
-
-	# clipping short arm
-	if ($tag_arm eq "5p") {
-		$sarm_beg=$tag_beg-$flank; $sarm_beg=1 if $sarm_beg < 1;
-		$sarm=substr($seq,$sarm_beg-1,$sarm_end-$sarm_beg+1);
-	}
-	else {
-		$sarm_end=$tag_end+$flank; $sarm_end=$seq_length if $sarm_end > $seq_length;
-		$sarm=substr($seq,$sarm_beg-1,$sarm_end-$sarm_beg+1);
-	}
-#	print "$sarm_beg,$sarm_end $sarm\n";
-#	print "$larm_beg,$larm_end $larm\n";
-
-	# define the precursor by hybriding short arm to long arm
-=cut #modify in 2014-10-28
-	my $duplex=RNA::duplexfold($sarm,$larm);
-	my $struct=$duplex->{structure};
-	my $energy=sprintf "%.2f", $duplex->{energy};
-	my ($str1,$str2)=split(/&/,$struct);
-	my $pair=$str1=~tr/(//;
-#	print "pair=$pair\n";
-	my $beg1=$duplex->{i}+1-length($str1);
-	my $end1=$duplex->{i};
-	my $beg2=$duplex->{j};
-	my $end2=$duplex->{j}+length($str2)-1;
-=cut
-###### new codes begin
-	my $duplex=`perl -e 'print "$sarm\n$larm"' | RNAduplex`;
-	#(.(.(((.....(((.&))))))...).).   1,16  :   1,13  (-7.20)
-	my @tmpduplex=split/\s+/,$duplex;
-	my $struct=$tmpduplex[0];
-	$tmpduplex[-1]=~s/[(|)]//g;
-	my $energy=$tmpduplex[-1];
-	my ($str1,$str2)=split(/&/,$struct);
-	my $pair=$str1=~tr/(//;
-	my ($beg1,$end1)=split/,/,$tmpduplex[1];
-	my ($beg2,$end2)=split/,/,$tmpduplex[3];
-######## new codes end
-
-#	print "$beg1:$end1 $beg2:$end2\n";
-	# transform coordinates
-	$beg1=$beg1+$sarm_beg-1;
-	$end1=$end1+$sarm_beg-1;
-	$beg2=$beg2+$larm_beg-1;
-	$end2=$end2+$larm_beg-1;
-#	print "$beg1:$end1 $beg2:$end2\n";
-
-	my $off5p=$beg1-$sarm_beg;
-	my $off3p=$sarm_end-$end1;
-	$beg2-=$off3p; $beg2=1 if $beg2 < 1;
-	$end2+=$off5p; $end2=$seq_length if $end2 > $seq_length;
-
-#	print "$beg1:$end1 $beg2:$end2\n";
-
-	my $beg=$sarm_beg < $beg2 ? $sarm_beg : $beg2;
-	my $end=$sarm_end > $end2 ? $sarm_end : $end2;
-
-	return if $pair < $MIN_PAIR;
-#	print "$beg,$end\n";
-	return ($beg,$end);
-}
-
-
-# duplex fold, judge whether two short seqs like a miRNA/miRNA* duplex
-sub likeMirDuplex1 {
-	my $seq1=shift;
-	my $seq2=shift;
-	my $like_mir_duplex=1;
-
-	my $length1=length $seq1;
-	my $length2=length $seq2;
-=cut
-	my $duplex=RNA::duplexfold($seq1, $seq2);
-	my $duplex_struct=$duplex->{structure};
-	my $duplex_energy=sprintf "%.2f", $duplex->{energy};
-	my ($str1,$str2)=split(/&/,$duplex_struct);
-	my $beg1=$duplex->{i}+1-length($str1);
-	my $end1=$duplex->{i};
-	my $beg2=$duplex->{j};
-	my $end2=$duplex->{j}+length($str2)-1;
-=cut
-	my $duplex=`perl -e 'print "$seq1\n$seq2"' | RNAduplex`;
-	#(.(.(((.....(((.&))))))...).).   1,16  :   1,13  (-7.20)
-	my @tmpduplex=split/\s+/,$duplex;
-	my $duplex_struct=$tmpduplex[0];
-	$tmpduplex[-1]=~s/[(|)]//g;
-	my $duplex_energy=$tmpduplex[-1];
-	my ($str1,$str2)=split(/&/,$duplex_struct);
-	#my $pair=$str1=~tr/(//;
-	my ($beg1,$end1)=split/,/,$tmpduplex[1];
-	my ($beg2,$end2)=split/,/,$tmpduplex[3];
-
-	# revise beg1, end1, beg2, end2
-	$str1=~/^(\.*)/;
-	$beg1+=length($1);
-	$str1=~/(\.*)$/;
-	$end1-=length($1);
-	$str2=~/^(\.*)/;
-	$beg2+=length($1);
-	$str2=~/(\.*)$/;
-	$end2-=length($1);
-
-	my $pair_num=$str1=~tr/(//;
-	my $overhang1=($length2-$end2)-($beg1-1); # 3' overhang at hairpin bottom
-	my $overhang2=($length1-$end1)-($beg2-1); # 3' overhang at hairpin neck
-#	print $pair_num,"\n";
-#	print $overhang1,"\n";
-#	print $overhang2,"\n";
-	if ($pair_num < 13) {
-		$like_mir_duplex=0;
-	}
-	if ($overhang1 < 0 || $overhang2 < 0 ) {
-		$like_mir_duplex=0;
-	}
-	if ($overhang1 > 4 || $overhang2 > 4) {
-		$like_mir_duplex=0;
-	}
-	return ($like_mir_duplex,$pair_num,$overhang1,$overhang2);
-}
-
-# judge whether two matures form miR/miR* duplex, in hairpin context
-sub likeMirDuplex2 {
-	my ($table,$beg1,$end1,$beg2,$end2)=@_;
-	my $like_mir_duplex=1;
-
-#	   s1         e1
-#   5 ----------------------------3
-#      | | |||| |||               |
-#3 -------------------------------5
-#      e2         s2
-
-	my $pair_num=0;
-	my $overhang1=0;
-	my $overhang2=0;
-	my ($s1,$e1,$s2,$e2);
-	foreach my $i ($beg1..$end1) {
-		if (defined $table->{$i}) {
-			my $j=$table->{$i};
-			if ($j <= $end2 && $j >= $beg2) {
-				$s1=$i;
-				$e2=$j;
-				last;
-			}
-		}
-	}
-	foreach my $i (reverse ($beg1..$end1)) {
-		if (defined $table->{$i}) {
-			my $j=$table->{$i};
-			if ($j <= $end2 && $j >= $beg2) {
-				$e1=$i;
-				$s2=$j;
-				last;
-			}
-		}
-	}
-
-#	print "$beg1,$end1 $s1,$e1\n";
-#	print "$beg2,$end2 $s2,$e2\n";
-
-	foreach my $i ($beg1..$end1) {
-		if (defined $table->{$i}) {
-			my $j=$table->{$i};
-			if ($j <= $end2 && $j >= $beg2) {
-				++$pair_num;
-			}
-		}
-	}
-	if (defined $s1 && defined $e2) {
-		$overhang1=($end2-$e2)-($s1-$beg1);
-	}
-	if (defined $e1 && defined $s2) {
-		$overhang2=($end1-$e1)-($s2-$beg2);
-	}
-
-	if ($pair_num < 13) {
-		$like_mir_duplex=0;
-	}
-	if ($overhang1 < 0 && $overhang2 < 0) {
-		$like_mir_duplex=0;
-	}
-	return ($like_mir_duplex,$pair_num,$overhang1,$overhang2);
-}
-sub parse_struct {
-	my $struct=shift;
-	my $table=shift;
-
-	my @t=split('',$struct);
-	my @lbs; # left brackets
-	foreach my $k (0..$#t) {
-		if ($t[$k] eq "(") {
-			push @lbs, $k+1;
-		}
-		elsif ($t[$k] eq ")") {
-			my $lb=pop @lbs;
-			my $rb=$k+1;
-			$table->{$lb}=$rb;
-			$table->{$rb}=$lb;
-		}
-	}
-	if (@lbs) {
-		warn "unbalanced RNA struct.\n";
-	}
-}
-sub which_arm {
-	my $substruct=shift;
-	my $arm;
-	if ($substruct=~/\(/ && $substruct=~/\)/) {
-		$arm="-";
-	}
-	elsif ($substruct=~/\(/) {
-		$arm="5p";
-	}
-	else {
-		$arm="3p";
-	}
-	return $arm;
-}
-sub biggest_bulge {
-	my $struct=shift;
-	my $bulge_size=0;
-	my $max_bulge=0;
-	while ($struct=~/(\.+)/g) {
-		$bulge_size=length $1;
-		if ($bulge_size > $max_bulge) {
-			$max_bulge=$bulge_size;
-		}
-	}
-	return $max_bulge;
-}
-sub get_asy {
-	my($table,$a1,$a2)=@_;
-	my ($pre_i,$pre_j);
-	my $asymmetry=0;
-	foreach my $i ($a1..$a2) {
-		if (defined $table->{$i}) {
-			my $j=$table->{$i};
-			if (defined $pre_i && defined $pre_j) {
-				my $diff=($i-$pre_i)+($j-$pre_j);
-				$asymmetry += abs($diff);
-			}
-			$pre_i=$i;
-			$pre_j=$j;
-		}
-	}
-	return $asymmetry;
-}
-
-sub peaks{
-	my @cluster=@{$_[0]};
-
-	return if(@cluster<1);
-
-	my $max=0; my $index=-1;
-	for (my $i=0;$i<@cluster;$i++) {
-		if($cluster[$i][2]>$max){
-			$max=$cluster[$i][2];
-			$index=$i;
-		}
-	}
-#	&excise(\@cluster,$index,$_[1],$_[2]);
-	return($index);
-}
-
-sub find_cites{
-	my @tmp=@{$_[0]};
-	my $i=&peaks(\@tmp);
-
-	my $start=$tmp[$i][0];
-	my $total=0; my $node5=0;
-	for (my $j=0;$j<@tmp ;$j++) {
-		$total+=$tmp[$j][2];
-		$node5 +=$tmp[$j][2] if($tmp[$j][0]-$start<=2 && $tmp[$j][0]-$start>=-2);
-	}
-	push @{$cites{$_[1]}{$_[2]}},$tmp[$i] if($node5/$total>0.80 && $tmp[$i][2]/$node5>0.5);
-}
-
-sub newpos{
-	my ($a,$b,$c,$d)=@_;
-	my $s= $a>$c ? $c : $a;
-	my $e=$b>$d ? $b : $d;
-	return($s,$e);
-}
-
-sub rev{
-
-    my($sequence)=@_;
-
-    my $rev=reverse $sequence;
-
-    return $rev;
-}
-
-sub com{
-
-    my($sequence)=@_;
-
-    $sequence=~tr/acgtuACGTU/TGCAATGCAA/;
-
-    return $sequence;
-}
-
-sub revcom{
-
-    my($sequence)=@_;
-
-    my $revcom=rev(com($sequence));
-
-    return $revcom;
-}
-
-sub find_strand{
-
-    #A subroutine to find the strand, parsing different blast formats
-    my($other)=@_;
-
-    my $strand="+";
-
-    if($other=~/-/){
-	$strand="-";
-    }
-
-    if($other=~/minus/i){
-	$strand="-";
-    }
-
-    return($strand);
-}
-sub usage{
-print <<"USAGE";
-Version $version
-Usage:
-$0 -map -g -d -f -o -s -e
-options:
-  -map input file# align result # bst. format
-  -g input file # genome sequence fasta format
-  -d <int>   Maximal space between miRNA and miRNA* (200)
-  -f <int>   Flank sequence length of miRNA precursor (10)
-  -o output file# percursor fasta file
-  -s output file# precursor structure file
-  -e <folat> Maximal free energy allowed for a miRNA precursor (-18 kcal/mol)
-
-  -h help
-USAGE
-exit(1);
-}
-
--- a/quantify.pl	Thu Nov 13 22:43:35 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,503 +0,0 @@
-#!/usr/bin/perl -w
-#Filename:
-#Author: Tian Dongmei
-#Email: tiandm@big.ac.cn
-#Date: 2013/7/19
-#Modified:
-#Description:
-my $version=1.00;
-
-use File::Path;
-use strict;
-use File::Basename;
-#use Getopt::Std;
-use Getopt::Long;
-#use RNA;
-
-my %opts;
-GetOptions(\%opts,"r=s","p=s","m=s","mis:i","t:i","e:i","f:i","tag:s","o=s","time:s","h");
-if (!(defined $opts{r} and defined $opts{p} and defined $opts{m} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
-&usage;
-}
-
-my $read=$opts{'r'};
-my $pre=$opts{'p'};
-my $mature=$opts{'m'};
-
-my $dir=$opts{'o'};
-unless ($dir=~/\/$/) {$dir .="/";}
-if (not -d $dir) {
-	mkdir $dir;
-}
-
-my $threads=defined $opts{'t'} ? $opts{'t'} : 1;
-my $mismatch=defined $opts{'mis'} ?  $opts{'mis'} : 0;
-
-my $upstream = 2;
-my $downstream = 5;
-
-$upstream = $opts{'e'} if(defined $opts{'e'});
-$downstream = $opts{'f'} if(defined $opts{'f'});
-
-my $marks=defined $opts{'tag'} ? $opts{'tag'} : "";
-
-my $time=Time();
-if (defined $opts{'time'}) { $time=$opts{'time'};}
-
-my $tmpdir="${dir}/miRNA_Express_${time}";
-if(not -d $tmpdir){
-	mkdir($tmpdir);
-}
-chdir $tmpdir;
-
-`cp $pre ./`;
-my $pre_file_name=basename($pre);
-
-&mapping(); # matures align to precursors && reads align to precursors;
-
-my %pre_mature; # $pre_mature{pre_id}{matre_ID}{"mature"}[0]->start; $pre_mature{pre_id}{matre_ID}{"mature"}[1]->end;
-&maturePosOnPre(); # acknowledge mature positions on precursor
-
-my %pre_read;
-&readPosOnPre(); # acknowledge reads positions on precursors
-
-if(!(defined $opts{'tag'})){
-	foreach my $key (keys %pre_read) {
-		$pre_read{$key}[0][0]=~/:([\d|_]+)_x(\d+)$/;
-		my @ss=split/_/,$1;
-		for (my $i=1;$i<=@ss;$i++) {
-			$marks .="Smp$i;";
-		}
-		last;
-	}
-}
-
-my %pre;## read in precursor sequences #$pre{pre_id}="CGTA...."
-&attachPre();
-
-my $preno=scalar (keys %pre);
-print "Total Precursor Number is $preno !!!!\n";
-
-my %struc; #mature  star  loop; $struc{$key}{"struc"}=$str; $struc{$key}{"mfe"}=$mfe;
-&structure();
-
-
-##### analysis and print out  && moRs
-my $aln=$dir."known_microRNA_express.aln";
-my $list=$dir."known_microRNA_express.txt";
-my $moRs=$dir."known_microRNA_express.moRs";
-
-system("ln $mature $dir/known_microRNA_mature.fa ");
-system("ln $pre $dir/known_microRNA_precursor.fa ");
-
-open ALN,">$aln";
-open LIST,">$list";
-open MORS,">$moRs";
-
-$"="\t"; ##### @array print in \t
-
-my @marks=split/\;/,$marks;
-#print LIST "#matueID\tpreID\tpos1\tpos2\tmatureExp\tstarExp\ttotalExp\n";
-print LIST "#matueID\tpreID\tpos1\tpos2";
-for (my $i=0;$i<@marks;$i++) {
-	print LIST "\t",$marks[$i],"_matureExp";
-}
-for (my $i=0;$i<@marks;$i++) {
-	print LIST "\t",$marks[$i],"_starExp";
-}
-for (my $i=0;$i<@marks;$i++) {
-	print LIST "\t",$marks[$i],"_totalExp";
-}
-print LIST "\n";
-print ALN "#>precursor ID \n#precursor sequence\n#precursor structure (mfe)\n#RNA_seq\t@marks\ttotal\n";
-print MORS "#>precursor ID\tstrand\texpress_reads\texpress_reads\/total_reads\tblock_number\tprecursor_sequence\n#\tblock_start\tblock_end\t@marks\ttotal\ttag_number\tsequence\n";
-my %moRs;
-
-foreach my $key (keys %pre) {
-	print ALN ">$key\n$pre{$key}\n$struc{$key}{struc}  ($struc{$key}{mfe})\n";
-	next if(! (exists $pre_read{$key}));
-	my @array=@{$pre_read{$key}};
-	@array=sort{$a->[3]<=> $b->[3]} @array;
-
-	my $length=length($pre{$key});
-
-	my $maxline=-1;my $max=0; ### storage the maxinum express read line
-	my $totalReadsNo=0;
-	my @not_over=(); ### new read format better for moRs analysis
-
-####print out Aln file start
-	for (my $i=0;$i<@array;$i++) {
-		my $maps=$array[$i][3]+1;
-		my $mape=$array[$i][3]+length($array[$i][4]);
-		my $str="";
-		$str .= "." x ($maps-1);
-		$str .=$array[$i][4];
-		$str .="." x ($length-$mape);
-		$str .="        ";
-
-		$array[$i][0]=~/:([\d|_]+)_x(\d+)$/;
-		my @sample=split /\_/,$1;
-		my $total=$2;
-		print ALN $str,"@sample","\t",$total,"\n";
-
-		if($total>$max){$max=$total; $maxline=$i;}
-		$totalReadsNo+=$total;
-
-		push @not_over,[$key,$maps,$mape,$array[$i][0],$total,"+"];
-	}
-####print out Aln file end
-
-#### express list start
-	my ($ms,$me,$ss,$se);
-	if (!(exists($pre_mature{$key}))) {
-		$ms=$array[$maxline][3]+1;
-		$me=$array[$maxline][3]+length($array[$maxline][4]);
-		($ss,$se)=&other_pair($ms,$me,$struc{$key}{'struc'});
-
-		my ($mexp,$sexp,$texp)=&express($ms-$upstream,$me+$downstream,$ss-$upstream,$se+$downstream,\@array);
-		print LIST "$key\t$key\tmature:$ms..$me\tstar:$ss..$se\t@$mexp\t@$sexp\t@$texp\n";
-	}
-	else{
-		foreach my $maID (keys %{$pre_mature{$key}}) {
-			$ms=$pre_mature{$key}{$maID}{"mature"}[0];
-			$me=$pre_mature{$key}{$maID}{"mature"}[1];
-			$ss=$pre_mature{$key}{$maID}{"star"}[0];
-			$se=$pre_mature{$key}{$maID}{"star"}[1];
-			my ($mexp,$sexp,$texp)=&express($ms-$upstream,$me+$downstream,$ss-$upstream,$se+$downstream,\@array);
-			print LIST "$maID\t$key\tmature:$ms..$me\tstar:$ss..$se\t@$mexp\t@$sexp\t@$texp\n";
-		}
-	}
-#### express list end
-
-#### analysis moRs start
-	my @result; my @m_texp;my $m_texp=0; ### moRs informations
-
-	while (@not_over>0) {
-		my @over=@not_over;
-		@not_over=();
-
-#������tag
-		my $m_max=0;my $m_maxline=-1;my $m_start=0;my $m_end=0;my $m_exp=0;my @m_exp;my $m_no=1;
-		for (my $i=0;$i<@over;$i++) {
-			my @m_array=@{$over[$i]};
-			if ($m_max<$m_array[4]) {
-				$m_max=$m_array[4];
-				$m_maxline=$i;
-			}
-		}
-		$m_start=$over[$m_maxline][1];
-		$m_end=$over[$m_maxline][2];
-		$m_exp=$m_max;
-		$over[$m_maxline][3]=~/:([\d|_]+)_x(\d+)$/;
-		my @m_nums=split/_/,$1;
-		for (my $j=0;$j<@m_nums;$j++) {
-			$m_exp[$j]=$m_nums[$j];
-		}
-
-#ͳ���Է�����tagΪ�����reads, ����λ�ò��첻����3nt
-		for (my $i=0;$i<@over;$i++) {
-			next if($i==$m_maxline);
-			my @m_array=@{$over[$i]};
-			if (abs($m_array[1]-$m_start)<=3 && abs($m_array[2]-$m_end)<=3) {
-				$m_exp+=$m_array[4];
-				$m_no++;
-				$m_array[3]=~/:([\d|_]+)_x(\d+)$/;
-				my @m_nums=split/_/,$1;
-				for (my $j=0;$j<@m_nums;$j++) {
-					$m_exp[$j] +=$m_nums[$j];
-				}
-			}
-			elsif($m_array[1]>=$m_end || $m_array[2]<=$m_start){push @not_over,[@{$over[$i]}];} #ȥ����Խblock��reads
-		}
-		if($m_exp>5){### 5��reads
-			$m_texp+=$m_exp;
-			for (my $j=0;$j<@m_exp;$j++) {
-				$m_texp[$j]+=$m_exp[$j];
-			}
-			my $string=&subseq($pre{$key},$m_start,$m_end,"+");
-			push @result,"\t$m_start\t$m_end\t@m_exp\t$m_exp\t$m_no\t$string" ;
-		}
-	}
-
-	my $str=scalar @result;
-	my $percent=sprintf("%.2f",$m_texp/$totalReadsNo);
-	$str=">$key\t+\t$m_texp\t$percent\t".$str."\t$pre{$key}";
-	@{$moRs{$str}}=@result;
-
-#### analysis moRs end
-}
-
-##### moRs print out start
-foreach my $key (keys %moRs) {
-	my @tmp=split/\t/,$key;
-	next if ($tmp[4]<=2);
-	next if($tmp[3]<0.95);
-	my @over;
-	for (my $i=0;$i<@{$moRs{$key}};$i++) {
-		my @arrayi=split/\t/,$moRs{$key}[$i];
-		for (my $j=0;$j<@{$moRs{$key}};$j++) {
-			next if($i==$j);
-			my @arrayj=split/\t/,$moRs{$key}[$j];
-			if ((($arrayj[1]-$arrayi[2]>=0 && $arrayj[1]-$arrayi[2] <=3) || ($arrayj[1]-$arrayi[2]>=18 && $arrayj[1]-$arrayi[2] <=25) )||(($arrayi[1]-$arrayj[2]>=0 && $arrayi[1]-$arrayj[2] <=3)||($arrayi[1]-$arrayj[2]>=18 && $arrayi[1]-$arrayj[2] <=25))) {
-				push @over,$moRs{$key}[$i];
-			}
-		}
-	}
-	if (@over>0) {
-		print MORS "$key\n";
-		foreach  (@{$moRs{$key}}) {
-			print MORS "$_\n";
-		}
-	}
-}
-###### moRs print out end
-close ALN;
-close LIST;
-close MORS;
-
-$"=" ";##### reset
-
-
-################### Sub programs #################
-sub express{
-	my ($ms,$me,$ss,$se,$read)=@_;
-	my (@mexp,@sexp,@texp);
-	$$read[0][0]=~/:([_|\d]+)_x(\d+)$/;
-	my @numsample=split/_/,$1;
-	for (my $i=0;$i<@numsample;$i++) {
-		$mexp[$i]=0;
-		$sexp[$i]=0;
-		$texp[$i]=0;
-	}
-
-	for (my $i=0;$i<@{$read};$i++) {
-		my $start=$$read[$i][3]+1;
-		my $end=$$read[$i][3]+length($$read[$i][4]);
-		$$read[$i][0]=~/:([_|\d]+)_x(\d+)$/;
-		my $expresses=$1;
-		my @nums=split/_/,$expresses;
-
-		for (my $j=0;$j<@nums;$j++) {
-			$texp[$j]+=$nums[$j];
-		}
-		if ($start>=$ms && $end<=$me) {
-			for (my $j=0;$j<@nums;$j++) {
-				$mexp[$j]+=$nums[$j];
-			}
-		}
-		if ($start>=$ss && $end<=$se) {
-			for (my $j=0;$j<@nums;$j++) {
-				$sexp[$j]+=$nums[$j];
-			}
-		}
-	}
-	return(\@mexp,\@sexp,\@texp);
-}
-
-sub structure{
-	foreach my $key (keys %pre_mature) {
-		if (!(defined $pre{$key})){die "!!!!! No precursor sequence $key, please check it!\n";}
-		#my ($str,$mfe)=RNA::fold($pre{$key});
-		my $rnafold=`perl -e 'print "$pre{$key}"' | RNAfold --noPS`;
-		my @rnafolds=split/\s+/,$rnafold;
-		my $str=$rnafolds[1];
-		my $mfe=$rnafolds[-1];
-		$mfe=~s/\(//;
-		$mfe=~s/\)//;
-
-		$struc{$key}{"struc"}=$str;
-		#$struc{$key}{"mfe"}=sprintf ("%.2f",$mfe);
-		$struc{$key}{"mfe"}=$mfe;
-
-		foreach my $id (keys %{$pre_mature{$key}}) {
-			($pre_mature{$key}{$id}{"star"}[0],$pre_mature{$key}{$id}{"star"}[1])=&other_pair($pre_mature{$key}{$id}{"mature"}[0],$pre_mature{$key}{$id}{"mature"}[1],$str);
-		}
-=cut
-##### Nucleotide complementary
-		my @tmp=split//,$str;
-		my %a2b;
-		my @bps;
-		for (my $i=0;$i<@tmp;$i++) {
-			if ($tmp[$i] eq "("){push @bps,$i+1 ; next;}
-			if ($tmp[$i] eq ")") {
-				my $up=pop @bps;
-				$a2b{$i+1}=$up;
-				$a2b{$up}=$i+1;
-			}
-		}
-
-##### search star position
-		foreach my $id (keys %{$pre_mature{$key}}) {
-			my $n=0;
-			for (my $i=$pre_mature{$key}{$id}{"mature"}[0];$i<=$pre_mature{$key}{$id}{"mature"}[1] ; $i++) {
-				if (defined $a2b{$i}) {
-					my $a=$i; my $b=$a2b{$i};
-					if($a>$b){
-						$pre_mature{$key}{$id}{"star"}[0]=$b-$n+2;
-						$pre_mature{$key}{$id}{"star"}[1]=$b-$n+2+($pre_mature{$key}{$id}{"mature"}[1]-$pre_mature{$key}{$id}{"mature"}[0]);
-					}
-					if($a<$b{
-						$pre_mature{$key}{$id}{"star"}[1]=$b+$n+2;
-						$pre_mature{$key}{$id}{"star"}[0]=$b+$n+2-($pre_mature{$key}{$id}{"mature"}[1]-$pre_mature{$key}{$id}{"mature"}[0]);
-					}
-					last;
-				}
-				$n++;
-			}
-		}
-=cut
-	}
-}
-sub other_pair{
-	my ($start,$end,$structure)=@_;
-	##### Nucleotide complementary
-	my @tmp=split//,$structure;
-	my %a2b;	my @bps;
-	for (my $i=0;$i<@tmp;$i++) {
-		if ($tmp[$i] eq "("){push @bps,$i+1 ; next;}
-		if ($tmp[$i] eq ")") {
-			my $up=pop @bps;
-			$a2b{$i+1}=$up;
-			$a2b{$up}=$i+1;
-		}
-	}
-##### search star position
-	my $n=0;my $startpos; my $endpos;
-	for (my $i=$start;$i<=$end ; $i++) {
-		if (defined $a2b{$i}) {
-			my $a=$i; my $b=$a2b{$i};
-#			if($a>$b){
-#				$startpos=$b-$n+2;
-#				$endpos=$b-$n+2+($end-$start);
-#			}
-#			if($a<$b){
-				$endpos=$b+$n+2;
-				if($endpos>length($structure)){$endpos=length($structure);}
-				$startpos=$b+$n+2-($end-$start);
-				if($startpos<1){$startpos=1;}
-#			}
-			last;
-		}
-		$n++;
-	}
-	return ($startpos,$endpos);
-}
-sub attachPre{
-	open IN, "<$pre_file_name";
-	my $name;
-	while (my $aline=<IN>) {
-		chomp $aline;
-		if ($aline=~/^>(\S+)/) {
-			$name=$1;
-			next;
-		}
-		$pre{$name} .=$aline;
-	}
-	close IN;
-}
-sub readPosOnPre{
-	open IN,"<read_mapped.bwt";
-	while (my $aline=<IN>) {
-		chomp $aline;
-		my @tmp=split/\t/,$aline;
-		my $id=lc($tmp[2]);
-		push @{$pre_read{$tmp[2]}},[@tmp];
-	}
-	close IN;
-}
-sub maturePosOnPre{
-	open IN,"<mature_mapped.bwt";
-	while (my $aline=<IN>) {
-		chomp $aline;
-		my @tmp=split/\t/,$aline;
-		my $mm=$tmp[0];
-#		$mm=~s/\-3P|\-5P//i;
-		$mm=lc($mm);
-		my $pm=$tmp[2];
-		$pm=lc($pm);
-
-#		next if ($mm ne $pm);### stringent mapping let7a only allowed to map pre-let7a
-		next if($mm!~/$pm/);
-#		print "$tmp[2]\t$tmp[0]\n";
-#		$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=$tmp[3]-$upstream;
-#		$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=0 if($pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]<0);
-#		$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[1]=$tmp[3]+length($tmp[4])-1+$downstream;
-		$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=$tmp[3]+1;
-		$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[1]=$tmp[3]+length($tmp[4]);
-	}
-	close IN;
-}
-sub mapping{
-    my $err;
-## build bowtie index
-    #print STDERR "building bowtie index\n";
-    $err = `bowtie-build $pre_file_name miRNA_precursor`;
-
-## map mature sequences against precursors
-    #print STDERR "mapping mature sequences against index\n";
-	$err = `bowtie -p $threads -f -v 0 -a --best --strata --norc miRNA_precursor $mature > mature_mapped.bwt 2> run.log`;
-
-## map reads against precursors
-    #print STDERR "mapping read sequences against index\n";
-    $err=`bowtie -p $threads -f -v $mismatch -a --best --strata --norc miRNA_precursor $read --al mirbase_mapped.fa --un mirbase_not_mapped.fa > read_mapped.bwt 2> run.log`;
-
-}
-
-sub subseq{
-	my $seq=shift;
-	my $beg=shift;
-	my $end=shift;
-	my $strand=shift;
-
-	my $subseq=substr($seq,$beg-1,$end-$beg+1);
-	if ($strand eq "-") {
-		$subseq=revcom($subseq);
-	}
-	return uc $subseq;
-}
-
-sub revcom{
-	my $seq=shift;
-	$seq=~tr/ATCGatcg/TAGCtagc/;
-	$seq=reverse $seq;
-	return uc $seq;
-}
-
-sub Time{
-        my $time=time();
-        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
-        $month++;
-        $year+=1900;
-        if (length($sec) == 1) {$sec = "0"."$sec";}
-        if (length($min) == 1) {$min = "0"."$min";}
-        if (length($hour) == 1) {$hour = "0"."$hour";}
-        if (length($day) == 1) {$day = "0"."$day";}
-        if (length($month) == 1) {$month = "0"."$month";}
-        #print "$year-$month-$day $hour:$min:$sec\n";
-        return("$year-$month-$day-$hour-$min-$sec");
-}
-
-sub usage{
-print <<"USAGE";
-Version $version
-Usage:
-$0 -r -p -m -mis -t -e -f -tag -o -time
-mandatory parameters:
--p precursor.fa  miRNA precursor sequences from miRBase # must be absolute path
--m mature.fa     miRNA sequences from miRBase # must be absolute path
--r reads.fa      your read sequences #must be absolute path
-
--o output directory
-
-options:
--mis [int]     number of allowed mismatches when mapping reads to precursors, default 0
--t [int]     threads number,default 1
--e [int]     number of nucleotides upstream of the mature sequence to consider, default 2
--f [int]     number of nucleotides downstream of the mature sequence to consider, default 5
--tag [string] sample marks# eg. sampleA;sampleB;sampleC
--time sting #make directory time,default is the local time
--h help
-USAGE
-exit(1);
-}
-
--- a/rfam.pl	Thu Nov 13 22:43:35 2014 -0500
+++ b/rfam.pl	Wed Dec 03 01:54:29 2014 -0500
@@ -12,7 +12,7 @@
 use File::Basename;

 my %opts;
-GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","time:s","h");
+GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","h");
 if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
 &usage;
 }
@@ -31,10 +31,8 @@


 my $time=Time();
-if (defined $opts{'time'}) {
-	$time=$opts{'time'};
-}
-my $mapdir=$dir."/rfam_match_".$time;
+
+my $mapdir=$dir."/rfam_match";
 if(not -d $mapdir){
 	mkdir $mapdir;
 }
@@ -95,7 +93,6 @@
 -p/--threads <int> number of alignment threads to launch (default: 1)

 -o output directory
--time sting #make directory time,default is the local time
 -h help
 USAGE
 exit(1);
--- a/tool_dependencies.xml	Thu Nov 13 22:43:35 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,48 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="fastx_toolkit" version="0.0.13">
-        <repository changeset_revision="ec66ae4c269b" name="package_fastx_toolkit_0_0_13" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="bowtie" version="0.12.7">
-        <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <set_environment version="1.0">
-        <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable>
-    </set_environment>
-	<!--package name="R" version="3.0.1">
-	   <repository name="package_r_3_0_1" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
-	</package!-->
-
-	<package name="ViennaRNA" version="2.1.8">
-	   <install version="1.0">
-		   <actions>
-		     <action type="download_by_url">http://www.tbi.univie.ac.at/RNA/packages/source/ViennaRNA-2.1.8.tar.gz</action>
-			 <action type="shell_command">./configure --prefix=$INSTALL_DIR </action>
-			 <action type="shell_command">make</action>
-			 <action type="shell_command">make install</action>
-			 <action type="set_environment">
-			   <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
-			 </action>
-		   </actions>
-	   </install>
-	</package>
-
-	<package name="SVG" version="2.59">
-	   <install version="1.0">
-		   <actions>
-		     <action type="download_by_url">http://www.cpan.org/authors/id/S/SZ/SZABGAB/SVG-2.59.tar.gz</action>
-			<action type="make_directory">$INSTALL_DIR/lib/perl5</action>
-			<action type="shell_command">
-				perl Makefile.PL INSTALL_BASE=$INSTALL_DIR &amp;&amp;
-				make &amp;&amp;
-				make install
-			</action>
-			<action type="set_environment">
-				<environment_variable action="append_to" name="PERL5LIB">$INSTALL_DIR/lib/perl5/:$INSTALL_DIR/lib/perl5/x86_64-linux-gnu-thread-multi/</environment_variable>
-			</action>
-		   </actions>
-	   </install>
-	</package>
-
-
-</tool_dependency>