changeset 0:87fe81de0931 draft default tip

Uploaded
author bigrna
date Sun, 04 Jan 2015 02:47:25 -0500
parents
children
files Annotate.pl ClassAnnotate.pl DEGseq_2.pl Length_Distibution.pl SampleDEGseqMerge.pl collapseReads2Tags.pl conventional.pl convert_bowtie_to_blast.pl count_ref_length.pl count_rfam_express.pl filterReadsByCount.pl filterReadsByLength.pl get_genelist.pl html_miRPlant.pl html_preprocess.pl html_siRNA.pl install_DEG.R matching.pl miRDeep_plant.pl miRNA_Express_and_sequence.pl microRNA.pl microRNA.xml microRNA_pipeline.pl microRNA_pipeline.xml nibls.pl non_miRNA_reads.pl phased_siRNA.pl preProcess.pl preProcess.xml preProcess_core.pl precursors.pl quantify.pl quantify_siRNA.pl rfam.pl sRNA_plot.pl sam2Bed_bowtie.pl siRNA.pl siRNA.xml siRNA_pipeline.pl siRNA_pipeline.xml tool-data/bowtie_indices.loc.sample tool-data/nat_annotate.loc.sample tool-data/repeat_annotate.loc.sample tool-data/rfam_bowtie_indices.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 46 files changed, 13025 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Annotate.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,178 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Chentt
+#Email: chentt@big.ac.cn
+#Date: 2014/4/10
+#Modified:
+#Description: cluster annotate by priority
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","d=i","g=s","o=s","t=s","h");
+if (!(defined $opts{i} and defined $opts{g} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+#my $genelistout=$opts{'t'};
+my $dis=defined $opts{'d'}? $opts{'d'}:1000;
+my %gene;
+
+#open OUT,">$genelistout"; #output file  
+#print OUT "#ID\tchr\tstart\tend\tstrand\ns";
+open IN,"<$opts{g}";
+while (my $aline=<IN>) {
+	chomp $aline;
+	next if($aline=~/^\#/);
+	my @tmp=split/\t/,$aline;#ID chr start end strand
+	#push @{$gene1{$tmp[0]}},[$tmp[2],$tmp[3],$tmp[1]];
+	$gene{$tmp[1]}{$tmp[0]}=[$tmp[2],$tmp[3],$tmp[4]];
+}
+#while (my $aline=<IN>) {
+#	chomp $aline;
+#	next if($aline=~/^\#/);
+#	my @tmp=split/\t/,$aline;
+#	my $ID;
+#	if ($tmp[2] eq "gene") {
+#		$tmp[0]=~s/Chr\./Chr/;
+#		$tmp[0]=~s/Chr/chr/;
+#		my @infor=split/;/,$tmp[8];
+#		for (my $i=0;$i<@infor ;$i++) {
+#			if ($infor[$i]=~/Alias\=(\S+)$/) {
+#				$ID=$1;
+#				last;
+#			}
+#		}
+#		$gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand]
+#		print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n";
+#	}
+#}
+close IN;
+#close OUT;
+
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'o'};
+
+open IN,"<$filein"; #input file  
+open OUT,">$fileout"; #output file  
+while (my $aline=<IN>) {
+	chomp $aline;
+	my @tmp=split/\t/,$aline;
+	if($aline=~/^\#/){print OUT "$aline\tP_annotate\n";next}
+	my @result;
+	#shift @tmp;
+	my @id=split/:/,$tmp[0];
+	$id[0]=~s/Chr0/Chr/;
+	my @posi=split/-/,$id[1];
+	foreach my $key (keys %{$gene{$id[0]}}) {
+		if ($posi[0]<$gene{$id[0]}{$key}[1] && $posi[1]>$gene{$id[0]}{$key}[0]) {
+			push @result,"gene-body;$key;$gene{$id[0]}{$key}[2]";#$te{$key}";
+			next;
+		}
+		#if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-1000) {
+		if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-$dis) {
+			push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+");
+			push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-");
+			next;
+		}
+		#if ($posi[0]<$gene{$id[0]}{$key}[1]+1000 && $posi[1]>$gene{$id[0]}{$key}[1]) {
+		if ($posi[0]<$gene{$id[0]}{$key}[1]+$dis && $posi[1]>$gene{$id[0]}{$key}[1]) {
+			push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+");
+			push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-");
+			next;
+		}
+	}
+	my $result;
+	if (!(@result)) {
+		$result="intergenic";
+	}
+	elsif($#result==0){
+		$result=$result[0];
+	
+	}
+	else{
+		$result=join "\t",@result;
+	}
+#	else{
+#		my $te_num=0;
+#		my @te_overlap;
+#		my @te_up_down;
+#		my @non_overlap;
+#		my @non_up_down;
+#		for (my $k=0;$k<@result ;$k++) {
+#			my @rr=split/\;/,$result[$k];
+#			if ($rr[3] eq "Y") {
+#				$te_num++;
+#				if ($rr[0] eq "overlap") {
+#					push @te_overlap,$result[$k];
+#				}
+#				else{
+#					push @te_up_down,$result[$k];
+#				}
+#			}
+#			else{
+#				if ($rr[0] eq "overlap") {
+#					push @non_overlap,$result[$k];
+#				}
+#				else{
+#					push @non_up_down,$result[$k];
+#				}
+#			}
+#		}
+#		if ($te_num==0) {#non TE 
+#			if (!(@te_overlap)) {#down up
+#				if ($#non_up_down==0) {
+#					$result=$non_up_down[0];
+#				}
+#				else{#overlap
+#					my $all_2=join "\t",@non_up_down;
+#					$result="up&down1-kb\t".$all_2;
+#				}
+#			}
+#			else{
+#				$result=join "\t",@non_overlap;
+#				if ($#non_overlap>=1) {
+#					print "$aline\t$result\n";
+#				}
+#			}
+#		}
+#		else{#TE
+#			if (!(@te_overlap)) {#down up
+#				if ($#te_up_down==0) {
+#					$result=$te_up_down[0];
+#				}
+#				else{#overlap
+#					my $all_2=join "\t",@te_up_down;
+#					$result="up&down1-kb\t".$all_2;
+#				}
+#			}
+#			else{
+#				$result=join "\t",@te_overlap;
+#				if ($#te_overlap>=1) {
+#					print "$aline\t$result\n";
+#				}
+#			}
+#		}
+#	}
+	print OUT "$aline\t$result\n";
+}
+
+close IN;
+close OUT;
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o -g -d 
+options:
+-i input file
+-g genelist file
+-d int the length of the upstream and downstream,default 1000
+-o output file
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ClassAnnotate.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,251 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Chen Tingting
+#Email: chentt@big.ac.cn
+#Date: 2014/5/13
+#Modified:
+#Description: cluster annotate
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","g=s","n=s","r=s","p=s","o=s","t=s","l=s","h");
+if (!(defined $opts{i} and defined $opts{g} and defined $opts{n} and defined $opts{r} and defined $opts{p} and defined $opts{o} and defined $opts{t}  and defined $opts{l}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+#my %gene;
+my %gene1;
+open IN,"<$opts{g}";
+open OUT ,">$opts{l}";
+print OUT "#ID\tchr\tstart\tend\tstrand\n";
+my $n=1;
+while (my $aline=<IN>) {
+	chomp $aline;
+	next if($aline=~/^\#/);
+	my @tmp=split/\t/,$aline;
+	my $ID;
+	if ($tmp[2] eq "gene") {
+		$tmp[0]=~s/Chr\./Chr/;
+		#$tmp[0]=~s/Chr/chr/;
+		my @infor=split/;/,$tmp[8];
+		for (my $i=0;$i<@infor ;$i++) {
+			if ($infor[$i]=~/Alias\=(\S+)$/) {
+				$ID=$1;
+				last;
+			}
+			else {
+				$ID="unknown$n";
+				$n++;
+			}
+		}
+		#$gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand]
+		push @{$gene1{$ID}},[$tmp[3],$tmp[4],$tmp[0]];
+		print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n";
+	}
+}
+#while (my $aline=<IN>) {
+#	chomp $aline;
+#	next if($aline=~/^\#/);
+#	my @tmp=split/\t/,$aline;#ID chr start end strand
+#	push @{$gene1{$tmp[0]}},[$tmp[2],$tmp[3],$tmp[1]];
+#	#$gene{$tmp[1]}{$tmp[0]}=[$tmp[2],$tmp[3],$tmp[1]];
+#}
+close IN;
+close OUT;
+
+my %nat;
+open TMP,">$opts{t}";
+print TMP "#NAT_ID\tGene\tStrand\tChr\tGene_start\tGene_end\tAntiGene\tStrand\tChr\tAntiGene_start\tAntiGene_end\tType1\tType2\tNATS1_start\tNATS1_end\tNATS2_start\tNATS2_end\n";
+
+open IN,"<$opts{n}";
+my $nat_n=1;
+while (my $aline=<IN>) {
+	next if($aline=~/^\#/);#osj LOC_Os05g02659 - LOC_Os01g24200 + trans 1559 1802 660 905 246 100nt -
+	chomp $aline;
+	my @arr=split /\t/,$aline;
+	my ($ns,$ne,$ns2,$ne2)=(0,0,0,0);
+	if ($arr[11]=~/Nearby/) {
+		($ns,$ne)=&nearby($gene1{$arr[1]}[0][0],$gene1{$arr[1]}[0][1],$gene1{$arr[3]}[0][0],$gene1{$arr[3]}[0][1]);
+		push @{$nat{$gene1{$arr[1]}[0][2]}},[$ns,$ne,$arr[5],$arr[11],"NATs".$nat_n];
+		print TMP "NATs$nat_n\t$arr[1]\t$arr[2]\t$gene1{$arr[1]}[0][2]\t$gene1{$arr[1]}[0][0]\t$gene1{$arr[1]}[0][1]\t$arr[3]\t$arr[4]\t$gene1{$arr[3]}[0][2]\t$gene1{$arr[3]}[0][0]\t$gene1{$arr[3]}[0][1]\t$arr[5]\t$arr[11]\t$ns\t$ne\t$ns\t$ne\n";
+		$nat_n++;
+	}else{
+		$ns=$gene1{$arr[1]}[0][0]+$arr[6]-1;
+		$ne=$gene1{$arr[1]}[0][0]+$arr[7]-1;
+		$ns2=$gene1{$arr[3]}[0][1]-$arr[9]+1;
+		$ne2=$gene1{$arr[3]}[0][1]-$arr[8]+1;
+		push @{$nat{$gene1{$arr[1]}[0][2]}},[$ns,$ne,$arr[5],$arr[11],"NATs$nat_n"."_1"];#start,end,class1,class2
+		push @{$nat{$gene1{$arr[3]}[0][2]}},[$ns2,$ne2,$arr[5],$arr[11],"NATs$nat_n"."_2"];
+		print TMP "NATs$nat_n\t$arr[1]\t$arr[2]\t$gene1{$arr[1]}[0][2]\t$gene1{$arr[1]}[0][0]\t$gene1{$arr[1]}[0][1]\t$arr[3]\t$arr[4]\t$gene1{$arr[3]}[0][2]\t$gene1{$arr[3]}[0][0]\t$gene1{$arr[3]}[0][1]\t$arr[5]\t$arr[11]\t$ns\t$ne\t$ns2\t$ne2\n";
+		$nat_n++;
+	}
+}
+close IN;
+close TMP;
+
+my %repeat;
+open IN,"<$opts{'r'}";
+my $first=<IN>;
+$first=<IN>;
+$first=<IN>;
+while (my $aline=<IN>) {
+	chomp $aline;
+	$aline=~s/^\s+//;
+	my @tmp=split/\s+/,$aline;
+	$tmp[4]=~s/chr0/Chr/;
+	$tmp[4]=~s/chr/Chr/;
+	push @{$repeat{$tmp[4]}},[$tmp[5],$tmp[6],$tmp[10]];#start,end,class
+	#print "$tmp[4]\t$tmp[5]\t$tmp[6]\t$tmp[10]\n";
+}
+close IN;
+
+my %phase;
+open IN,"<$opts{'p'}";
+while (my $aline=<IN>) {
+	chomp $aline;
+	next if($aline=~/^\#/);
+	my @tmp=split/\t/,$aline;
+	if ($tmp[5]>=25) {
+		$phase{$tmp[0]}=$tmp[5];
+	}
+}
+close IN;
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'o'};
+open IN,"<$filein"; #input file  
+open OUT,">$fileout"; #output file  
+while (my $aline=<IN>) {
+	chomp $aline;
+	if($aline=~/^\#/){
+		print OUT "$aline\tPhase\tLong\tRepeatClass\tNatClass1\tNatClass2\tNatID\n";
+		next;
+	}
+	my @tmp=split/\t/,$aline;
+	my @inf=split/\:/,$tmp[0];
+	my @pos=split/\-/,$inf[1];
+	my $chr=$inf[0];
+	$chr=~s/Chr0/Chr/;
+	my $start=$pos[0];
+	my $end=$pos[1];
+	#=========Repeat============
+	my @repeat;
+	if (defined(@{$repeat{$chr}})) {
+		my @r_array=sort {$a->[0] <=> $b->[0]} @{$repeat{$chr}};
+		for (my $i=0;$i<@r_array ;$i++) {
+			if ($start<$r_array[$i][0] && $end>$r_array[$i][0]) {
+				push @repeat,$r_array[$i][2];
+			}
+			elsif($start>$r_array[$i][0] && $start<$r_array[$i][1]){
+				push @repeat,$r_array[$i][2];
+
+			}
+			elsif($end<$r_array[$i][0]){
+				last;
+			}
+			else{next;}
+		}
+	}
+	my $repeat;
+	if (@repeat==0) {
+		$repeat="\/";
+	}
+	else{
+		$repeat=join ";",@repeat;
+	}
+	#=========nat===============
+	my @nat1;#class 1
+	my @nat2;#class 2
+	my @nat;#nat ID
+	#foreach my $chr (keys %nat) {
+		my @n_array=sort {$a->[0] <=> $b->[0] } @{$nat{$chr}};
+		for (my $i=0;$i<@n_array ;$i++) {
+			if ($start<$n_array[$i][0] && $end>$n_array[$i][0]) {
+				push @nat1,$n_array[$i][2];
+				push @nat2,$n_array[$i][3];
+				push @nat,$n_array[$i][4];
+			}
+			elsif($start>$n_array[$i][0] && $start<$n_array[$i][1]){
+				push @nat1,$n_array[$i][2];
+				push @nat2,$n_array[$i][3];
+				push @nat,$n_array[$i][4];
+			}
+			elsif($end<$n_array[$i][0]){
+				last;
+			}
+			else{next;}
+		}
+	#}
+
+	my $nat1;
+	my $nat2;
+	my $nat;
+	if (@nat1==0) {
+		$nat1="\/";
+	}
+	else{
+		$nat1=join ";",@nat1;
+	}
+	if (@nat2==0) {
+		$nat2="\/";
+	}
+	else{
+		$nat2=join ";",@nat2;
+	}
+	if (@nat==0) {
+		$nat="\/";
+	}
+	else{
+		$nat=join ";",@nat;
+	}
+	#========phase==============
+	my $phase="\/";
+	if (defined($phase{$tmp[0]})) {
+		$phase="phase";
+	}
+	#=========long===============
+	my $long="\/";
+	if ($tmp[1] eq "\>30nt" and $tmp[2]>=0.5) {
+		$long="long";
+	}
+	print OUT "$aline\t$phase\t$long\t$repeat\t$nat1\t$nat2\t$nat\n";
+}
+
+close IN;
+close OUT;
+
+sub nearby{
+	my @p=@_;
+	my ($s,$e)=(0,0);
+	if ($p[1]<$p[2]) {
+		$s=$p[1];
+		$e=$p[2];
+	}else{
+		$s=$p[3];
+		$e=$p[0];
+		}
+	return ($s,$e);
+}
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o -g -n -r -p -t -l
+options:
+-i input file
+	-g gff file
+	-n NATs file
+	-r repeat file
+	-p phase file
+-o output file
+-t nat output file
+-l genelist output file
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/DEGseq_2.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,73 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2009-05-06
+#Modified:
+#Description: ɾ³ýmatched reads 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+my %opts;
+GetOptions(\%opts,"i=s","outdir=s","column1:i","mark1=s","depth1:i","depth2:i","column2:i","mark2=s","h");
+if (!(defined $opts{i} and defined $opts{outdir} and defined $opts{mark1} and defined $opts{mark2}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $outputdir=$opts{'outdir'};
+unless ($outputdir=~/\/$/) {$outputdir .="/";}
+my $column1=defined $opts{column1} ? $opts{column1} : 3;
+my $column2=defined $opts{column2} ? $opts{column2} : 4;
+my $mark1=$opts{mark1};
+my $mark2=$opts{mark2};
+my $fileout=$outputdir."degseq.R";
+
+open OUT,">$fileout"; #output file  
+#my ($name,$dir);
+#$name=basename($filein);
+print OUT "library(DEGseq)\n";
+print OUT "geneExpFile <- system.file(package=\"DEGseq\")\n";
+print OUT "geneExpFile<-file.path(\"$filein\")\n";
+print OUT "layout(matrix(c(1,2,3,4,5,6), 3, 2, byrow=TRUE))\npar(mar=c(2, 2, 2,2))\n";
+print OUT "outputdir<-file.path(\"$outputdir\")\n";
+print OUT "geneExpMatrix1 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column1))\n";
+print OUT "geneExpMatrix2 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column2))\n";
+if(defined $opts{'depth1'} && defined $opts{'depth2'}){
+print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",depth1=$opts{depth1},depth2=$opts{depth2},outputDir=outputdir,method=\"MARS\")\n";
+}
+else{
+print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",outputDir=outputdir,method=\"MARS\")\n";
+}
+close OUT;
+
+
+system("R CMD BATCH $fileout");
+
+wait;
+
+
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -outdir -column1 -mark1 -column2 -mark2 -depth1 -depth2
+options:
+-i input file
+-outdir output file dir
+-column1 the first column for DEGseq
+-mark1 the name of the column1
+-depth1 depth for the first file,use for normalize
+-column2 the second column for DEGseq
+-mark2 the name of the column2
+-depth2 depth for the second file,use for normalize
+
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Length_Distibution.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,219 @@
+#!/usr/bin/perl -w
+#==========================================================================================
+# Date: 
+# Title: 
+# Comment: Program to plot gene structure
+# Input: 1. input file of Gene region annotation which format like GenePred
+#        2. input file of Transcripts region annotation which format like GenePred
+#        3. input file of gene snp detail info
+# Output: output file of gene structure graph by html or svg formt
+# Test Usage: 
+#========================================================================================
+#use strict;
+my $version=1.00;
+use SVG;
+use Getopt::Long;
+my %opt;
+GetOptions(\%opt,"i=s","o=s",,"h");
+if (!(defined $opt{i} and defined $opt{o}) || defined $opt{h}) {
+&usage;
+}
+#===============================Define Attribute==========================================
+my %attribute=(
+	canvas=>{
+		'width'=>1500,
+		'height'=>1800
+	},
+	text=>{
+		'stroke'=>"#000000",
+		'fill'=>"none",
+		'stroke-width'=>0.5
+		#'stroke-width2'=>1
+	},
+	line=>{
+		'stroke'=>"black",
+		'stroke-width'=>1
+	},
+	font=>{
+		'fill'=>"#000000",
+		'font-size'=>12,
+		'font-size2'=>10,
+		'font-weight'=>'bold',
+		'font-family'=>"Arial"
+		#'font-family2'=>"ArialNarrow-bold"
+	},
+	rect=>{
+		'fill'=>"lightgreen",
+		'stroke'=>"black",
+		'stroke-width'=>0.5
+	},
+	readwidth=>0.5
+);
+#my $Xscale=600/$length;#¶¨ÒåXÖá±ÈÀý³ß 1:1000 xÖáµÄ×ø±ê³¤¶È¶¼Òª°´Õմ˱ÈÀý³ß»»Ëã
+#========================================data============================
+open(IN,"$opt{i}")||die"cannot open the file $opt{i}";
+my @R_length;
+my @T_length;
+my $R_number=0;
+my $T_number=0;
+my $R_max=0;
+my $T_max=0;
+
+my $title=<IN>;
+chomp $title;
+my @title=split/\t/,$title;
+my @mark=split/\s+/,$title[1];
+my $sample_number=@mark;
+while (my $aline=<IN>) {
+	if ($aline=~/^\s/) {
+		my $T_title=<IN>;
+		chomp $T_title;
+		while (my $a_aline=<IN>) {
+			chomp $a_aline;
+			my @temp=split/\t/,$a_aline;
+			my @number=split/\s+/,$temp[1];
+			for (my $i=0;$i<@number ;$i++) {
+				if ($R_max<$number[$i]) {
+					$R_max=$number[$i];
+				}
+			}
+			push @R_length,[$temp[0],@number];
+			$R_number++;
+		}
+	}
+	else {
+		chomp $aline;
+		my @temp=split/\t/,$aline;
+		my @number=split/\s+/,$temp[1];
+		for (my $i=0;$i<@number ;$i++) {
+			if ($T_max<$number[$i]) {
+				$T_max=$number[$i];
+			}
+		}
+		push @T_length,[$temp[0],@number];
+		$T_number++;
+	}
+}
+close IN;
+print "Tag max: $T_max\nRead max: $R_max\n";
+my $kd_number=5;
+##=======================Reads ×Ý×ø±ê¿Ì¶È==========================
+my $r=1;
+my $rr=1;
+my $R=$R_max;
+while ($R>10) {
+	$R=$R/10;
+	$r=$r*10;
+	$rr++;
+}
+$R=int($R+0.5);
+my $R_xg=$R/$kd_number*$r;#×Ý×ø±êһС¸ñ´óС£¨Ò»¹²10¸ñ£©
+my $R_kedu_scale_x=6*$rr;#×Ý×ø±ê¿Ì¶ÈÎÄ×Ö
+##=======================Tags ×Ý×ø±ê¿Ì¶È==========================
+my $t=1;
+my $tt=1;
+my $T=$T_max;
+while ($T>10) {
+	$T=$T/10;
+	$t=$t*10;
+	$tt++;
+}
+$T=int($T+0.5);
+my $T_xg=$T/$kd_number*$t;#×Ý×ø±êһС¸ñ´óС£¨Ò»¹²10¸ñ£©
+my $T_kedu_scale_x=6*$tt;#×Ý×ø±ê¿Ì¶ÈÎÄ×Ö
+
+#############################s#define start coordinate and scale
+my $XOFFSET=50;
+my $YOFFSET=60;
+my $width=800;
+my $heigth=800;
+my $X_width=600;
+#my $height=1600;
+####    Starting    ####
+#н¨»­²¼
+my $svg=SVG->new(width=>$width,height=>$heigth);
+####×ø±êÖá
+my $axisL=300;#read ×Ý×ø±ê³¤¶È
+my $x_margin = 50;
+#=========Reads number setting==========================================
+my $Y_R_title=30;#±êÌâµÄ×ÝÏò¿í¶È
+my $Y_R_0=$YOFFSET+$axisL+$Y_R_title;
+my $X_R_0=$XOFFSET+$x_margin;
+my $R_Yscale=$axisL/$R_xg/$kd_number;
+my $R_Xscale=$X_width/$R_number/($sample_number+1);
+#=====================================Reads Y axis======================
+$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0,'y2',$Y_R_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+for (my $i=1;$i<$kd_number ;$i++) {
+	$svg->line('x1',$X_R_0-5,'y1',$Y_R_0-$i*$R_xg*$R_Yscale,'x2',$X_R_0,'y2',$Y_R_0-$i*$R_xg*$R_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+	$svg->text('x',$X_R_0-$R_kedu_scale_x,'y',$Y_R_0-$i*$R_xg*$R_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$R_xg);
+}
+#=====================================Reads X axis======================
+$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0+$X_width,'y2',$Y_R_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+
+#print "$R_number\t$sample_number\n";
+for ($i=0;$i<$R_number ;$i++) {
+	for (my $j=1;$j<$sample_number+1 ;$j++) {
+		my $red=$j/$sample_number*255;
+		$svg->rect('x',$X_R_0+($j+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0-$R_length[$i][$j]*$R_Yscale,'width',$R_Xscale,'height',$R_length[$i][$j]*$R_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+	}
+	$svg->text('x',$X_R_0+(1+$sample_number/2+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$R_length[$i][0]);
+}
+#===Reads number title
+$svg->text('x',$XOFFSET+400,'y',$YOFFSET+$Y_R_title,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Reads Length Distribution");
+#===Reads 
+for (my $i=0;$i<$sample_number ;$i++) {
+	my $red=($i+1)/$sample_number*255;
+	$svg->rect('x',$X_R_0+550,'y',$YOFFSET+$Y_R_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+	$svg->text('x',$X_R_0+550+30,'y',$YOFFSET+$Y_R_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]);
+}
+####==================================================================================
+#=========================================Tag s
+my $Y_T_title=30;#±êÌâµÄ×ÝÏò¿í¶È
+my $Y_T_0=$Y_R_0+$axisL+$Y_R_title+50;#length size
+my $X_T_0=$XOFFSET+$x_margin;
+my $T_Yscale=$axisL/$T_xg/$kd_number;
+my $T_Xscale=$X_width/$T_number/($sample_number+1);
+#=====================================Tags Y axis======================
+$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0,'y2',$Y_T_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+for (my $i=1;$i<$kd_number ;$i++) {
+	$svg->line('x1',$X_T_0-5,'y1',$Y_T_0-$i*$T_xg*$T_Yscale,'x2',$X_T_0,'y2',$Y_T_0-$i*$T_xg*$T_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+	$svg->text('x',$X_T_0-$T_kedu_scale_x,'y',$Y_T_0-$i*$T_xg*$T_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$T_xg);
+}
+#=====================================Tags X axis======================
+$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0+$X_width,'y2',$Y_T_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+
+#print "$R_number\t$sample_number\n";
+for ($i=0;$i<$T_number ;$i++) {
+	for (my $j=1;$j<$sample_number+1 ;$j++) {
+		my $red=$j/$sample_number*255;
+		$svg->rect('x',$X_T_0+($j+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0-$T_length[$i][$j]*$T_Yscale,'width',$T_Xscale,'height',$T_length[$i][$j]*$T_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+	}
+	$svg->text('x',$X_T_0+(1+$sample_number/2+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$T_length[$i][0]);
+}
+#===Reads number title
+$svg->text('x',$XOFFSET+400,'y',$Y_R_0+30+$Y_T_title,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Tags Length Distribution");
+#===Reads 
+for (my $i=0;$i<$sample_number ;$i++) {
+	my $red=($i+1)/$sample_number*255;
+	$svg->rect('x',$X_T_0+550,'y',$Y_R_0+30+$Y_T_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+	$svg->text('x',$X_T_0+550+30,'y',$Y_R_0+30+$Y_T_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]);
+}
+
+
+
+
+open (OUT,">$opt{o}");
+print OUT $svg->xmlify();
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 
+options:
+-i 
+-o svg output
+-h help
+USAGE
+exit(1);
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SampleDEGseqMerge.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,94 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: chentt@big.ac.cn
+#Date: 2014-05-21
+#Modified:
+#Description: merged deg file and total information
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i:s@","mark:s@","f:s","o=s","n=s","h");
+if (!(defined $opts{o} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my @filein=@{$opts{'i'}};
+my @mark=@{$opts{'mark'}};
+my $fileout=$opts{'o'};
+my $number=$opts{'n'};
+
+my %hash;
+open IN,"<$filein[0]"; #input file  
+
+while (my $aline=<IN>) {
+	chomp $aline;
+	next if($aline=~/^\"/);
+	my @temp=split/\t/,$aline;
+	$hash{$temp[0]}=$temp[4]."\t".$temp[6]."\t".$temp[7]."\t".$temp[-1];
+}
+close IN;
+
+for (my $i=1;$i<=$#filein;$i++) {
+	open IN,"<$filein[$i]"; #input file  
+
+	while (my $aline=<IN>) {
+		chomp $aline;
+		next if($aline=~/^\"/);
+		my @temp=split/\t/,$aline;
+		if (!(defined $hash{$temp[0]})) {
+			print "Not find $temp[0]in sample one!\n";
+			next;
+		}
+		$hash{$temp[0]} .="\t".$temp[4]."\t".$temp[6]."\t".$temp[7]."\t".$temp[-1];
+	}
+	close IN;
+}
+
+open OUT,">$fileout"; #output file  
+my $deg_title;
+foreach (@mark) {
+	$deg_title.="log2(Fold_change)\tp_value\tq_value\t".$_."\t";
+}
+$deg_title=~s/\s+$//;
+my %function;
+my $title;
+open F,"<$opts{f}";
+while (my $aline=<F>) {
+	chomp $aline;
+	if($aline=~/^\#/){
+		my $title=$aline;
+		my @title=split/\t/,$aline;
+		$title[2+$number].="\t".$deg_title;
+		$title=join"\t",@title;
+		print OUT "$title\n";
+		next;
+	}
+	my @temp=split/\t/,$aline;
+	$temp[2+$number].="\t".$hash{$temp[0]};
+	my $temp=join"\t",@temp;
+	print OUT "$temp\n";
+
+}
+close F;
+close OUT;
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o -mark -f
+options:
+-i input file # -i output_score.txt -i output_score.txt -i output_score.txt
+-mark  sample name  # -mark sam1_VS_sam2 -mark sam1_VS_sam3 -mark sam2_VS_sam3
+-f cluster file
+-n sample number
+-o output file
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/collapseReads2Tags.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,170 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-3-20
+#Modified:
+#Description: fastq file form reads cluster(the same sequence in the same cluster)
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i:s@","format=s","mark:s","qual:s","qv:i","o=s","h");
+if (!(defined $opts{o} and defined $opts{'format'})  || defined $opts{h}) { #necessary arguments
+&usage;
+}
+my @filein=@{$opts{i}} if(defined $opts{i});
+my $name=defined $opts{'mark'} ? $opts{'mark'} : "seq";
+my $fileout=$opts{'o'};
+my $pq=defined $opts{'qv'} ? $opts{'qv'} : 33;
+my %hash;##·Ö¿é´æ·ÅԭʼÐòÁÐ
+
+my $format=$opts{'format'};
+if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") {
+	die "Parameter -format is error!\n";
+}
+
+my ($qualT,$qualV);
+if (defined $opts{'qual'} && ($format eq "fastq" || $format eq "fq")) {  #quality filter
+	my @temp=split /:/,$opts{'qual'};
+	$qualT=$temp[0];
+	$qualV=$temp[1];
+
+	for (my $i=0;$i<@filein;$i++) {
+		open IN,"<$filein[$i]";
+		while (my $aline=<IN>) {
+			my $seq=<IN>;
+			my $n=<IN>;
+			my $qv=<IN>;
+			my $tag=&qvcheck($qv,$qualT,$qualV);
+			next if(!$tag);
+			my $str=substr($seq,0,6);
+			$hash{$str}[$i].=$seq;
+		}
+		close IN;
+	}
+}
+elsif($format eq "fastq" || $format eq "fq"){ ### do not filter low quality reads
+	for (my $i=0;$i<@filein;$i++) {
+		open IN,"<$filein[$i]";
+		while (my $aline=<IN>) {
+			my $seq=<IN>;
+			my $n=<IN>;
+			my $qv=<IN>;
+			my $str=substr($seq,0,6);
+			$hash{$str}[$i].=$seq;
+		}
+		close IN;
+	}
+
+}
+elsif($format eq "fasta" || $format eq "fa"){
+	for (my $i=0;$i<@filein;$i++) {
+		open IN,"<$filein[$i]";
+		while (my $aline=<IN>) {
+			my $seq=<IN>;
+			my $str=substr($seq,0,6);
+			$hash{$str}[$i].=$seq;
+		}
+		close IN;
+	}
+}
+
+open OUT,">$fileout"; #output file  
+my $count=0;
+foreach my $key (keys %hash) {
+	my %cluster;
+	for (my $i=0;$i<@filein;$i++) {
+		next if(!(defined $hash{$key}[$i]));
+		my @tmp=split/\n/,$hash{$key}[$i];
+		foreach  (@tmp) {
+			$cluster{$_}[$i]++;
+		}
+	}
+	
+	foreach my $seq (keys %cluster) {
+		my $exp=""; my $ee=0;
+		for (my $i=0;$i<@filein;$i++) {
+			if (defined $cluster{$seq}[$i]) {
+				$exp.="_$cluster{$seq}[$i]";
+				$ee+=$cluster{$seq}[$i];
+			}else{
+				$exp.="_0";
+			}
+		}
+		$count+=$ee;
+		$exp=~s/^_//;
+		print OUT ">$name","_$count:$exp","_x$ee\n$seq\n";
+	}
+}
+close OUT;
+
+
+sub qvcheck{
+	my ($str,$t,$v)=@_;
+	my $qv=0;
+	if($t eq "mean"){ 
+		$qv=&getMeanQuality($str);
+	}
+	elsif($t eq "min"){
+		$qv=&getMinQuality($str);
+	}
+	if ($qv<$v) {
+		return 0;
+	}
+	return 1;
+}
+
+sub getMeanQuality(){
+	chomp $_[0];
+	my @bases = split(//,$_[0]);
+	my $sum = 0;
+	for(my $i = 0; $i <= $#bases; $i++){
+		my $num = ord($bases[$i]) - $pq;
+		$sum += $num;
+	}
+	
+	return $sum/($#bases+1);
+	
+}
+
+###
+### This function gives back the Q-value of the worst base
+sub getMinQuality(){
+	chomp $_[0];
+	my @bases = split(//,$_[0]);
+	my $worst = 1000;
+	for(my $i = 0; $i <= $#bases; $i++){
+#		printf ("base: $bases[$i]  --> %d\n",ord($bases[$i]));
+		my $num = ord($bases[$i]) - $pq;
+		if($num < $worst){
+			$worst = $num;
+		}
+	}
+	return $worst;
+}
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -format -mark -qual -qv -o 
+options:
+-i input file#fastq file ##can be multiple -i file1 -i file2 ...
+-mark string#quary name,default is "seq"
+-o output file
+-format string # fastq|fasta|fq|fa
+
+-qual #reads filter
+   eg:(min:value/mean:value)
+   This parameter just for solexa reads.
+   If the input files are solid and needs filter,please do filter first .
+   
+-qv  integer #Phred quality64/33,default 33
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conventional.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,156 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Chentt
+#Email: chentt@big.ac.cn
+#Date: 2014/04/09
+#Modified:
+#Description: islands merged of merged samples 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","d=i","o=s","N=i","t=s","mark=s","h");
+if (!(defined $opts{i} and defined $opts{d} and defined $opts{N} and defined $opts{mark} and defined $opts{t} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'o'};
+my $distance=$opts{'d'};
+my $tempout=$opts{'t'};
+my $mark=$opts{'mark'};
+my @sample=split/\#/,$mark;
+$mark=join"\"\t\"",@sample;
+
+open IN,"<$filein"; #input file  
+open OUT,">$fileout"; #output file  
+print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
+open TMP,">$tempout";
+print TMP "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";
+my %hash;
+while (my $aline=<IN>) {
+	chomp $aline;
+	if($aline=~/^\#/){
+		#print OUT "$aline\n";
+		next;
+	}
+	my @tmp=split/\t/,$aline;
+	my $chr=shift @tmp;
+	#shift @tmp;
+	push @{$hash{$chr}},[@tmp];
+}
+
+close IN;
+
+foreach my $key (keys %hash) {
+	my @tag=sort{$a->[1] <=> $b->[1]} @{$hash{$key}};
+	my @sample;
+	my $start=$tag[0][1];
+	my $end=$tag[0][2];
+	push @sample,[@{$tag[0]}];
+	for (my $i=1;$i<@tag-1;$i++) {
+		if ($tag[$i][1]-$end<=$distance) {
+			if ($tag[$i][2]>$end) {
+				$end=$tag[$i][2];
+			}
+			push @sample,[@{$tag[$i]}];
+		}
+		else{
+			my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample);
+			my $cluster_exp=join"\t",@cluster_exp;
+			if ($max_length>30) {
+				print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";
+				$max_length="\>30";
+			}
+			else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";}
+			print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n";			
+			$start=$tag[$i][1];
+			$end=$tag[$i][2];
+
+			@sample=();
+			push @sample,[@{$tag[$i]}];
+		}
+	}
+	if ($tag[$#tag][1]-$end<=$distance) {
+			if ($tag[$#tag][2]>$end) {
+				$end=$tag[$#tag][2];
+			}
+			push @sample,[@{$tag[$#tag]}];
+			my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample);
+			my $cluster_exp=join"\t",@cluster_exp;
+			if ($max_length>30) {
+				$max_length="\>30";
+				print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";
+			}
+			else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";}
+			print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n";
+	}
+	else{
+		my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample);
+		my $cluster_exp=join"\t",@cluster_exp;
+		if ($max_length>30) {
+				$max_length="\>30";
+				print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";
+		}
+		else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";}
+		print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n";
+
+	}
+}
+close OUT;
+close TMP;
+sub Max_length{
+	my @exp=@{$_[0]};
+	my %sample_length;
+	my $total_exp;
+	my @each;
+	my @tag;
+	for (my $i=0;$i<=$#exp ;$i++) {
+		my $length=$exp[$i][2]-$exp[$i][1]+1;
+		#if ($length>30) {
+		#	$length=40;
+		#}
+		my $exp=0;
+		foreach  (1..$opts{'N'}) {
+			$exp+=$exp[$i][$_+2];
+			$each[$_-1]+=$exp[$i][$_+2];
+		}	
+		$sample_length{$length}+=$exp;
+		$total_exp+=$exp;
+		push @tag,($exp[$i][1].",".$exp[$i][2].",".$exp[$i][0].",".$exp);
+	}
+	my $max=0;
+	my $max_key;
+	foreach my $key (sort keys %sample_length) {
+		my $p=$sample_length{$key}/$total_exp;
+		if ($p>$max) {
+			$max=$p;
+			$max_key=$key;
+		}
+		$sample_length{$key}=sprintf("%.2f",$p);
+	}
+	my $tag_n=@tag;
+	my $tag=join";",@tag;
+	$tag=$tag_n."\t".$tag;
+	return($max_key,$sample_length{$max_key},$tag,@each);
+}
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o -d -N -t -mark
+options:
+-i input file
+-d distance of two islands
+-mark sample name;
+-o output file
+-N sample number
+-t temp output file
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_bowtie_to_blast.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,126 @@
+#!/usr/bin/perl 
+
+
+use warnings;
+use strict;
+use Getopt::Std;
+
+######################################### USAGE ################################
+
+my $usage=
+"$0 file_bowtie_result file_solexa_seq file_chromosome
+
+This is a converter which changes Bowtie output into Blast format.
+The input includes three files: a Bowtie result file (default Bowtie
+output file), a fasta file consisting of small Reads and a chromosome
+fasta file. It outputs the alignments in blast_parsed format.
+
+file_bowtie_result likes:
+
+AtFlower100010_x2	+	MIR319c	508	AAGGAGATTCTTTCAGTCCAG	IIIIIIIIIIIIIIIIIIIII	0	
+AtFlower1000188_x1	+	MIR2933a	421	TCGGAGAGGAAATTCGTCGGCG	IIIIIIIIIIIIIIIIIIIIII	0	
+
+file_solexa_seq likes:
+
+>AtFlower100010_x2
+AAGGAGATTCTTTCAGTCCAG
+
+file_chromosome contains chromosome seq in fasta format
+
+";
+
+
+####################################### INPUT FILES ############################
+
+my $file_bowtie_result=shift or die $usage;
+my $file_short_seq=shift or die $usage;
+my $file_chromosome_seq=shift or die $usage;
+
+
+##################################### GLOBAL VARIBALES #########################
+
+my %short_seq_length=();
+my %chromosome_length=();
+
+
+######################################### MAIN ################################# 
+
+#get the short sequence id and its length
+sequence_length($file_short_seq,\%short_seq_length);
+
+#get the chromosome sequence id and its length
+sequence_length($file_chromosome_seq,\%chromosome_length);
+
+#convert bowtie result format to blast format;
+change_format($file_bowtie_result);
+
+exit;
+
+
+##################################### SUBROUTINES ##############################
+
+sub sequence_length{
+    my ($file,$hash) = @_;
+    my ($id, $desc, $sequence, $seq_length) = ();
+
+    open (FASTA, "<$file") or die "can not open $$file\n";
+    while (<FASTA>)
+    {
+        chomp;
+        if (/^>(\S+)(.*)/)
+	{
+	    $id       = $1;
+	    $desc     = $2;
+	    $sequence = "";
+	    while (<FASTA>){
+                chomp;
+                if (/^>(\S+)(.*)/){
+		    $$hash{$id}  = length $sequence;
+		    $id         = $1;
+		    $desc       = $2;
+		    $sequence   = "";
+		    next;
+                }
+		$sequence .= $_;
+            }
+        }
+    }
+    $seq_length=length($sequence);
+    $$hash{$id} = $seq_length;
+    close FASTA;
+}
+
+
+
+
+
+sub change_format{
+	#Change Bowtie format into blast format
+	my $file=shift @_;
+	open(FILE,"<$file")||die"can not open the bowtie result file:$!\n";
+  #open(BLASTOUT,">blastout")||die"can not create the blastout file:$!\n";
+	
+	while(<FILE>){
+		chomp;
+		my @tmp=split("\t",$_);
+	#Clean the reads ID
+		my @tmp1=split(" ",$tmp[0]);
+		print  "$tmp1[0]"."\t"."$short_seq_length{$tmp1[0]}"."\t"."1".'..'."$short_seq_length{$tmp1[0]}"."\t"."$tmp[2]"."\t"."$chromosome_length{$tmp[2]}"."\t";
+		if($tmp[1] eq "+"){
+			my $seq_end=$tmp[3] + $short_seq_length{$tmp1[0]};
+			my $seq_bg=$tmp[3] + 1;
+			print  "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Plus"."\n";
+			}
+		if($tmp[1] eq "-"){
+			my $seq_end=$chromosome_length{$tmp[2]} - $tmp[3];
+			my $seq_bg=$seq_end - $short_seq_length{$tmp1[0]} + 1;
+			print  "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Minus"."\n";
+			}
+		}
+	
+#	close BLASTOUT;
+
+}
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/count_ref_length.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,58 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2009-05-06
+#Modified:
+#Description: ɾ³ýmatched reads 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","o=s","h");
+if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'o'};
+
+open IN,"<$filein"; #input file  
+open OUT,">$fileout"; #output file  
+
+my ($name,$seq);
+while (my $aline=<IN>) {
+	chomp $aline;
+	if ($aline=~/^>(\S+)/) {
+		$name=$1;
+		while (my $new=<IN>) {
+			chomp $new;
+			if ($new=~/^>(\S+)/) {
+				print OUT $name,"\t",length($seq),"\n";
+				$seq ="";
+				$name=$1;
+				next;
+			}
+			else{$seq .=$new;}
+		}
+	}
+	print OUT $name,"\t",length($seq),"\n";
+}
+ 
+close IN;
+close OUT;
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o
+options:
+-i input file
+-o output file
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/count_rfam_express.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,1800 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2013/7/19
+#Modified:
+#Description: 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+my %opts;
+GetOptions(\%opts,"i=s","o=s","tag:s","h");
+if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'o'};
+
+my $marks=defined $opts{'tag'} ? $opts{'tag'} : "";
+
+if(!(defined $opts{'tag'})){
+	my $line=`head -1 $filein`;
+	my @tmp=split/\t/,$line;
+	$tmp[0]=~/:([\d|_]+)_x(\d+)$/;
+	my @ss=split/_/,$1;
+	for (my $i=1;$i<=@ss;$i++) {
+		$marks .="Smp$i;";
+	}
+}
+
+my @marks=split/\;/,$marks;
+
+my %rfam_key;
+while(<DATA>){
+    chomp;
+    if(/^(\S+)\s+(\S+)$/){
+		$rfam_key{$1}=$2;
+    }
+}
+
+
+my %reads;
+my %tags;
+open IN,"<$filein";
+while (my $aline=<IN>) {
+	chomp $aline;
+	my @tmp=split/\t/,$aline;
+	$tmp[0]=~/:([\d|_]+)_x(\d+)$/;
+
+	my @exp=split/_/,$1;
+	my @tag=split/\;/,$tmp[2];
+
+	if (defined $rfam_key{$tag[0]}) {
+		for (my $i=0;$i<@exp;$i++) {
+			$reads{$rfam_key{$tag[0]}}[$i]+=$exp[$i];
+			$tags{$rfam_key{$tag[0]}}[$i]++ if($exp[$i]!=0);
+		}
+	}else{
+		for (my $i=0;$i<@exp;$i++) {
+			$reads{other}[$i]+=$exp[$i];
+			$tags{other}[$i]++ if($exp[$i]!=0);
+		}
+	}
+
+}
+close IN;
+
+$"="\t"; ##### @array print in \t
+open OUT,">$fileout";
+print OUT "####################################\n# small RNA expressed reads number #\n####################################\n";
+print OUT "#RNAname\t@marks\n";
+foreach my $key (keys %reads) {
+	print OUT $key;
+	for (my $i=0;$i<@{$reads{$key}} ;$i++) {
+		print OUT "\t",$reads{$key}[$i];
+	}
+	print OUT "\n";
+}
+
+print OUT "\n\n####################################\n# small RNA expressed tags number #\n####################################\n";
+print OUT "#RNAname\t@marks\n";
+
+foreach my $key (keys %tags) {
+	print OUT  $key;
+	for (my $i=0;$i<@{$reads{$key}} ;$i++) {
+		if(defined $tags{$key}[$i]){print OUT "\t",$tags{$key}[$i];}
+		else{print OUT "\t0";}
+	}
+	print OUT  "\n";
+}
+
+close OUT;
+$"=" "; ##### @array print in \t
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -tag -o
+options:
+-i input file# rfam bowtie bwt. format mapping result
+-tag [string] sample marks# eg. sampleA;sampleB;sampleC
+-o output file
+
+-h help
+USAGE
+exit(1);
+}
+
+__DATA__
+RF00635	lncRNA
+RF01868	lncRNA
+RF01869	lncRNA
+RF01870	lncRNA
+RF01871	lncRNA
+RF01872	lncRNA
+RF01873	lncRNA
+RF01874	lncRNA
+RF01875	lncRNA
+RF01876	lncRNA
+RF01877	lncRNA
+RF01878	lncRNA
+RF01879	lncRNA
+RF01880	lncRNA
+RF01881	lncRNA
+RF01882	lncRNA
+RF01883	lncRNA
+RF01884	lncRNA
+RF01885	lncRNA
+RF01886	lncRNA
+RF01887	lncRNA
+RF01888	lncRNA
+RF01889	lncRNA
+RF01890	lncRNA
+RF01891	lncRNA
+RF01892	lncRNA
+RF01893	lncRNA
+RF01894	lncRNA
+RF01904	lncRNA
+RF01905	lncRNA
+RF01906	lncRNA
+RF01907	lncRNA
+RF01908	lncRNA
+RF01909	lncRNA
+RF01928	lncRNA
+RF01929	lncRNA
+RF01930	lncRNA
+RF01931	lncRNA
+RF01932	lncRNA
+RF01933	lncRNA
+RF01934	lncRNA
+RF01935	lncRNA
+RF01946	lncRNA
+RF01947	lncRNA
+RF01948	lncRNA
+RF01950	lncRNA
+RF01951	lncRNA
+RF01952	lncRNA
+RF01953	lncRNA
+RF01954	lncRNA
+RF01955	lncRNA
+RF01956	lncRNA
+RF01957	lncRNA
+RF01958	lncRNA
+RF01961	lncRNA
+RF01962	lncRNA
+RF01963	lncRNA
+RF01964	lncRNA
+RF01965	lncRNA
+RF01966	lncRNA
+RF01967	lncRNA
+RF01968	lncRNA
+RF01969	lncRNA
+RF01970	lncRNA
+RF01971	lncRNA
+RF01972	lncRNA
+RF01973	lncRNA
+RF01974	lncRNA
+RF01975	lncRNA
+RF01976	lncRNA
+RF01977	lncRNA
+RF01978	lncRNA
+RF01979	lncRNA
+RF01980	lncRNA
+RF01981	lncRNA
+RF01983	lncRNA
+RF01984	lncRNA
+RF01985	lncRNA
+RF01986	lncRNA
+RF01987	lncRNA
+RF01992	lncRNA
+RF02038	lncRNA
+RF02039	lncRNA
+RF02040	lncRNA
+RF02041	lncRNA
+RF02042	lncRNA
+RF02043	lncRNA
+RF02044	lncRNA
+RF02045	lncRNA
+RF02046	lncRNA
+RF02047	lncRNA
+RF02085	lncRNA
+RF02086	lncRNA
+RF02087	lncRNA
+RF02089	lncRNA
+RF02090	lncRNA
+RF02091	lncRNA
+RF02098	lncRNA
+RF02101	lncRNA
+RF02102	lncRNA
+RF02103	lncRNA
+RF02104	lncRNA
+RF02105	lncRNA
+RF02106	lncRNA
+RF02107	lncRNA
+RF02108	lncRNA
+RF02109	lncRNA
+RF02110	lncRNA
+RF02112	lncRNA
+RF02113	lncRNA
+RF02114	lncRNA
+RF02115	lncRNA
+RF02116	lncRNA
+RF02117	lncRNA
+RF02118	lncRNA
+RF02119	lncRNA
+RF02120	lncRNA
+RF02121	lncRNA
+RF02122	lncRNA
+RF02123	lncRNA
+RF02124	lncRNA
+RF02125	lncRNA
+RF02126	lncRNA
+RF02127	lncRNA
+RF02128	lncRNA
+RF02129	lncRNA
+RF02130	lncRNA
+RF02131	lncRNA
+RF02132	lncRNA
+RF02133	lncRNA
+RF02134	lncRNA
+RF02135	lncRNA
+RF02136	lncRNA
+RF02137	lncRNA
+RF02138	lncRNA
+RF02139	lncRNA
+RF02140	lncRNA
+RF02141	lncRNA
+RF02142	lncRNA
+RF02143	lncRNA
+RF02145	lncRNA
+RF02146	lncRNA
+RF02147	lncRNA
+RF02148	lncRNA
+RF02149	lncRNA
+RF02150	lncRNA
+RF02152	lncRNA
+RF02153	lncRNA
+RF02154	lncRNA
+RF02155	lncRNA
+RF02156	lncRNA
+RF02157	lncRNA
+RF02158	lncRNA
+RF02159	lncRNA
+RF02160	lncRNA
+RF02161	lncRNA
+RF02164	lncRNA
+RF02165	lncRNA
+RF02166	lncRNA
+RF02167	lncRNA
+RF02168	lncRNA
+RF02169	lncRNA
+RF02170	lncRNA
+RF02171	lncRNA
+RF02172	lncRNA
+RF02173	lncRNA
+RF02174	lncRNA
+RF02175	lncRNA
+RF02176	lncRNA
+RF02177	lncRNA
+RF02178	lncRNA
+RF02179	lncRNA
+RF02180	lncRNA
+RF02181	lncRNA
+RF02182	lncRNA
+RF02183	lncRNA
+RF02184	lncRNA
+RF02185	lncRNA
+RF02186	lncRNA
+RF02187	lncRNA
+RF02188	lncRNA
+RF02189	lncRNA
+RF02190	lncRNA
+RF02191	lncRNA
+RF02192	lncRNA
+RF02193	lncRNA
+RF02195	lncRNA
+RF02196	lncRNA
+RF02197	lncRNA
+RF02198	lncRNA
+RF02199	lncRNA
+RF02200	lncRNA
+RF02201	lncRNA
+RF02202	lncRNA
+RF02203	lncRNA
+RF02204	lncRNA
+RF02205	lncRNA
+RF02206	lncRNA
+RF02207	lncRNA
+RF02208	lncRNA
+RF02209	lncRNA
+RF02210	lncRNA
+RF02211	lncRNA
+RF02212	lncRNA
+RF02213	lncRNA
+RF02215	lncRNA
+RF02216	lncRNA
+RF02217	lncRNA
+RF02218	lncRNA
+RF02219	lncRNA
+RF02220	lncRNA
+RF02246	lncRNA
+RF02247	lncRNA
+RF02248	lncRNA
+RF02249	lncRNA
+RF02250	lncRNA
+RF02251	lncRNA
+RF02252	lncRNA
+RF02255	lncRNA
+RF02256	lncRNA
+RF02257	lncRNA
+RF02258	lncRNA
+RF02259	lncRNA
+RF02267	lncRNA
+RF02272	lncRNA
+RF00027	miRNA
+RF00047	miRNA
+RF00051	miRNA
+RF00052	miRNA
+RF00053	miRNA
+RF00073	miRNA
+RF00074	miRNA
+RF00075	miRNA
+RF00076	miRNA
+RF00103	miRNA
+RF00104	miRNA
+RF00129	miRNA
+RF00130	miRNA
+RF00131	miRNA
+RF00143	miRNA
+RF00144	miRNA
+RF00178	miRNA
+RF00237	miRNA
+RF00239	miRNA
+RF00241	miRNA
+RF00244	miRNA
+RF00245	miRNA
+RF00246	miRNA
+RF00247	miRNA
+RF00248	miRNA
+RF00249	miRNA
+RF00250	miRNA
+RF00251	miRNA
+RF00253	miRNA
+RF00254	miRNA
+RF00255	miRNA
+RF00256	miRNA
+RF00257	miRNA
+RF00258	miRNA
+RF00363	miRNA
+RF00364	miRNA
+RF00365	miRNA
+RF00366	miRNA
+RF00367	miRNA
+RF00445	miRNA
+RF00446	miRNA
+RF00451	miRNA
+RF00452	miRNA
+RF00455	miRNA
+RF00456	miRNA
+RF00464	miRNA
+RF00486	miRNA
+RF00637	miRNA
+RF00638	miRNA
+RF00639	miRNA
+RF00640	miRNA
+RF00641	miRNA
+RF00642	miRNA
+RF00643	miRNA
+RF00644	miRNA
+RF00645	miRNA
+RF00646	miRNA
+RF00647	miRNA
+RF00648	miRNA
+RF00649	miRNA
+RF00650	miRNA
+RF00651	miRNA
+RF00652	miRNA
+RF00653	miRNA
+RF00654	miRNA
+RF00655	miRNA
+RF00656	miRNA
+RF00657	miRNA
+RF00658	miRNA
+RF00659	miRNA
+RF00660	miRNA
+RF00661	miRNA
+RF00662	miRNA
+RF00663	miRNA
+RF00664	miRNA
+RF00665	miRNA
+RF00666	miRNA
+RF00667	miRNA
+RF00668	miRNA
+RF00669	miRNA
+RF00670	miRNA
+RF00671	miRNA
+RF00672	miRNA
+RF00673	miRNA
+RF00674	miRNA
+RF00675	miRNA
+RF00676	miRNA
+RF00677	miRNA
+RF00678	miRNA
+RF00679	miRNA
+RF00680	miRNA
+RF00681	miRNA
+RF00682	miRNA
+RF00683	miRNA
+RF00684	miRNA
+RF00685	miRNA
+RF00686	miRNA
+RF00687	miRNA
+RF00688	miRNA
+RF00689	miRNA
+RF00690	miRNA
+RF00691	miRNA
+RF00692	miRNA
+RF00693	miRNA
+RF00694	miRNA
+RF00695	miRNA
+RF00696	miRNA
+RF00697	miRNA
+RF00698	miRNA
+RF00699	miRNA
+RF00700	miRNA
+RF00701	miRNA
+RF00702	miRNA
+RF00703	miRNA
+RF00704	miRNA
+RF00705	miRNA
+RF00706	miRNA
+RF00707	miRNA
+RF00708	miRNA
+RF00709	miRNA
+RF00710	miRNA
+RF00711	miRNA
+RF00712	miRNA
+RF00713	miRNA
+RF00714	miRNA
+RF00715	miRNA
+RF00716	miRNA
+RF00717	miRNA
+RF00718	miRNA
+RF00719	miRNA
+RF00720	miRNA
+RF00721	miRNA
+RF00722	miRNA
+RF00723	miRNA
+RF00724	miRNA
+RF00725	miRNA
+RF00726	miRNA
+RF00727	miRNA
+RF00728	miRNA
+RF00729	miRNA
+RF00730	miRNA
+RF00731	miRNA
+RF00732	miRNA
+RF00733	miRNA
+RF00734	miRNA
+RF00735	miRNA
+RF00736	miRNA
+RF00737	miRNA
+RF00739	miRNA
+RF00740	miRNA
+RF00741	miRNA
+RF00742	miRNA
+RF00743	miRNA
+RF00744	miRNA
+RF00745	miRNA
+RF00746	miRNA
+RF00747	miRNA
+RF00748	miRNA
+RF00749	miRNA
+RF00750	miRNA
+RF00751	miRNA
+RF00752	miRNA
+RF00753	miRNA
+RF00754	miRNA
+RF00755	miRNA
+RF00756	miRNA
+RF00757	miRNA
+RF00758	miRNA
+RF00760	miRNA
+RF00761	miRNA
+RF00762	miRNA
+RF00763	miRNA
+RF00764	miRNA
+RF00765	miRNA
+RF00766	miRNA
+RF00767	miRNA
+RF00768	miRNA
+RF00769	miRNA
+RF00770	miRNA
+RF00771	miRNA
+RF00772	miRNA
+RF00773	miRNA
+RF00774	miRNA
+RF00775	miRNA
+RF00776	miRNA
+RF00777	miRNA
+RF00778	miRNA
+RF00779	miRNA
+RF00780	miRNA
+RF00781	miRNA
+RF00782	miRNA
+RF00783	miRNA
+RF00784	miRNA
+RF00785	miRNA
+RF00786	miRNA
+RF00787	miRNA
+RF00788	miRNA
+RF00789	miRNA
+RF00790	miRNA
+RF00791	miRNA
+RF00792	miRNA
+RF00793	miRNA
+RF00794	miRNA
+RF00795	miRNA
+RF00796	miRNA
+RF00797	miRNA
+RF00798	miRNA
+RF00799	miRNA
+RF00800	miRNA
+RF00801	miRNA
+RF00802	miRNA
+RF00803	miRNA
+RF00804	miRNA
+RF00805	miRNA
+RF00806	miRNA
+RF00807	miRNA
+RF00808	miRNA
+RF00809	miRNA
+RF00810	miRNA
+RF00811	miRNA
+RF00812	miRNA
+RF00813	miRNA
+RF00814	miRNA
+RF00815	miRNA
+RF00816	miRNA
+RF00817	miRNA
+RF00818	miRNA
+RF00819	miRNA
+RF00820	miRNA
+RF00821	miRNA
+RF00822	miRNA
+RF00823	miRNA
+RF00824	miRNA
+RF00825	miRNA
+RF00826	miRNA
+RF00827	miRNA
+RF00828	miRNA
+RF00829	miRNA
+RF00830	miRNA
+RF00831	miRNA
+RF00832	miRNA
+RF00833	miRNA
+RF00834	miRNA
+RF00835	miRNA
+RF00836	miRNA
+RF00837	miRNA
+RF00838	miRNA
+RF00839	miRNA
+RF00840	miRNA
+RF00841	miRNA
+RF00842	miRNA
+RF00843	miRNA
+RF00844	miRNA
+RF00845	miRNA
+RF00846	miRNA
+RF00847	miRNA
+RF00848	miRNA
+RF00849	miRNA
+RF00850	miRNA
+RF00851	miRNA
+RF00852	miRNA
+RF00853	miRNA
+RF00854	miRNA
+RF00855	miRNA
+RF00856	miRNA
+RF00857	miRNA
+RF00858	miRNA
+RF00859	miRNA
+RF00861	miRNA
+RF00862	miRNA
+RF00863	miRNA
+RF00864	miRNA
+RF00865	miRNA
+RF00866	miRNA
+RF00867	miRNA
+RF00868	miRNA
+RF00869	miRNA
+RF00870	miRNA
+RF00871	miRNA
+RF00872	miRNA
+RF00873	miRNA
+RF00874	miRNA
+RF00875	miRNA
+RF00876	miRNA
+RF00877	miRNA
+RF00878	miRNA
+RF00879	miRNA
+RF00882	miRNA
+RF00883	miRNA
+RF00884	miRNA
+RF00885	miRNA
+RF00886	miRNA
+RF00887	miRNA
+RF00888	miRNA
+RF00890	miRNA
+RF00891	miRNA
+RF00892	miRNA
+RF00893	miRNA
+RF00894	miRNA
+RF00895	miRNA
+RF00896	miRNA
+RF00897	miRNA
+RF00898	miRNA
+RF00899	miRNA
+RF00900	miRNA
+RF00901	miRNA
+RF00902	miRNA
+RF00903	miRNA
+RF00904	miRNA
+RF00905	miRNA
+RF00906	miRNA
+RF00907	miRNA
+RF00908	miRNA
+RF00909	miRNA
+RF00910	miRNA
+RF00911	miRNA
+RF00912	miRNA
+RF00914	miRNA
+RF00915	miRNA
+RF00917	miRNA
+RF00918	miRNA
+RF00919	miRNA
+RF00920	miRNA
+RF00921	miRNA
+RF00922	miRNA
+RF00925	miRNA
+RF00926	miRNA
+RF00927	miRNA
+RF00928	miRNA
+RF00929	miRNA
+RF00931	miRNA
+RF00932	miRNA
+RF00933	miRNA
+RF00934	miRNA
+RF00935	miRNA
+RF00936	miRNA
+RF00937	miRNA
+RF00939	miRNA
+RF00940	miRNA
+RF00941	miRNA
+RF00942	miRNA
+RF00943	miRNA
+RF00945	miRNA
+RF00946	miRNA
+RF00947	miRNA
+RF00948	miRNA
+RF00949	miRNA
+RF00950	miRNA
+RF00951	miRNA
+RF00952	miRNA
+RF00953	miRNA
+RF00954	miRNA
+RF00955	miRNA
+RF00956	miRNA
+RF00957	miRNA
+RF00958	miRNA
+RF00959	miRNA
+RF00960	miRNA
+RF00961	miRNA
+RF00962	miRNA
+RF00963	miRNA
+RF00964	miRNA
+RF00965	miRNA
+RF00966	miRNA
+RF00967	miRNA
+RF00968	miRNA
+RF00969	miRNA
+RF00970	miRNA
+RF00971	miRNA
+RF00972	miRNA
+RF00973	miRNA
+RF00974	miRNA
+RF00975	miRNA
+RF00976	miRNA
+RF00977	miRNA
+RF00978	miRNA
+RF00979	miRNA
+RF00980	miRNA
+RF00981	miRNA
+RF00983	miRNA
+RF00984	miRNA
+RF00985	miRNA
+RF00986	miRNA
+RF00987	miRNA
+RF00988	miRNA
+RF00989	miRNA
+RF00990	miRNA
+RF00991	miRNA
+RF00992	miRNA
+RF00993	miRNA
+RF00994	miRNA
+RF00995	miRNA
+RF00996	miRNA
+RF00997	miRNA
+RF00998	miRNA
+RF00999	miRNA
+RF01000	miRNA
+RF01001	miRNA
+RF01002	miRNA
+RF01003	miRNA
+RF01004	miRNA
+RF01005	miRNA
+RF01006	miRNA
+RF01007	miRNA
+RF01008	miRNA
+RF01009	miRNA
+RF01010	miRNA
+RF01011	miRNA
+RF01012	miRNA
+RF01013	miRNA
+RF01014	miRNA
+RF01015	miRNA
+RF01016	miRNA
+RF01017	miRNA
+RF01018	miRNA
+RF01019	miRNA
+RF01020	miRNA
+RF01021	miRNA
+RF01022	miRNA
+RF01023	miRNA
+RF01024	miRNA
+RF01025	miRNA
+RF01026	miRNA
+RF01027	miRNA
+RF01028	miRNA
+RF01029	miRNA
+RF01030	miRNA
+RF01031	miRNA
+RF01032	miRNA
+RF01033	miRNA
+RF01034	miRNA
+RF01035	miRNA
+RF01036	miRNA
+RF01037	miRNA
+RF01038	miRNA
+RF01039	miRNA
+RF01040	miRNA
+RF01041	miRNA
+RF01042	miRNA
+RF01043	miRNA
+RF01044	miRNA
+RF01045	miRNA
+RF01059	miRNA
+RF01061	miRNA
+RF01063	miRNA
+RF01064	miRNA
+RF01117	miRNA
+RF01314	miRNA
+RF01413	miRNA
+RF01895	miRNA
+RF01896	miRNA
+RF01897	miRNA
+RF01898	miRNA
+RF01899	miRNA
+RF01900	miRNA
+RF01901	miRNA
+RF01902	miRNA
+RF01903	miRNA
+RF01910	miRNA
+RF01911	miRNA
+RF01912	miRNA
+RF01913	miRNA
+RF01914	miRNA
+RF01915	miRNA
+RF01916	miRNA
+RF01917	miRNA
+RF01918	miRNA
+RF01919	miRNA
+RF01920	miRNA
+RF01921	miRNA
+RF01922	miRNA
+RF01923	miRNA
+RF01924	miRNA
+RF01925	miRNA
+RF01926	miRNA
+RF01927	miRNA
+RF01936	miRNA
+RF01937	miRNA
+RF01938	miRNA
+RF01939	miRNA
+RF01940	miRNA
+RF01941	miRNA
+RF01942	miRNA
+RF01943	miRNA
+RF01944	miRNA
+RF01945	miRNA
+RF01996	miRNA
+RF01997	miRNA
+RF02000	miRNA
+RF02002	miRNA
+RF02006	miRNA
+RF02007	miRNA
+RF02008	miRNA
+RF02009	miRNA
+RF02010	miRNA
+RF02011	miRNA
+RF02013	miRNA
+RF02014	miRNA
+RF02015	miRNA
+RF02016	miRNA
+RF02017	miRNA
+RF02018	miRNA
+RF02019	miRNA
+RF02020	miRNA
+RF02021	miRNA
+RF02022	miRNA
+RF02023	miRNA
+RF02024	miRNA
+RF02025	miRNA
+RF02026	miRNA
+RF02027	miRNA
+RF02028	miRNA
+RF02061	miRNA
+RF02092	miRNA
+RF02093	miRNA
+RF02094	miRNA
+RF02095	miRNA
+RF02096	miRNA
+RF02097	miRNA
+RF02214	miRNA
+RF02244	miRNA
+RF02245	miRNA
+RF02254	miRNA
+RF00001	rRNA
+RF00002	rRNA
+RF01118	rRNA
+RF01960	rRNA
+RF00177	rRNA
+RF01959	rRNA
+RF00003	snRNA
+RF00004	snRNA
+RF00007	snRNA
+RF00012	snRNA
+RF00015	snRNA
+RF00016	snRNA
+RF00020	snRNA
+RF00026	snRNA
+RF00045	snRNA
+RF00046	snRNA
+RF00049	snRNA
+RF00054	snRNA
+RF00055	snRNA
+RF00056	snRNA
+RF00065	snRNA
+RF00066	snRNA
+RF00067	snRNA
+RF00068	snRNA
+RF00069	snRNA
+RF00070	snRNA
+RF00071	snRNA
+RF00072	snRNA
+RF00085	snRNA
+RF00086	snRNA
+RF00087	snRNA
+RF00088	snRNA
+RF00089	snRNA
+RF00090	snRNA
+RF00091	snRNA
+RF00092	snRNA
+RF00093	snRNA
+RF00095	snRNA
+RF00096	snRNA
+RF00097	snRNA
+RF00099	snRNA
+RF00105	snRNA
+RF00108	snRNA
+RF00132	snRNA
+RF00133	snRNA
+RF00134	snRNA
+RF00135	snRNA
+RF00136	snRNA
+RF00137	snRNA
+RF00138	snRNA
+RF00139	snRNA
+RF00142	snRNA
+RF00145	snRNA
+RF00147	snRNA
+RF00149	snRNA
+RF00150	snRNA
+RF00151	snRNA
+RF00152	snRNA
+RF00153	snRNA
+RF00154	snRNA
+RF00155	snRNA
+RF00156	snRNA
+RF00157	snRNA
+RF00158	snRNA
+RF00159	snRNA
+RF00160	snRNA
+RF00181	snRNA
+RF00186	snRNA
+RF00187	snRNA
+RF00188	snRNA
+RF00189	snRNA
+RF00190	snRNA
+RF00191	snRNA
+RF00200	snRNA
+RF00201	snRNA
+RF00202	snRNA
+RF00203	snRNA
+RF00204	snRNA
+RF00205	snRNA
+RF00206	snRNA
+RF00208	snRNA
+RF00211	snRNA
+RF00212	snRNA
+RF00213	snRNA
+RF00217	snRNA
+RF00218	snRNA
+RF00221	snRNA
+RF00231	snRNA
+RF00263	snRNA
+RF00264	snRNA
+RF00265	snRNA
+RF00266	snRNA
+RF00267	snRNA
+RF00268	snRNA
+RF00270	snRNA
+RF00271	snRNA
+RF00272	snRNA
+RF00273	snRNA
+RF00274	snRNA
+RF00275	snRNA
+RF00276	snRNA
+RF00277	snRNA
+RF00278	snRNA
+RF00279	snRNA
+RF00280	snRNA
+RF00281	snRNA
+RF00282	snRNA
+RF00283	snRNA
+RF00284	snRNA
+RF00285	snRNA
+RF00286	snRNA
+RF00287	snRNA
+RF00288	snRNA
+RF00289	snRNA
+RF00291	snRNA
+RF00292	snRNA
+RF00293	snRNA
+RF00294	snRNA
+RF00295	snRNA
+RF00296	snRNA
+RF00300	snRNA
+RF00301	snRNA
+RF00302	snRNA
+RF00303	snRNA
+RF00304	snRNA
+RF00305	snRNA
+RF00306	snRNA
+RF00307	snRNA
+RF00309	snRNA
+RF00310	snRNA
+RF00311	snRNA
+RF00312	snRNA
+RF00313	snRNA
+RF00314	snRNA
+RF00315	snRNA
+RF00316	snRNA
+RF00317	snRNA
+RF00318	snRNA
+RF00319	snRNA
+RF00320	snRNA
+RF00321	snRNA
+RF00322	snRNA
+RF00323	snRNA
+RF00324	snRNA
+RF00325	snRNA
+RF00326	snRNA
+RF00327	snRNA
+RF00328	snRNA
+RF00329	snRNA
+RF00330	snRNA
+RF00331	snRNA
+RF00332	snRNA
+RF00333	snRNA
+RF00334	snRNA
+RF00335	snRNA
+RF00336	snRNA
+RF00337	snRNA
+RF00338	snRNA
+RF00339	snRNA
+RF00340	snRNA
+RF00341	snRNA
+RF00342	snRNA
+RF00343	snRNA
+RF00344	snRNA
+RF00345	snRNA
+RF00348	snRNA
+RF00349	snRNA
+RF00350	snRNA
+RF00351	snRNA
+RF00352	snRNA
+RF00353	snRNA
+RF00355	snRNA
+RF00356	snRNA
+RF00357	snRNA
+RF00358	snRNA
+RF00359	snRNA
+RF00360	snRNA
+RF00361	snRNA
+RF00377	snRNA
+RF00392	snRNA
+RF00393	snRNA
+RF00394	snRNA
+RF00396	snRNA
+RF00397	snRNA
+RF00398	snRNA
+RF00399	snRNA
+RF00400	snRNA
+RF00401	snRNA
+RF00402	snRNA
+RF00403	snRNA
+RF00404	snRNA
+RF00405	snRNA
+RF00406	snRNA
+RF00407	snRNA
+RF00408	snRNA
+RF00409	snRNA
+RF00410	snRNA
+RF00411	snRNA
+RF00412	snRNA
+RF00413	snRNA
+RF00414	snRNA
+RF00415	snRNA
+RF00416	snRNA
+RF00417	snRNA
+RF00418	snRNA
+RF00419	snRNA
+RF00420	snRNA
+RF00421	snRNA
+RF00422	snRNA
+RF00423	snRNA
+RF00424	snRNA
+RF00425	snRNA
+RF00426	snRNA
+RF00427	snRNA
+RF00428	snRNA
+RF00429	snRNA
+RF00430	snRNA
+RF00431	snRNA
+RF00432	snRNA
+RF00438	snRNA
+RF00439	snRNA
+RF00440	snRNA
+RF00441	snRNA
+RF00443	snRNA
+RF00471	snRNA
+RF00472	snRNA
+RF00473	snRNA
+RF00474	snRNA
+RF00475	snRNA
+RF00476	snRNA
+RF00477	snRNA
+RF00478	snRNA
+RF00479	snRNA
+RF00482	snRNA
+RF00488	snRNA
+RF00492	snRNA
+RF00493	snRNA
+RF00494	snRNA
+RF00509	snRNA
+RF00526	snRNA
+RF00527	snRNA
+RF00528	snRNA
+RF00529	snRNA
+RF00530	snRNA
+RF00531	snRNA
+RF00532	snRNA
+RF00533	snRNA
+RF00535	snRNA
+RF00536	snRNA
+RF00537	snRNA
+RF00538	snRNA
+RF00539	snRNA
+RF00540	snRNA
+RF00541	snRNA
+RF00542	snRNA
+RF00543	snRNA
+RF00544	snRNA
+RF00545	snRNA
+RF00546	snRNA
+RF00548	snRNA
+RF00553	snRNA
+RF00554	snRNA
+RF00560	snRNA
+RF00561	snRNA
+RF00562	snRNA
+RF00563	snRNA
+RF00564	snRNA
+RF00565	snRNA
+RF00566	snRNA
+RF00567	snRNA
+RF00568	snRNA
+RF00569	snRNA
+RF00570	snRNA
+RF00571	snRNA
+RF00572	snRNA
+RF00573	snRNA
+RF00574	snRNA
+RF00575	snRNA
+RF00576	snRNA
+RF00577	snRNA
+RF00578	snRNA
+RF00579	snRNA
+RF00580	snRNA
+RF00581	snRNA
+RF00582	snRNA
+RF00584	snRNA
+RF00586	snRNA
+RF00588	snRNA
+RF00591	snRNA
+RF00592	snRNA
+RF00593	snRNA
+RF00594	snRNA
+RF00598	snRNA
+RF00599	snRNA
+RF00600	snRNA
+RF00601	snRNA
+RF00602	snRNA
+RF00603	snRNA
+RF00604	snRNA
+RF00606	snRNA
+RF00607	snRNA
+RF00608	snRNA
+RF00609	snRNA
+RF00610	snRNA
+RF00611	snRNA
+RF00612	snRNA
+RF00613	snRNA
+RF00614	snRNA
+RF00618	snRNA
+RF00619	snRNA
+RF01119	snRNA
+RF01120	snRNA
+RF01121	snRNA
+RF01122	snRNA
+RF01123	snRNA
+RF01124	snRNA
+RF01125	snRNA
+RF01126	snRNA
+RF01127	snRNA
+RF01128	snRNA
+RF01129	snRNA
+RF01130	snRNA
+RF01131	snRNA
+RF01132	snRNA
+RF01133	snRNA
+RF01134	snRNA
+RF01135	snRNA
+RF01136	snRNA
+RF01137	snRNA
+RF01138	snRNA
+RF01139	snRNA
+RF01140	snRNA
+RF01141	snRNA
+RF01142	snRNA
+RF01143	snRNA
+RF01144	snRNA
+RF01145	snRNA
+RF01146	snRNA
+RF01147	snRNA
+RF01148	snRNA
+RF01149	snRNA
+RF01150	snRNA
+RF01151	snRNA
+RF01152	snRNA
+RF01153	snRNA
+RF01155	snRNA
+RF01156	snRNA
+RF01157	snRNA
+RF01158	snRNA
+RF01159	snRNA
+RF01160	snRNA
+RF01161	snRNA
+RF01162	snRNA
+RF01163	snRNA
+RF01164	snRNA
+RF01165	snRNA
+RF01166	snRNA
+RF01167	snRNA
+RF01168	snRNA
+RF01169	snRNA
+RF01170	snRNA
+RF01171	snRNA
+RF01172	snRNA
+RF01173	snRNA
+RF01174	snRNA
+RF01175	snRNA
+RF01176	snRNA
+RF01177	snRNA
+RF01178	snRNA
+RF01179	snRNA
+RF01180	snRNA
+RF01181	snRNA
+RF01182	snRNA
+RF01183	snRNA
+RF01184	snRNA
+RF01185	snRNA
+RF01186	snRNA
+RF01188	snRNA
+RF01189	snRNA
+RF01190	snRNA
+RF01191	snRNA
+RF01192	snRNA
+RF01193	snRNA
+RF01194	snRNA
+RF01195	snRNA
+RF01196	snRNA
+RF01197	snRNA
+RF01198	snRNA
+RF01199	snRNA
+RF01200	snRNA
+RF01201	snRNA
+RF01202	snRNA
+RF01203	snRNA
+RF01204	snRNA
+RF01205	snRNA
+RF01206	snRNA
+RF01207	snRNA
+RF01208	snRNA
+RF01209	snRNA
+RF01210	snRNA
+RF01211	snRNA
+RF01212	snRNA
+RF01213	snRNA
+RF01214	snRNA
+RF01215	snRNA
+RF01216	snRNA
+RF01218	snRNA
+RF01219	snRNA
+RF01220	snRNA
+RF01221	snRNA
+RF01222	snRNA
+RF01223	snRNA
+RF01224	snRNA
+RF01225	snRNA
+RF01226	snRNA
+RF01227	snRNA
+RF01228	snRNA
+RF01229	snRNA
+RF01230	snRNA
+RF01231	snRNA
+RF01232	snRNA
+RF01233	snRNA
+RF01234	snRNA
+RF01235	snRNA
+RF01236	snRNA
+RF01237	snRNA
+RF01238	snRNA
+RF01239	snRNA
+RF01240	snRNA
+RF01241	snRNA
+RF01242	snRNA
+RF01243	snRNA
+RF01244	snRNA
+RF01245	snRNA
+RF01246	snRNA
+RF01247	snRNA
+RF01248	snRNA
+RF01249	snRNA
+RF01250	snRNA
+RF01251	snRNA
+RF01252	snRNA
+RF01253	snRNA
+RF01254	snRNA
+RF01255	snRNA
+RF01256	snRNA
+RF01257	snRNA
+RF01258	snRNA
+RF01259	snRNA
+RF01260	snRNA
+RF01261	snRNA
+RF01262	snRNA
+RF01263	snRNA
+RF01264	snRNA
+RF01265	snRNA
+RF01266	snRNA
+RF01267	snRNA
+RF01268	snRNA
+RF01269	snRNA
+RF01270	snRNA
+RF01271	snRNA
+RF01272	snRNA
+RF01273	snRNA
+RF01274	snRNA
+RF01275	snRNA
+RF01276	snRNA
+RF01277	snRNA
+RF01278	snRNA
+RF01279	snRNA
+RF01280	snRNA
+RF01281	snRNA
+RF01283	snRNA
+RF01284	snRNA
+RF01285	snRNA
+RF01286	snRNA
+RF01287	snRNA
+RF01288	snRNA
+RF01289	snRNA
+RF01290	snRNA
+RF01291	snRNA
+RF01292	snRNA
+RF01293	snRNA
+RF01294	snRNA
+RF01295	snRNA
+RF01296	snRNA
+RF01297	snRNA
+RF01298	snRNA
+RF01299	snRNA
+RF01300	snRNA
+RF01301	snRNA
+RF01302	snRNA
+RF01303	snRNA
+RF01304	snRNA
+RF01305	snRNA
+RF01306	snRNA
+RF01307	snRNA
+RF01308	snRNA
+RF01309	snRNA
+RF01310	snRNA
+RF01311	snRNA
+RF01312	snRNA
+RF01420	snRNA
+RF01421	snRNA
+RF01422	snRNA
+RF01423	snRNA
+RF01424	snRNA
+RF01425	snRNA
+RF01426	snRNA
+RF01427	snRNA
+RF01428	snRNA
+RF01429	snRNA
+RF01430	snRNA
+RF01431	snRNA
+RF01432	snRNA
+RF01433	snRNA
+RF01434	snRNA
+RF01435	snRNA
+RF01436	snRNA
+RF01437	snRNA
+RF01438	snRNA
+RF01439	snRNA
+RF01440	snRNA
+RF01441	snRNA
+RF01442	snRNA
+RF01443	snRNA
+RF01444	snRNA
+RF01445	snRNA
+RF01446	snRNA
+RF01447	snRNA
+RF01448	snRNA
+RF01449	snRNA
+RF01450	snRNA
+RF01451	snRNA
+RF01452	snRNA
+RF01498	snRNA
+RF01499	snRNA
+RF01500	snRNA
+RF01501	snRNA
+RF01505	snRNA
+RF01506	snRNA
+RF01507	snRNA
+RF01509	snRNA
+RF01511	snRNA
+RF01513	snRNA
+RF01514	snRNA
+RF01515	snRNA
+RF01516	snRNA
+RF01522	snRNA
+RF01523	snRNA
+RF01524	snRNA
+RF01525	snRNA
+RF01526	snRNA
+RF01531	snRNA
+RF01532	snRNA
+RF01533	snRNA
+RF01534	snRNA
+RF01535	snRNA
+RF01536	snRNA
+RF01537	snRNA
+RF01538	snRNA
+RF01539	snRNA
+RF01540	snRNA
+RF01541	snRNA
+RF01542	snRNA
+RF01543	snRNA
+RF01544	snRNA
+RF01545	snRNA
+RF01546	snRNA
+RF01547	snRNA
+RF01548	snRNA
+RF01549	snRNA
+RF01550	snRNA
+RF01551	snRNA
+RF01552	snRNA
+RF01553	snRNA
+RF01554	snRNA
+RF01555	snRNA
+RF01556	snRNA
+RF01557	snRNA
+RF01558	snRNA
+RF01559	snRNA
+RF01560	snRNA
+RF01561	snRNA
+RF01562	snRNA
+RF01563	snRNA
+RF01564	snRNA
+RF01565	snRNA
+RF01566	snRNA
+RF01567	snRNA
+RF01568	snRNA
+RF01569	snRNA
+RF01570	snRNA
+RF01572	snRNA
+RF01573	snRNA
+RF01574	snRNA
+RF01575	snRNA
+RF01576	snRNA
+RF01583	snRNA
+RF01584	snRNA
+RF01585	snRNA
+RF01586	snRNA
+RF01587	snRNA
+RF01588	snRNA
+RF01589	snRNA
+RF01590	snRNA
+RF01591	snRNA
+RF01592	snRNA
+RF01593	snRNA
+RF01594	snRNA
+RF01595	snRNA
+RF01596	snRNA
+RF01597	snRNA
+RF01598	snRNA
+RF01599	snRNA
+RF01600	snRNA
+RF01601	snRNA
+RF01602	snRNA
+RF01603	snRNA
+RF01604	snRNA
+RF01605	snRNA
+RF01606	snRNA
+RF01607	snRNA
+RF01608	snRNA
+RF01609	snRNA
+RF01610	snRNA
+RF01611	snRNA
+RF01612	snRNA
+RF01613	snRNA
+RF01614	snRNA
+RF01615	snRNA
+RF01617	snRNA
+RF01618	snRNA
+RF01620	snRNA
+RF01621	snRNA
+RF01622	snRNA
+RF01624	snRNA
+RF01625	snRNA
+RF01626	snRNA
+RF01627	snRNA
+RF01628	snRNA
+RF01629	snRNA
+RF01630	snRNA
+RF01631	snRNA
+RF01632	snRNA
+RF01633	snRNA
+RF01634	snRNA
+RF01635	snRNA
+RF01636	snRNA
+RF01637	snRNA
+RF01638	snRNA
+RF01639	snRNA
+RF01640	snRNA
+RF01641	snRNA
+RF01642	snRNA
+RF01644	snRNA
+RF01645	snRNA
+RF01646	snRNA
+RF01647	snRNA
+RF01648	snRNA
+RF01649	snRNA
+RF01650	snRNA
+RF01651	snRNA
+RF01652	snRNA
+RF01653	snRNA
+RF01654	snRNA
+RF01655	snRNA
+RF01658	snRNA
+RF01659	snRNA
+RF01660	snRNA
+RF01661	snRNA
+RF01662	snRNA
+RF01664	snRNA
+RF01802	snRNA
+RF01829	snRNA
+RF01844	snRNA
+RF01846	snRNA
+RF01847	snRNA
+RF01848	snRNA
+RF01860	snRNA
+RF01861	snRNA
+RF01862	snRNA
+RF01863	snRNA
+RF01864	snRNA
+RF01866	snRNA
+RF02163	snRNA
+RF00014	sRNA
+RF00018	sRNA
+RF00021	sRNA
+RF00034	sRNA
+RF00035	sRNA
+RF00057	sRNA
+RF00077	sRNA
+RF00078	sRNA
+RF00079	sRNA
+RF00081	sRNA
+RF00082	sRNA
+RF00083	sRNA
+RF00084	sRNA
+RF00101	sRNA
+RF00110	sRNA
+RF00111	sRNA
+RF00112	sRNA
+RF00113	sRNA
+RF00115	sRNA
+RF00116	sRNA
+RF00117	sRNA
+RF00118	sRNA
+RF00119	sRNA
+RF00121	sRNA
+RF00122	sRNA
+RF00124	sRNA
+RF00125	sRNA
+RF00126	sRNA
+RF00128	sRNA
+RF00166	sRNA
+RF00195	sRNA
+RF00368	sRNA
+RF00369	sRNA
+RF00370	sRNA
+RF00371	sRNA
+RF00372	sRNA
+RF00378	sRNA
+RF00444	sRNA
+RF00505	sRNA
+RF00519	sRNA
+RF00615	sRNA
+RF00616	sRNA
+RF01116	sRNA
+RF01385	sRNA
+RF01386	sRNA
+RF01387	sRNA
+RF01388	sRNA
+RF01389	sRNA
+RF01390	sRNA
+RF01391	sRNA
+RF01392	sRNA
+RF01393	sRNA
+RF01394	sRNA
+RF01395	sRNA
+RF01396	sRNA
+RF01397	sRNA
+RF01398	sRNA
+RF01399	sRNA
+RF01400	sRNA
+RF01401	sRNA
+RF01402	sRNA
+RF01403	sRNA
+RF01404	sRNA
+RF01405	sRNA
+RF01406	sRNA
+RF01407	sRNA
+RF01408	sRNA
+RF01409	sRNA
+RF01410	sRNA
+RF01411	sRNA
+RF01412	sRNA
+RF01457	sRNA
+RF01459	sRNA
+RF01460	sRNA
+RF01461	sRNA
+RF01462	sRNA
+RF01463	sRNA
+RF01464	sRNA
+RF01465	sRNA
+RF01466	sRNA
+RF01467	sRNA
+RF01468	sRNA
+RF01469	sRNA
+RF01470	sRNA
+RF01471	sRNA
+RF01472	sRNA
+RF01473	sRNA
+RF01474	sRNA
+RF01476	sRNA
+RF01477	sRNA
+RF01478	sRNA
+RF01479	sRNA
+RF01487	sRNA
+RF01488	sRNA
+RF01489	sRNA
+RF01492	sRNA
+RF01493	sRNA
+RF01494	sRNA
+RF01496	sRNA
+RF01503	sRNA
+RF01504	sRNA
+RF01512	sRNA
+RF01519	sRNA
+RF01520	sRNA
+RF01521	sRNA
+RF01527	sRNA
+RF01528	sRNA
+RF01529	sRNA
+RF01530	sRNA
+RF01571	sRNA
+RF01578	sRNA
+RF01579	sRNA
+RF01580	sRNA
+RF01581	sRNA
+RF01582	sRNA
+RF01619	sRNA
+RF01623	sRNA
+RF01643	sRNA
+RF01656	sRNA
+RF01663	sRNA
+RF01665	sRNA
+RF01668	sRNA
+RF01669	sRNA
+RF01670	sRNA
+RF01671	sRNA
+RF01672	sRNA
+RF01673	sRNA
+RF01674	sRNA
+RF01675	sRNA
+RF01676	sRNA
+RF01677	sRNA
+RF01678	sRNA
+RF01679	sRNA
+RF01680	sRNA
+RF01681	sRNA
+RF01682	sRNA
+RF01683	sRNA
+RF01684	sRNA
+RF01685	sRNA
+RF01686	sRNA
+RF01687	sRNA
+RF01690	sRNA
+RF01691	sRNA
+RF01693	sRNA
+RF01694	sRNA
+RF01696	sRNA
+RF01698	sRNA
+RF01699	sRNA
+RF01700	sRNA
+RF01701	sRNA
+RF01702	sRNA
+RF01703	sRNA
+RF01705	sRNA
+RF01706	sRNA
+RF01710	sRNA
+RF01712	sRNA
+RF01714	sRNA
+RF01718	sRNA
+RF01719	sRNA
+RF01722	sRNA
+RF01723	sRNA
+RF01728	sRNA
+RF01732	sRNA
+RF01742	sRNA
+RF01757	sRNA
+RF01762	sRNA
+RF01775	sRNA
+RF01781	sRNA
+RF01782	sRNA
+RF01783	sRNA
+RF01784	sRNA
+RF01789	sRNA
+RF01791	sRNA
+RF01793	sRNA
+RF01796	sRNA
+RF01808	sRNA
+RF01810	sRNA
+RF01812	sRNA
+RF01814	sRNA
+RF01815	sRNA
+RF01816	sRNA
+RF01817	sRNA
+RF01818	sRNA
+RF01819	sRNA
+RF01820	sRNA
+RF01821	sRNA
+RF01822	sRNA
+RF01823	sRNA
+RF01827	sRNA
+RF01828	sRNA
+RF01858	sRNA
+RF01867	sRNA
+RF02029	sRNA
+RF02030	sRNA
+RF02031	sRNA
+RF02049	sRNA
+RF02050	sRNA
+RF02051	sRNA
+RF02052	sRNA
+RF02053	sRNA
+RF02054	sRNA
+RF02055	sRNA
+RF02056	sRNA
+RF02057	sRNA
+RF02059	sRNA
+RF02060	sRNA
+RF02062	sRNA
+RF02063	sRNA
+RF02064	sRNA
+RF02065	sRNA
+RF02066	sRNA
+RF02067	sRNA
+RF02070	sRNA
+RF02071	sRNA
+RF02072	sRNA
+RF02073	sRNA
+RF02074	sRNA
+RF02075	sRNA
+RF02077	sRNA
+RF02078	sRNA
+RF02079	sRNA
+RF02080	sRNA
+RF02081	sRNA
+RF02082	sRNA
+RF02099	sRNA
+RF02100	sRNA
+RF02151	sRNA
+RF02221	sRNA
+RF02222	sRNA
+RF02223	sRNA
+RF02224	sRNA
+RF02225	sRNA
+RF02226	sRNA
+RF02227	sRNA
+RF02228	sRNA
+RF02230	sRNA
+RF02231	sRNA
+RF02232	sRNA
+RF02233	sRNA
+RF02234	sRNA
+RF02235	sRNA
+RF02236	sRNA
+RF02237	sRNA
+RF02238	sRNA
+RF02239	sRNA
+RF02240	sRNA
+RF02241	sRNA
+RF02242	sRNA
+RF02243	sRNA
+RF02268	sRNA
+RF02269	sRNA
+RF00127	sRNA
+RF01852	tRNA
+RF00005	tRNA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterReadsByCount.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,116 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2010-01
+#Modified:
+#Description:  
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+my %opts;
+GetOptions(\%opts,"i=s","o=s","mark:s","h");
+if (!(defined $opts{i} and defined $opts{o}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $mark=defined $opts{'mark'} ? $opts{'mark'} : "Sample";
+my @mark=split /\#/,$mark;
+
+open OUT,">$opts{o}";
+open IN,"<$opts{i}";
+my %hash;my %reads;
+while (my $aline=<IN>) {
+	chomp $aline;
+	my $seq=<IN>;
+	chomp $seq;
+	if($aline=~/:([\d|_]+)_x(\d+)$/){
+		if ($2>3) {
+			my @ss=split/_/,$1;
+			for (my $i=0;$i<@ss;$i++) {
+				$hash{length($seq)}[$i]++ if($ss[$i]>0);
+				$hash{length($seq)}[$i] +=0 if($ss[$i]==0);
+				$reads{length($seq)}[$i]+=$ss[$i];
+			}
+			print OUT "$aline\n$seq\n";
+		}		
+	}
+}
+close IN;
+close OUT;
+
+my $dir=dirname($opts{'o'});
+chdir $dir;
+my $lengthfile=$dir."/reads_length_distribution_after_count_filter.txt";
+open OUT, ">$lengthfile";
+open R,">$dir/length_distribution_after_count_filter.R";
+
+print OUT "Tags length\t@mark\n";
+
+my $samNo=@mark;
+my $avalue="";
+my @length=sort{$a<=>$b} keys %hash;
+foreach  (@length) {
+	print OUT $_,"\t@{$hash{$_}}\n";
+	my $vv=join ", ",@{$hash{$_}};
+	$avalue .="$vv,";
+}
+$avalue =~s/,$//;
+my $lengths=join ",",@length;
+my $marks=join "\",\"",@mark;
+
+print R  "a<-c($avalue)
+b<-matrix(a,ncol=$samNo,byrow=T)
+cl<-colors()
+names=c($lengths)
+legends=c(\"$marks\")
+png(\"Tags_length_after_count_filter.png\",width=800,height=600)
+barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Tags Length Distribution After Count Filter\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\")
+abline(h=0)
+dev.off()
+
+";
+$avalue="";
+print OUT "\nReads length\t@mark\n";
+foreach  (@length) {
+	print OUT $_,"\t@{$reads{$_}}\n";
+	my $vv=join ", ", @{$reads{$_}};
+	$avalue .= "$vv,";
+}
+$avalue =~s/,$//;
+
+print R  "a<-c($avalue)\n
+b<-matrix(a,ncol=$samNo,byrow=T)
+
+png(\"Reads_length_after_count_filter.png\",width=800,height=600)
+barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Reads Length Distribution After Count Filter\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\")
+abline(h=0)
+dev.off()
+
+";
+close OUT;
+close R;
+
+system ("R CMD BATCH $dir/length_distribution_after_count_filter.R");
+
+#system ("rm $dir/length_distribution.R");
+#system ("rm $dir/length_distribution.Rout");
+#system ("rm $dir/.RData");
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o  -min -max -mark
+options:
+
+-i input file
+-o output file
+-mark string #sample name eg: samA#samB#samC
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filterReadsByLength.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,121 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2010-01
+#Modified:
+#Description:  
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+my %opts;
+GetOptions(\%opts,"i=s","min=i","max=i","o=s","mark:s","h");
+if (!(defined $opts{i} and defined $opts{o} and defined $opts{min} and defined $opts{max}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $mark=defined $opts{'mark'} ? $opts{'mark'} : "Sample";
+my @mark=split /,/,$mark;
+
+
+open OUT,">$opts{o}";
+open IN,"<$opts{i}";
+my %hash;my %reads;
+while (my $aline=<IN>) {
+	chomp $aline;
+	my $seq=<IN>;
+	chomp $seq;
+	
+	if($aline=~/:([\d|_]+)_x(\d+)$/){
+		my @ss=split/_/,$1;
+		for (my $i=0;$i<@ss;$i++) {
+			$hash{length($seq)}[$i]++ if($ss[$i]>0);
+			$hash{length($seq)}[$i] +=0 if($ss[$i]==0);
+			$reads{length($seq)}[$i]+=$ss[$i];
+		}
+	}
+	#else{$reads{length($seq)}+=1;}
+	if (length ($seq)>=$opts{'min'}  && length ($seq) <=$opts{'max'}) {
+		print OUT "$aline\n$seq\n";
+	}
+}
+close IN;
+close OUT;
+
+my $dir=dirname($opts{'o'});
+chdir $dir;
+my $lengthfile=$dir."/reads_length_distribution.txt";
+open OUT, ">$lengthfile";
+open R,">$dir/length_distribution.R";
+
+print OUT "Tags length\t@mark\n";
+
+my $samNo=@mark;
+my $avalue="";
+my @length=sort{$a<=>$b} keys %hash;
+foreach  (@length) {
+	print OUT $_,"\t@{$hash{$_}}\n";
+	my $vv=join ", ",@{$hash{$_}};
+	$avalue .="$vv,";
+}
+$avalue =~s/,$//;
+my $lengths=join ",",@length;
+my $marks=join "\",\"",@mark;
+
+print R  "a<-c($avalue)
+b<-matrix(a,ncol=$samNo,byrow=T)
+cl<-colors()
+names=c($lengths)
+legends=c(\"$marks\")
+png(\"Tags_length.png\",width=800,height=600)
+barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Tags Length Distribution\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\")
+abline(h=0)
+dev.off()
+
+";
+$avalue="";
+print OUT "\nReads length\t@mark\n";
+foreach  (@length) {
+	print OUT $_,"\t@{$reads{$_}}\n";
+	my $vv=join ", ", @{$reads{$_}};
+	$avalue .= "$vv,";
+}
+$avalue =~s/,$//;
+
+print R  "a<-c($avalue)\n
+b<-matrix(a,ncol=$samNo,byrow=T)
+
+png(\"Reads_length.png\",width=800,height=600)
+barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Reads Length Distribution\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\")
+abline(h=0)
+dev.off()
+
+";
+close OUT;
+close R;
+
+#system ("R CMD BATCH $dir/length_distribution.R");
+
+#system ("rm $dir/length_distribution.R");
+#system ("rm $dir/length_distribution.Rout");
+#system ("rm $dir/.RData");
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o  -min -max -mark
+options:
+
+-i input file
+-o output file
+-min reads min length.
+-max reads max length.
+-mark string #sample name eg: samA,samB,samC
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_genelist.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: chentt
+#Email:
+#Date: 2012-4-6
+#Modified:
+#Description:
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","o=s","h");
+if (!(defined $opts{i} and defined $opts{o}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+open IN,"<$opts{i}";
+open OUT ,">$opts{o}";
+print OUT "#ID\tchr\tstart\tend\tstrand\n";
+my $n=1;
+my %gene1;
+while (my $aline=<IN>) {
+	chomp $aline;
+	next if($aline=~/^\#/);
+	my @tmp=split/\t/,$aline;
+	my $ID;
+	if ($tmp[2] eq "gene") {
+		$tmp[0]=~s/Chr\./Chr/;
+		#$tmp[0]=~s/Chr/chr/;
+		my @infor=split/;/,$tmp[8];
+		for (my $i=0;$i<@infor ;$i++) {
+			if ($infor[$i]=~/Alias\=(\S+)$/) {
+				$ID=$1;
+				last;
+			}
+			else {
+				$ID="unknown$n";
+				$n++;
+			}
+		}
+		#$gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand]
+		push @{$gene1{$ID}},[$tmp[3],$tmp[4],$tmp[0]];
+		print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n";
+	}
+}
+close IN;
+close OUT;
+
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o -h
+options:
+-i input cluster file
+-o output file
+-h help
+USAGE
+exit(1);
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/html_miRPlant.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,273 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-5-29
+#Modified:
+#Description: 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+my %opts;
+GetOptions(\%opts,"i=s","format=s","o=s","h");
+if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath);
+my ($predir,$rfamdir,$knowndir,$genomedir,$noveldir);
+open IN,"<$opts{i}";
+$config=<IN>; chomp $config;
+$prepath=<IN>; chomp $prepath;
+$knownpath=<IN>; chomp $knownpath;
+$rfampath=<IN>;chomp $rfampath;
+$genomepath=<IN>; chomp $genomepath;
+$novelpath=<IN>; chomp $novelpath;
+close IN;
+my @tmp=split/\//,$prepath;
+$predir=$tmp[-1];
+@tmp=split/\//,$rfampath;
+$rfamdir=$tmp[-1];
+@tmp=split/\//,$knownpath;
+$knowndir=$tmp[-1];
+@tmp=split/\//,$genomepath;
+$genomedir=$tmp[-1];
+@tmp=split/\//,$novelpath;
+$noveldir=$tmp[-1];
+
+my $dir=dirname($opts{'o'});
+
+open OUT ,">$opts{'o'}";
+print OUT "<HTML>\n  <HEAD>\n  <TITLE> Analysis Report </TITLE>\n </HEAD>
+ <BODY bgcolor=\"lightgray\">\n  <h1 align=\"center\">\n    <font face=\"ºÚÌå\">\n	<b>Small RNA Analysis Report</b>\n  </font>\n  </h1>
+  <h2>1. Sequence No. and quality</h2>
+  <h3>1.1 Sequece No.</h3>
+";
+
+### raw data no
+open IN,"<$config";
+my @files;my @marks; my @rawNo;
+while (my $aline=<IN>) {
+	chomp $aline;
+	my @tmp=split/\t/,$aline;
+	push @files,$tmp[0];
+	
+	my $no=`less $tmp[0] |wc -l `;
+	chomp $no;
+	if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") {
+		$no=$no/4;
+	}
+	else{
+		$no=$no/2;
+	}
+	push @rawNo,$no;
+
+	push @marks,$tmp[1];
+}
+close IN;
+
+### preprocess 
+unless ($prepath=~/\/$/) {
+	$prepath .="/";
+}
+
+my @trimNo;my @collapse;
+my $collapsefile=$prepath."collapse_reads.fa";
+open IN,"<$collapsefile";
+while (my $aline=<IN>) {
+	chomp $aline;
+	<IN>;
+	$aline=~/:([\d|_]+)_x(\d+)$/;
+	my @lng=split/_/,$1;
+	for (my $i=0;$i<@lng;$i++) {
+		if ($lng[$i]>0) {
+			$trimNo[$i] +=$lng[$i];
+			$collapse[$i] ++;
+		}
+	}
+}
+close IN;
+
+my @cleanR;my @cleanT;
+my $clean=$prepath."collapse_reads_19_28.fa";
+open IN,"<$clean";
+while (my $aline=<IN>) {
+	chomp $aline;
+	<IN>;
+	$aline=~/:([\d|_]+)_x(\d+)$/;
+	my @lng=split/_/,$1;
+	for (my $i=0;$i<@lng;$i++) {
+		if ($lng[$i]>0) {
+			$cleanR[$i] +=$lng[$i];
+			$cleanT[$i] ++;
+		}
+	}
+}
+close IN;
+
+print OUT "<table border=\"1\">
+<tr align=\"center\">
+<th>&nbsp;</th>
+";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+print  OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Raw Reads No. </th>
+";
+foreach  (@rawNo) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Reads No. After Trimed 3\' adapter </th>
+";
+foreach  (@trimNo) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Unique Tags No. </th>
+";
+foreach  (@collapse) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT  "</tr>
+<tr align=\"center\">
+<th align=\"left\">Clean Reads No. </th>
+";
+foreach  (@cleanR) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Clean Tags No. </th>
+";
+foreach  (@cleanT) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>\n</table>";
+print OUT "<p>
+Note:<br />
+The raw data file path is: <b>$files[0]</b><br />
+";
+for (my $i=1;$i<@files;$i++) {
+	print OUT "&nbsp;&nbsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;<b>$files[$i]</b><br />";
+}
+print OUT "The collapsed file path is: <b>$collapsefile</b><br />
+The clean data file path is: <b>$clean</b><br />
+</p>
+<h2> 1. Sequence length count</h2>
+";
+print OUT "\n";
+
+my $length=$prepath."length.html";
+open IN,"<$length";
+while (my $aline=<IN>) {
+	chomp $aline;
+	print OUT "$aline\n";
+}
+
+print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a>
+</p>
+";
+
+#### rfam
+unless ($rfampath=~/\/$/) {
+	$rfampath .="/";
+}
+print OUT "<h2>2. Rfam non-miRNA annotation</h2>
+<h3>2.1 Reads count</h3>
+<table border=\"1\">
+<tr align=\"center\">
+";
+
+my @rfamR; my @rfamT;
+my $tag=1;
+open IN,"<$dir/rfam_non-miRNA_annotation.txt";
+while (my $aline=<IN>) {
+	chomp $aline;
+	$tag=0 if($aline=~/tags\s+number/);
+	next if($aline=~/^\#/);
+	next if($aline=~/^\s*$/);
+	my @tmp=split/\s+/,$aline;
+	if($tag == 1){push @rfamR,[@tmp];}
+	else{push @rfamT,[@tmp];}
+}
+close IN;
+
+
+print OUT "<th>RNA Name</th>\n";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+for (my $i=0;$i<@rfamR;$i++) {
+	print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">$rfamR[$i][0]</th>
+	";
+	for (my $j=1;$j<@{$rfamR[$i]} ;$j++) {
+	print OUT "<td> $rfamR[$i][$j]</td>\n";
+	}
+}
+
+print OUT "</tr>\n</table>
+  <h3>2.2 Tags count</h3>
+  <table border=\"1\">
+   <tr align=\"center\">
+   <th>RNA Name</th>\n";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+for (my $i=0;$i<@rfamT;$i++) {
+	print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">$rfamT[$i][0]</th>
+	";
+	for (my $j=1;$j<@{$rfamT[$i]} ;$j++) {
+	print OUT "<td> $rfamT[$i][$j]</td>\n";
+	}
+}
+print OUT "</tr>\n</table>
+<p>Note:<br />The rfam mapping results is: <b>$rfampath</b>";
+print OUT "<b>rfam_mapped.bwt</b></p>
+  <h2>3. MicroRNA result</h2>
+  <h3>3.1 known microRNA</h3>
+  <p>The known microRNA express list: <a href=\"./known_microRNA_express.txt\"> known_microRNA_express.txt</a><br/>
+  The known microRNA alngment file: <a href=\"./known_microRNA_express.aln\"> known_microRNA_express.aln</a><br/>
+  The known moRs file: <a href=\"./known_microRNA_express.moRs\"> known_microRNA_express.moRs</a><br/>
+  The known microRNA mature sequence file: <a href=\"./known_microRNA_mature.fa\"> known_microRNA_mature.fa</a><br/>
+  The knowm microRNA precursor sequence file: <a href=\"./known_microRNA_precursor.fa\"> known_microRNA_precursor.fa</a>
+  </p>
+
+  <h3>3.2 novel microRNA</h3>
+  <p>The novel microRNA prediction file:<a href=\"./microRNA_prediction.mrd\"> microRNA_prediction.mrd</a><br/>
+  The novel microRNA express list: <a href=\"./novel_microRNA_express.txt\"> novel_microRNA_express.txt</a><br/>
+  The novel microRNA mature sequence file: <a href=\"./novel_microRNA_mature.fa\"> novel_microRNA_mature.fa</a><br/>
+  The novel microRNA precursor sequence file: <a href=\"./novel_microRNA_precursor.fa\"> novel_microRNA_precursor.fa</a>
+  </p>
+";
+
+
+
+print OUT "
+ </BODY>
+</HTML>
+";
+close OUT;
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -o
+options:
+-o output file
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/html_preprocess.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,388 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-5-29
+#Modified:
+#Description: 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+my %opts;
+GetOptions(\%opts,"i=s","format=s","min=i","max=i","o=s","h");
+if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+my ($config,$prepath,$rfampath,$knownpath,$genomepath,$novelpath);
+my ($predir,$rfamdir,$knowndir,$genomedir,$noveldir);
+open IN,"<$opts{i}";
+$config=<IN>; chomp $config;
+$prepath=<IN>; chomp $prepath;
+$genomepath=<IN>; chomp $genomepath;
+$rfampath=<IN>;
+close IN;
+my @tmp=split/\//,$prepath;
+$predir=$tmp[-1];
+@tmp=split/\//,$genomepath;
+$genomedir=$tmp[-1];
+
+my $dir=dirname($opts{'o'});
+
+open OUT ,">$opts{'o'}";
+print OUT "<HTML>\n  <HEAD>\n  <TITLE> Analysis Report </TITLE>\n </HEAD>
+ <BODY bgcolor=\"lightgray\">\n  <h1 align=\"center\">\n    <font face=\"ºÚÌå\">\n	<b>Preprocess Report</b>\n  </font>\n  </h1>
+  <h2>1. Sequence No. and quality</h2>
+  <h3>1.1 Sequece No.</h3>
+";
+
+### raw data no
+open IN,"<$config";
+my @files;my @marks; my @rawNo;
+while (my $aline=<IN>) {
+	chomp $aline;
+	my @tmp=split/\t/,$aline;
+	push @files,$tmp[0];
+	
+	my $no=`less $tmp[0] |wc -l `;
+	chomp $no;
+	if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") {
+		$no=$no/4;
+	}
+	else{
+		$no=$no/2;
+	}
+	push @rawNo,$no;
+
+	push @marks,$tmp[1];
+}
+close IN;
+
+### preprocess 
+unless ($prepath=~/\/$/) {
+	$prepath .="/";
+}
+
+my @trimNo;my @collapse;
+my $collapsefile=$prepath."collapse_reads.fa";
+open IN,"<$collapsefile";
+while (my $aline=<IN>) {
+	chomp $aline;
+	<IN>;
+	$aline=~/:([\d|_]+)_x(\d+)$/;
+	my @lng=split/_/,$1;
+	for (my $i=0;$i<@lng;$i++) {
+		if ($lng[$i]>0) {
+			$trimNo[$i] +=$lng[$i];
+			$collapse[$i] ++;
+		}
+	}
+}
+close IN;
+
+my @cleanR;my @cleanT;
+my $clean=$prepath."collapse_reads_$opts{min}_$opts{max}.fa";
+open IN,"<$clean";
+while (my $aline=<IN>) {
+	chomp $aline;
+	<IN>;
+	$aline=~/:([\d|_]+)_x(\d+)$/;
+	my @lng=split/_/,$1;
+	for (my $i=0;$i<@lng;$i++) {
+		if ($lng[$i]>0) {
+			$cleanR[$i] +=$lng[$i];
+			$cleanT[$i] ++;
+		}
+	}
+}
+close IN;
+
+print OUT "<table border=\"1\">
+<tr align=\"center\">
+<th>&nbsp;</th>
+";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+print  OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Raw Reads No. </th>
+";
+foreach  (@rawNo) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Reads No. After Trimed 3\' adapter </th>
+";
+foreach  (@trimNo) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Unique Tags No. </th>
+";
+foreach  (@collapse) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT  "</tr>
+<tr align=\"center\">
+<th align=\"left\">Clean Reads No. </th>
+";
+foreach  (@cleanR) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Clean Tags No. </th>
+";
+foreach  (@cleanT) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>\n</table>";
+print OUT "<p>
+Note:<br />
+The raw data file path is: <b>$files[0]</b><br />
+";
+for (my $i=1;$i<@files;$i++) {
+	print OUT "&nbsp;&nbsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;<b>$files[$i]</b><br />";
+}
+print OUT "The collapsed file path is: <b>$collapsefile</b><br />
+The clean data file path is: <b>$clean</b><br />
+</p>
+<h2> 1. Sequence length count</h2>
+<h3> 1.1 Reads length count </h3>
+";
+print OUT "\n";
+
+my (%length); my $key="Tags Length";
+open IN,"<$prepath/reads_length_distribution.txt";
+while (my $aline=<IN>) {
+	chomp $aline;
+	next if($aline=~/^\s*$/);
+	if ($aline=~/^Reads/) { $key="Reads Length";}
+	my @tmp=split/\t/,$aline;
+	my @array=split/\s/,$tmp[1];
+	push @{$length{$key}},[$tmp[0],@array];
+}
+close IN;
+
+print OUT "<table border=\"1\">
+<tr align=\"center\">";
+my $hashkey="Reads Length";
+foreach  (@{$length{$hashkey}[0]}) {
+	print OUT "<th> $_ </th>\n";
+}
+print  OUT "</tr>";
+
+for (my $i=1;$i<@{$length{$hashkey}};$i++) {
+	print OUT "<tr align=\"center\">
+	<th >$length{$hashkey}[$i][0] </th>
+	";
+	for(my $j=1;$j<@{$length{$hashkey}[$i]};$j++) {
+		print OUT "<td> $length{$hashkey}[$i][$j] </td>\n";
+	}
+	print OUT "</tr>\n";
+}
+print OUT "</table>\n";
+
+print OUT "<h3> 1.2 Tags length count </h3>";
+
+print OUT "<table border=\"1\">
+<tr align=\"center\">";
+$hashkey="Tags Length";
+foreach  (@{$length{$hashkey}[0]}) {
+	print OUT "<th> $_ </th>\n";
+}
+print  OUT "</tr>";
+
+for (my $i=1;$i<@{$length{$hashkey}};$i++) {
+	print OUT "<tr align=\"center\">
+	<th > $length{$hashkey}[$i][0] </th>
+	";
+	for(my $j=1;$j<@{$length{$hashkey}[$i]};$j++) {
+		print OUT "<td> $length{$hashkey}[$i][$j] </td>\n";
+	}
+	print OUT "</tr>\n";
+}
+
+print OUT "</table>\n";
+
+print OUT "<h2> 2. Sequence length distribution </h2>";
+my $length=$prepath."length.html";
+open IN,"<$length";
+while (my $aline=<IN>) {
+	chomp $aline;
+	print OUT "$aline\n";
+}
+
+#print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution.txt\"> length file</a>
+#</p>
+#";
+
+
+
+
+####genome map
+#unless ($genomedir=~/\/$/) {
+#	$genomedir .="/";
+#}
+
+print OUT "<h2>2. Genome Alignment Result</h2>
+<h3>2.1 Mapping count</h3>
+";
+
+open IN,"<$genomepath/genome_mapped.fa";
+my (@gread,@gtag);
+while (my $aline=<IN>) {
+	chomp $aline;
+	<IN>;
+	$aline=~/:([\d|_]+)_x(\d+)$/;
+	my @sss=split/_/,$1;
+	for (my $i=0;$i<@sss;$i++) {
+		if ($sss[$i]>0) {
+			$gread[$i] +=$sss[$i];
+			$gtag[$i] ++;
+		}
+	}
+}
+close IN;
+
+print OUT "<table border=\"1\">
+<tr align=\"center\">
+<th>&nbsp;</th>
+";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+print  OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Genome Mapped Reads No. </th>
+";
+foreach  (@gread) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Genome Mapped Reads Percent </th>
+";
+
+for (my $i=0;$i<@gread;$i++) {
+	my $per=sprintf ("%.2f",$gread[$i]/$cleanR[$i]*100);
+	print OUT "<td> $per\%</td>\n";
+}
+
+print  OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Genome Mapped Tags No. </th>
+";
+foreach  (@gtag) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Genome Mapped Tags Percent </th>
+";
+
+for (my $i=0;$i<@gtag;$i++) {
+	my $per=sprintf ("%.2f",$gtag[$i]/$cleanT[$i]*100);
+	print OUT "<td> $per\%</td>\n";
+}
+print OUT "</tr>\n</table>";
+print OUT "<p>
+Note:<br />
+The genome mapped bwt file path is: <b>$genomedir/genome_mapped.bwt</b><br />
+The genome mapped FASTA file path is: <b>$genomedir/genome_mapped.fa</b>
+<br />
+";
+
+
+
+#### rfam
+if(defined $rfampath && $rfampath=~/rfam_match/){
+chomp $rfampath;
+@tmp=split/\//,$rfampath;
+$rfamdir=$tmp[-1];
+
+unless ($rfampath=~/\/$/) {
+	$rfampath .="/";
+}
+print OUT "<h2>3. Rfam non-miRNA annotation</h2>
+<h3>3.1 Reads count</h3>
+<table border=\"1\">
+<tr align=\"center\">
+";
+
+my @rfamR; my @rfamT;
+my $tag=1;
+open IN,"<$dir/rfam_non-miRNA_annotation.txt";
+while (my $aline=<IN>) {
+	chomp $aline;
+	$tag=0 if($aline=~/tags\s+number/);
+	next if($aline=~/^\#/);
+	next if($aline=~/^\s*$/);
+	my @tmp=split/\s+/,$aline;
+	if($tag == 1){push @rfamR,[@tmp];}
+	else{push @rfamT,[@tmp];}
+}
+close IN;
+
+
+print OUT "<th>RNA Name</th>\n";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+for (my $i=0;$i<@rfamR;$i++) {
+	print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">$rfamR[$i][0]</th>
+	";
+	for (my $j=1;$j<@{$rfamR[$i]} ;$j++) {
+	print OUT "<td> $rfamR[$i][$j]</td>\n";
+	}
+}
+
+print OUT "</tr>\n</table>
+  <h3>3.2 Tags count</h3>
+  <table border=\"1\">
+   <tr align=\"center\">
+   <th>RNA Name</th>\n";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+for (my $i=0;$i<@rfamT;$i++) {
+	print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">$rfamT[$i][0]</th>
+	";
+	for (my $j=1;$j<@{$rfamT[$i]} ;$j++) {
+	print OUT "<td> $rfamT[$i][$j]</td>\n";
+	}
+}
+print OUT "</tr>\n</table>
+<p>Note:<br />The rfam mapping results is: <b>$rfampath</b>";
+print OUT "<b>rfam_mapped.bwt</b></p>
+";
+}
+
+
+print OUT "
+ </BODY>
+</HTML>
+";
+close OUT;
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -o
+options:
+-o output file
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/html_siRNA.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,788 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-5-29
+#Modified:
+#Description: 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+my %opts;
+GetOptions(\%opts,"i=s","format=s","o=s","h");
+if (!(defined $opts{o}  and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+my ($config,$prepath,$rfampath,$genomepath,$clusterpath,$annotatepath,$plotpath,$degpath);
+my ($predir,$rfamdir,$genomedir,$clusterdir,$annotatedir,$plotdir,$degdir);
+open IN,"<$opts{i}";
+$config=<IN>; chomp $config;
+$prepath=<IN>; chomp $prepath;
+$rfampath=<IN>;chomp $rfampath;
+$genomepath=<IN>; chomp $genomepath;
+$clusterpath=<IN>; chomp $clusterpath;
+$annotatepath=<IN>; chomp $annotatepath;
+$plotpath=<IN>; chomp $plotpath;
+my $deg_tag=1;
+if(eof){$deg_tag=0;}
+else{
+	$degpath=<IN>; chomp $degpath;
+}
+close IN;
+my @tmp=split/\//,$prepath;
+$predir=$tmp[-1];
+@tmp=split/\//,$rfampath;
+$rfamdir=$tmp[-1];
+@tmp=split/\//,$genomepath;
+$genomedir=$tmp[-1];
+@tmp=split/\//,$clusterpath;
+$clusterdir=$tmp[-1];
+@tmp=split/\//,$annotatepath;
+$annotatedir=$tmp[-1];
+@tmp=split/\//,$plotpath;
+$plotdir=$tmp[-1];
+
+my $dir=dirname($opts{'o'});
+
+open OUT ,">$opts{'o'}";
+print OUT "<HTML>\n  <HEAD>\n  <TITLE> Analysis Report </TITLE>\n </HEAD>
+ <BODY bgcolor=\"lightgray\">\n  <h1 align=\"center\">\n    <font face=\"ºÚÌå\">\n	<b>Small RNA Analysis Report</b>\n  </font>\n  </h1>
+  <h2>1. Sequence No. and quality</h2>
+  <h3>1.1 Sequece No.</h3>
+";
+
+### raw data no
+open IN,"<$config";
+my @files;my @marks; my @rawNo;
+while (my $aline=<IN>) {
+	chomp $aline;
+	my @tmp=split/\t/,$aline;
+	push @files,$tmp[0];
+	
+	my $no=`less $tmp[0] |wc -l `;
+	chomp $no;
+	if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") {
+		$no=$no/4;
+	}
+	else{
+		$no=$no/2;
+	}
+	push @rawNo,$no;
+
+	push @marks,$tmp[1];
+}
+close IN;
+
+### preprocess 
+unless ($prepath=~/\/$/) {
+	$prepath .="/";
+}
+
+my @trimNo;my @collapse;
+my $collapsefile=$prepath."collapse_reads.fa";
+open IN,"<$collapsefile";
+while (my $aline=<IN>) {
+	chomp $aline;
+	<IN>;
+	$aline=~/:([\d|_]+)_x(\d+)$/;
+	my @lng=split/_/,$1;
+	for (my $i=0;$i<@lng;$i++) {
+		if ($lng[$i]>0) {
+			$trimNo[$i] +=$lng[$i];
+			$collapse[$i] ++;
+		}
+	}
+}
+close IN;
+
+my @cleanR;my @cleanT;
+my $clean=$prepath."collapse_reads_18-40.fa";
+open IN,"<$clean";
+while (my $aline=<IN>) {
+	chomp $aline;
+	<IN>;
+	$aline=~/:([\d|_]+)_x(\d+)$/;
+	my @lng=split/_/,$1;
+	for (my $i=0;$i<@lng;$i++) {
+		if ($lng[$i]>0) {
+			$cleanR[$i] +=$lng[$i];
+			$cleanT[$i] ++;
+		}
+	}
+}
+close IN;
+
+my @filterR;my @filterT;
+my $filter=$prepath."collapse_reads_out.fa";
+open IN,"<$filter";
+while (my $aline=<IN>) {
+	chomp $aline;
+	<IN>;
+	$aline=~/:([\d|_]+)_x(\d+)$/;
+	my @lng=split/_/,$1;
+	for (my $i=0;$i<@lng;$i++) {
+		if ($lng[$i]>0) {
+			$filterR[$i] +=$lng[$i];
+			$filterT[$i] ++;
+		}
+	}
+}
+close IN;
+
+
+print OUT "<table border=\"1\">
+<tr align=\"center\">
+<th>&nbsp;</th>
+";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+print  OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Raw Reads No. </th>
+";
+foreach  (@rawNo) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Reads No. After Trimed 3\' adapter </th>
+";
+foreach  (@trimNo) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Unique Tags No. </th>
+";
+foreach  (@collapse) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT  "</tr>
+<tr align=\"center\">
+<th align=\"left\">Clean Reads No. </th>
+";
+foreach  (@cleanR) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Clean Tags No. </th>
+";
+foreach  (@cleanT) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT  "</tr>
+<tr align=\"center\">
+<th align=\"left\">Filter Reads No. \(reads count \>3\) </th>
+";
+foreach  (@filterR) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>
+<tr align=\"center\">
+<th align=\"left\">Filter Tags No. \(reads count \>3\) </th>
+";
+foreach  (@filterT) {
+	print OUT "<td> $_ </td>\n";
+}
+print OUT "</tr>\n</table>";
+print OUT "<p>
+Note:<br />
+The raw data file path is: <b>$files[0]</b><br />
+";
+for (my $i=1;$i<@files;$i++) {
+	print OUT "&nbsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;<b>$files[$i]</b><br />";
+}
+print OUT "The collapsed file path is: <b>$collapsefile</b><br />
+The clean data file path is: <b>$clean</b><br />
+The filter (remain total reads>3) data file path is: <b>$filter</b><br />
+</p>
+<h2> 1. Sequence length count</h2>
+";
+print OUT "\n";
+
+my $length=$prepath."length.html";
+open IN,"<$length";
+while (my $aline=<IN>) {
+	chomp $aline;
+	print OUT "$aline\n";
+}
+close IN;
+
+print OUT "<p> Note:<br />The sequence length data: <a href=\"./$predir/reads_length_distribution_after_count_filter.txt\"> length file</a>
+</p>
+";
+
+#### rfam
+unless ($rfampath=~/\/$/) {
+	$rfampath .="/";
+}
+unless ($genomepath=~/\/$/) {
+	$genomepath .="/";
+}
+print OUT "<h2>2. Rfam non-miRNA annotation</h2>
+<h3>2.1 Reads count</h3>
+<table border=\"1\">
+<tr align=\"center\">
+";
+
+my @rfamR; my @rfamT;
+my $tag=1;
+open IN,"<$dir/rfam_match/rfam_non-miRNA_annotation.txt";
+while (my $aline=<IN>) {
+	chomp $aline;
+	$tag=0 if($aline=~/tags\s+number/);
+	next if($aline=~/^\#/);
+	next if($aline=~/^\s*$/);
+	my @tmp=split/\s+/,$aline;
+	if($tag == 1){push @rfamR,[@tmp];}
+	else{push @rfamT,[@tmp];}
+}
+close IN;
+
+
+print OUT "<th>RNA Name</th>\n";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+for (my $i=0;$i<@rfamR;$i++) {
+	print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">$rfamR[$i][0]</th>
+	";
+	for (my $j=1;$j<@{$rfamR[$i]} ;$j++) {
+	print OUT "<td> $rfamR[$i][$j]</td>\n";
+	}
+}
+
+print OUT "</tr>\n</table>
+  <h3>2.2 Tags count</h3>
+  <table border=\"1\">
+   <tr align=\"center\">
+   <th>RNA Name</th>\n";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+for (my $i=0;$i<@rfamT;$i++) {
+	print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">$rfamT[$i][0]</th>
+	";
+	for (my $j=1;$j<@{$rfamT[$i]} ;$j++) {
+	print OUT "<td> $rfamT[$i][$j]</td>\n";
+	}
+}
+print OUT "</tr>\n</table>
+<p>Note:<br />The rfam mapping results is: <b>$rfampath</b>";
+print OUT "<b>rfam_mapped.bwt</b></p>";
+
+open IN,"<$dir/genome_match/genome_mapped.bwt";
+my @genome_r_u;
+my @genome_r_m;
+my @genome_t_u;
+my @genome_t_m;
+my $tags_map_number=0;
+while (my $aline=<IN>) {
+	chomp $aline;
+	my @temp=split/\t/,$aline;
+	if ($temp[6]==0) {
+		$aline=~/:([\d|_]+)_x(\d+)/;
+		my @lng=split/_/,$1;
+		for (my $i=0;$i<@lng;$i++) {
+			if ($lng[$i]>0) {
+				$genome_r_u[$i] +=$lng[$i];
+				$genome_t_u[$i] ++;
+			}
+		}
+		$tags_map_number++;
+	}
+	if ($temp[6]>0) {
+		$aline=~/:([\d|_]+)_x(\d+)/;
+		my @lng=split/_/,$1;
+		for (my $i=0;$i<@lng;$i++) {
+			if ($lng[$i]>0) {
+				$genome_r_m[$i] +=$lng[$i];
+				$genome_t_m[$i] ++;
+			}
+		}
+		for (my $i=0;$i<$temp[6] ;$i++) {
+			my $next=<IN>;
+		}
+		$tags_map_number++;
+	}
+}
+close IN;
+#<h3>3.1 Reads count</h3>
+#<table border=\"1\">
+#<tr align=\"center\">
+print OUT "<h2>3. genome mapping result</h2>
+<table border=\"1\">
+<tr align=\"center\">
+<th align=\"left\">Map</th>\n
+";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">Uniq Map Reads No.</th>
+";
+for (my $i=0;$i<@genome_r_u ;$i++) {
+	print OUT "<td> $genome_r_u[$i]</td>\n";
+}
+
+print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">Uniq Map Tags No.</th>
+";
+for (my $i=0;$i<@genome_t_u ;$i++) {
+	print OUT "<td> $genome_t_u[$i]</td>\n";
+}
+
+print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">Multiple Map Reads No.</th>
+";
+for (my $i=0;$i<@genome_r_m ;$i++) {
+	print OUT "<td> $genome_r_m[$i]</td>\n";
+}
+
+print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">Multiple Map Tags No.</th>
+";
+for (my $i=0;$i<@genome_t_m ;$i++) {
+	print OUT "<td> $genome_t_m[$i]</td>\n";
+}
+
+print OUT "</tr>\n</table>
+<p>Note:<br />The genome mapping results is: <b>$genomepath</b>";
+print OUT "<b>genome_mapped.bwt</b></p>";
+
+my $cluster="$clusterpath/sample_reads.cluster";
+my $cluster_number=`less $cluster |wc -l `;
+$cluster_number=$cluster_number-1;
+my (%cluster_length,@exp,@rpkm);
+my @exp_range=qw(0 \(0--10] \(10--100] \(100--1000] \(1000--10000] \(10000--100000] \(100000--**\));
+my @rpkm_range=qw(0 \(0--0.25] \(0.25--0.5] \(0.5--1] \(1.0-5.0] \(5--10] \(10--50] \(50--100] \(100--500] \(500--1000] \(1000--**]);
+
+open IN,"<$cluster";
+while (my $aline=<IN>) {
+	next if($aline=~/^\"/);
+	chomp $aline;
+	my @temp=split/\t/,$aline;
+	my @id=split/:|-/,$temp[0];
+	$cluster_length{$id[2]-$id[1]+1}++;
+	for (my $i=0;$i<@marks ;$i++) {
+		if ($temp[$i+3] == 0) {$exp[0][$i]++;}
+		elsif ($temp[$i+3]>0 && $temp[$i+3]<= 10  ){$exp[1][$i]++;}
+		elsif ($temp[$i+3]>10 && $temp[$i+3]<=100){$exp[2][$i]++;}
+		elsif ($temp[$i+3]>100 && $temp[$i+3]<=1000){$exp[3][$i]++;}
+		elsif ($temp[$i+3]>1000 && $temp[$i+3]<=10000){$exp[4][$i]++;}
+		elsif ($temp[$i+3]>10000 && $temp[$i+3]<=100000){$exp[5][$i]++;}
+		elsif ($temp[$i+3]>100000){$exp[6][$i]++;}
+	}
+}
+close IN;
+
+my $cluster_rpkm="$clusterpath/sample_rpkm.cluster";
+open IN,"<$cluster_rpkm";
+while (my $aline=<IN>) {
+	next if($aline=~/^\#/);
+	chomp $aline;
+	my @temp=split/\t/,$aline;
+	for (my $i=0;$i<@marks ;$i++) {
+		if ($temp[$i+3]==0) {$rpkm[0][$i]++;}
+		elsif($temp[$i+3]>0 && $temp[$i+3]<=0.25){$rpkm[1][$i]++;}
+		elsif($temp[$i+3]>0.25 && $temp[$i+3]<=0.5){$rpkm[2][$i]++;}
+		elsif($temp[$i+3]>0.5 && $temp[$i+3]<=1){$rpkm[3][$i]++;}
+		elsif($temp[$i+3]>1 && $temp[$i+3]<=5){$rpkm[4][$i]++;}
+		elsif($temp[$i+3]>5 && $temp[$i+3]<=10){$rpkm[5][$i]++;}
+		elsif($temp[$i+3]>10 && $temp[$i+3]<=50){$rpkm[6][$i]++;}
+		elsif($temp[$i+3]>50 && $temp[$i+3]<=100){$rpkm[7][$i]++;}
+		elsif($temp[$i+3]>100 && $temp[$i+3]<=500){$rpkm[8][$i]++;}
+		elsif($temp[$i+3]>500 && $temp[$i+3]<=1000){$rpkm[9][$i]++;}
+		else{$rpkm[10][$i]++;}
+	}
+}
+
+close IN;
+
+my $cluster_length_file="$clusterpath/cluster_length.txt";
+open LEN,">$cluster_length_file";
+print LEN "\#length\tcluster_number\n";
+foreach my $key (sort keys %cluster_length) {
+	print LEN "$key\t$cluster_length{$key}\n";
+}
+close LEN;
+print OUT "<h2>4. cluster result</h2>
+<h3>4.1 Cluster count</h3>
+<table border=\"1\">
+<tr align=\"center\">
+<th align=\"left\"> </th>
+<td>Merged samples</td></tr>
+<tr align=\"center\">
+<th align=\"left\">Tags number</th>
+<td>$tags_map_number</td></tr>
+<tr align=\"center\">
+<th align=\"left\">Cluster number</th>
+<td>$cluster_number</td></tr>\n</table>
+";
+
+print OUT "<h3>4.2 Cluster length</h3>
+<p> Note:<br />The clusters length data: <a href=\"./$clusterdir/cluster_length.txt\"> length file</a>
+</p>
+";
+print OUT "<h3>4.3 Quantify</h3>
+<table border=\"1\">
+<tr align=\"center\">
+<th align=\"left\">Reads Range</th>\n
+";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+for (my $i=0;$i<@exp_range;$i++) {
+	print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">$exp_range[$i]</th>
+	";
+	for (my $j=0;$j<@marks ;$j++) {
+		if (!(defined($exp[$i][$j]))) {
+			print OUT "<td> 0</td>\n";
+		}
+		else{print OUT "<td> $exp[$i][$j]</td>\n";}
+	}
+}
+print OUT "</tr>\n</table>";
+
+print OUT "\n<table border=\"1\">
+<tr align=\"center\">
+<th align=\"left\">RPKM Range</th>\n
+";
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+for (my $i=0;$i<@rpkm_range;$i++) {
+	print  OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">$rpkm_range[$i]</th>
+	";
+	for (my $j=0;$j<@marks ;$j++) {
+		if (!(defined($rpkm[$i][$j]))) {
+			print OUT "<td> 0</td>\n";
+		}
+		else{print OUT "<td> $rpkm[$i][$j]</td>\n";}
+	}
+}
+print OUT "</tr>\n</table>";
+
+my $annotate="$annotatepath/sample_c_p.anno";
+my (%posit,%repeat,%nat1,%nat2);
+my (@phase,@long,@repeat,@nat);
+for (my $j=0;$j<@marks ;$j++) {
+	$phase[$j]=0;
+	$long[$j]=0;
+	$repeat[$j]=0;
+	$nat[$j]=0;
+}
+
+my $class_anno=1;
+open ANNO,"<$annotate";
+while (my $aline=<ANNO>) {
+	chomp $aline;
+	my @temp=split/\t/,$aline;
+	if($aline=~/^\#/){
+		if (@temp != 10+@marks) {
+			$class_anno=0;
+		}
+		next;
+	}
+	for (my $i=3+@marks+$class_anno;$i<@temp;$i++) {
+		my @posit=split/\;/,$temp[$i];
+		for (my $j=0;$j<@marks ;$j++) {
+			if ($temp[3+$j]>0) {
+				$posit{$posit[0]}[$j]++;
+			}
+			else{
+				if (!(defined($posit{$posit[0]}[$j]))) {
+					$posit{$posit[0]}[$j]=0;
+				}
+			}
+		}
+	}
+	if ($class_anno) {
+		for (my $j=0;$j<@marks ;$j++) {
+			if ($temp[3+$j]>0) {
+				if ($temp[6] eq "phase") {
+					$phase[$j]++;
+				}
+				if ($temp[7] eq "long") {
+					$long[$j]++;
+				}
+				if ($temp[8] ne "\/") {
+					$repeat[$j]++;
+					my @rr=split/\;/,$temp[8];
+					foreach  (@rr) {
+						$repeat{$_}[$j]++;
+					}
+				}
+				if ($temp[9] ne "\/") {
+					$nat[$j]++;
+					my @nn1=split/\;/,$temp[9];
+					my @nn2=split/\;/,$temp[10];
+					for (my $k=0;$k<@nn1 ;$k++) {
+						$nat1{$nn1[$k]}[$j]++;
+						$nat2{$nn2[$k]}[$j]++;
+					}
+				}
+			}
+		}
+	}
+}
+close ANNO;
+
+print OUT "<h2>5. Cluster Annotate</h2>
+<h3>5.1 Cluster genome position annotate</h3>
+<table border=\"1\">
+<tr align=\"center\">
+<th align=\"left\">clusters number</th>\n
+";
+
+foreach  (@marks) {
+	print OUT "<th> $_ </th>\n";
+}
+foreach my $key (sort keys %posit) {
+	print  OUT "</tr>
+		<tr align=\"center\">
+		<th align=\"left\">$key</th>
+	";
+	foreach  (@{$posit{$key}}) {
+		print OUT "<td> $_</td>\n";
+	}
+}
+print OUT "</tr>\n</table>";
+print OUT "<p>
+Note:<br />
+One cluster mybe annotate to multiple genes<br />
+";
+
+if ($class_anno) {
+	print OUT "<h3>5.2 Cluster source mechanism annotate</h3>
+	<table border=\"1\">
+	<tr align=\"center\">
+	<th align=\"left\">clusters number</th>\n
+	";
+
+	foreach  (@marks) {
+		print OUT "<th> $_ </th>\n";
+	}
+	print OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">Phase</th>\n
+	";
+	foreach  (@phase) {
+		print OUT "<td> $_ </td>\n";
+	}
+
+	print OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">Long</th>\n
+	";
+	foreach  (@long) {
+		print OUT "<td> $_ </td>\n";
+	}
+
+	print OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">Repeat</th>\n
+	";
+	foreach  (@repeat) {
+		print OUT "<td> $_ </td>\n";
+	}
+
+	print OUT "</tr>
+	<tr align=\"center\">
+	<th align=\"left\">Nat</th>\n
+	";
+	foreach  (@nat) {
+		print OUT "<td> $_ </td>\n";
+	}
+	print OUT "</tr>\n</table>";
+
+	print OUT "<p>
+	Repeat subclass annotate:
+	";
+
+	print OUT "<table border=\"1\">
+	<tr align=\"center\">
+	<th align=\"left\">Repeat subclass</th>\n
+	";
+	foreach  (@marks) {
+		print OUT "<th> $_ </th>\n";
+	}
+
+	foreach my $key (sort keys %repeat) {
+		print  OUT "</tr>
+			<tr align=\"center\">
+			<th align=\"left\">$key</th>
+		";
+		for (my $i=0;$i<@marks ;$i++) {
+			if (defined($repeat{$key}[$i])) {
+				print OUT "<td> $repeat{$key}[$i] </td>\n";
+			}
+			else{print OUT "<td> 0 </td>\n";}
+		}
+	}
+	print OUT "</tr>\n</table>";
+
+
+	print OUT "<p>
+	Nat subclass1 annotate:
+	";
+
+	print OUT "<table border=\"1\">
+	<tr align=\"center\">
+	<th align=\"left\">Nat subclass1</th>\n
+	";
+	foreach  (@marks) {
+		print OUT "<th> $_ </th>\n";
+	}
+	foreach my $key (sort keys %nat1) {
+		print  OUT "</tr>
+			<tr align=\"center\">
+			<th align=\"left\">$key</th>
+		";
+		for (my $i=0;$i<@marks ;$i++) {
+			if (defined($nat1{$key}[$i])) {
+				print OUT "<td> $nat1{$key}[$i] </td>\n";
+			}
+			else{print OUT "<td> 0 </td>\n";}
+		}
+	}
+	print OUT "</tr>\n</table>";
+
+	print OUT "<p>
+	Nat subclass2 annotate:
+	";
+
+	print OUT "<table border=\"1\">
+	<tr align=\"center\">
+	<th align=\"left\">Nat subclass2</th>\n
+	";
+	foreach  (@marks) {
+		print OUT "<th> $_ </th>\n";
+	}
+	foreach my $key (sort keys %nat2) {
+		print  OUT "</tr>
+			<tr align=\"center\">
+			<th align=\"left\">$key</th>
+		";
+		for (my $i=0;$i<@marks ;$i++) {
+			if (defined($nat2{$key}[$i])) {
+				print OUT "<td> $nat2{$key}[$i] </td>\n";
+			}
+			else{print OUT "<td> 0 </td>\n";}
+		}
+	}
+	print OUT "</tr>\n</table>";
+	print OUT "<p>
+	Note:<br />
+	One cluster mybe annotate to multiple repeats or nats<br />
+	";
+}
+else {
+	print OUT "<h3>5.2 Cluster source mechanism annotate</h3>
+	<br />Do not do source mechanism annotate <br />";
+
+}
+
+print OUT "<h2>6. Graph of Clusters of all samples</h2> \n";
+
+my $plot=$plotpath."cluster.html";
+open IN,"<$plot";
+while (my $aline=<IN>) {
+	chomp $aline;
+	print OUT "$aline\n";
+}
+close IN;
+
+
+if ($deg_tag) {
+	my $deg_file=`ls $degpath`;
+	chomp $deg_file;
+	my @deg_file=split/\n/,$deg_file;
+	my %deg;
+	foreach  (@deg_file) {
+		my $output="$degpath/$_/output_score.txt";
+		open IN,"<$output";
+		$deg{$_}[0]=0;
+		$deg{$_}[1]=0;
+		$deg{$_}[2]=0;
+		while (my $aline=<IN>) {
+			next if ($aline=~/^\"/);
+			chomp $aline;
+			my @temp=split/\t/,$aline;
+			if ($temp[9] eq "TRUE") {
+				$deg{$_}[0]++;
+				if ($temp[4] >0) {
+					$deg{$_}[1]++;
+				}
+				if ($temp[4] <0) {
+					$deg{$_}[2]++;
+				}
+			}
+		}
+		close IN;
+	}
+
+	print OUT "<h2>7. DEG</h2>
+	<table border=\"1\">
+	<tr align=\"center\">
+	<th align=\"left\">Genes number</th>\n
+	<th> DEG </th>\n
+	<th> UP </th>\n
+	<th> DOWN </th>\n
+	";
+
+	foreach my $key (sort keys %deg) {
+		print  OUT "</tr>
+			<tr align=\"center\">
+			<th align=\"left\">$key</th>
+		";
+		for (my $i=0;$i<@{$deg{$key}} ;$i++) {
+			print OUT "<td> $deg{$key}[$i] </td>\n";
+		}
+	}
+	print OUT "</tr>\n</table>";
+}
+else{
+	print OUT "<h2>7. DEG</h2>
+	<br />Do not do DE clusters <br />";
+}
+
+print OUT "
+ </BODY>
+</HTML>
+";
+close OUT;
+
+
+
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -o
+options:
+-i 
+-format
+-o output file
+-h help
+USAGE
+exit(1);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/install_DEG.R	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,2 @@
+source("http://bioconductor.org/biocLite.R")
+biocLite("DEGseq")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matching.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,86 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2013/7/19
+#Modified:
+#Description: 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","g=s","index:s","v:i","p:i","r:s","o=s","h");
+if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'o'};
+unless ($fileout=~/\/$/) {$fileout.="/";}
+my $genome=$opts{'g'};
+my $mis=defined $opts{'v'}? $opts{'v'} : 0;
+my $hits=defined $opts{'r'}? $opts{'r'} : 25;
+my $index=defined $opts{'index'} ? $opts{'index'} : "";
+my $threads=defined $opts{'p'} ? $opts{'p'} : 1;
+
+
+my $time=&Time();
+my $mapdir=$fileout."/genome_match";
+if(not -d $mapdir){
+	mkdir $mapdir;
+}
+chdir $mapdir;
+###check genome index
+if (-s $index.".1.ebwt") {
+}else{
+	`bowtie-build $genome $genome`;
+	$index="$genome";
+}
+
+### genome mapping
+`bowtie -v $mis -f -p $threads -m $hits -a --best --strata $index $filein --al genome_mapped.fa --un genome_not_mapped.fa --max genome_mapped_Mlimit.fa > genome_mapped.bwt  2> run.log`;
+
+#`convert_bowtie_to_blast.pl  genome_mapped.bwt genome_mapped.fa $genome > genome_mapped.bst`;
+
+sub Time{
+        my $time=time();
+        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+        $month++;
+        $year+=1900;
+        if (length($sec) == 1) {$sec = "0"."$sec";}
+        if (length($min) == 1) {$min = "0"."$min";}
+        if (length($hour) == 1) {$hour = "0"."$hour";}
+        if (length($day) == 1) {$day = "0"."$day";}
+        if (length($month) == 1) {$month = "0"."$month";}
+        #print "$year-$month-$day $hour:$min:$sec\n";
+        return("$year-$month-$day-$hour-$min-$sec");
+}
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o
+options:
+-i input file# input reads fasta/fastq file
+-g input file# genome file
+-index file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+-v <int>           report end-to-end hits w/ <=v mismatches; ignore qualities,default 0;
+
+-p/--threads <int> number of alignment threads to launch (default: 1)
+
+-r int          a read is allowed to map up to this number of positions in the genome
+                default is 25 
+
+-o output directory 
+
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/miRDeep_plant.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,1622 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+use Getopt::Std;
+#use RNA;
+
+
+################################# MIRDEEP #################################################
+
+################################## USAGE ##################################################
+
+
+my $usage=
+"$0 file_signature file_structure temp_out_directory
+
+This is the core algorithm of miRDeep. It takes as input a file in blastparsed format with
+information on the positions of reads aligned to potential precursor sequences (signature).
+It also takes as input an RNAfold output file, giving information on the sequence, structure
+and mimimum free energy of the potential precursor sequences.
+
+Extra arguments can be given. -s specifies a fastafile containing the known mature miRNA
+sequences that should be considered for conservation purposes. -t prints out the potential
+precursor sequences that do _not_ exceed the cut-off (default prints out the sequences that
+exceeds the cut-off). -u gives limited output, that is only the ids of the potential precursors
+that exceed the cut-off. -v varies the cut-off. -x is a sensitive option for Sanger sequences
+obtained through conventional cloning. -z consider the number of base pairings in the lower
+stems (this option is not well tested).
+
+-h print this usage
+-s fasta file with known miRNAs
+#-o temp directory ,maked befor running the program.
+-t print filtered
+-u limited output (only ids)
+-v cut-off (default 1)
+-x sensitive option for Sanger sequences
+-y use Randfold
+-z consider Drosha processing
+";
+
+
+
+
+
+############################################################################################
+
+################################### INPUT ##################################################
+
+
+#signature file in blast_parsed format
+my $file_blast_parsed=shift or die $usage;
+
+#structure file outputted from RNAfold
+my $file_struct=shift or die $usage;
+
+my $tmpdir=shift or die $usage;
+#options
+my %options=();
+getopts("hs:tuv:xyz",\%options);
+
+
+
+
+
+
+#############################################################################################
+
+############################# GLOBAL VARIABLES ##############################################
+
+
+#parameters
+my $nucleus_lng=11;
+
+my $score_star=3.9;
+my $score_star_not=-1.3;
+my $score_nucleus=7.63;
+my $score_nucleus_not=-1.17;
+my $score_randfold=1.37;
+my $score_randfold_not=-3.624;
+my $score_intercept=0.3;
+my @scores_stem=(-3.1,-2.3,-2.2,-1.6,-1.5,0.1,0.6,0.8,0.9,0.9,0);
+my $score_min=1;
+if($options{v}){$score_min=$options{v};}
+if($options{x}){$score_min=-5;}
+
+my $e=2.718281828;
+
+#hashes
+my %hash_desc;
+my %hash_seq;
+my %hash_struct;
+my %hash_mfe;
+my %hash_nuclei;
+my %hash_mirs;
+my %hash_query;
+my %hash_comp;
+my %hash_bp;
+
+#other variables
+my $subject_old;
+my $message_filter;
+my $message_score;
+my $lines;
+my $out_of_bound;
+
+
+
+##############################################################################################
+
+################################  MAIN  ###################################################### 
+
+
+#print help if that option is used
+if($options{h}){die $usage;}
+unless ($tmpdir=~/\/$/) {$tmpdir .="/";}
+if(!(-s $tmpdir)){mkdir $tmpdir;}
+$tmpdir .="TMP_DIR/";
+mkdir $tmpdir;
+
+#parse structure file outputted from RNAfold
+parse_file_struct($file_struct);
+
+#if conservation is scored, the fasta file of known miRNA sequences is parsed
+if($options{s}){create_hash_nuclei($options{s})};
+
+#parse signature file in blast_parsed format and resolve each potential precursor
+parse_file_blast_parsed($file_blast_parsed);
+#`rm -rf $tmpdir`;
+exit;
+
+
+
+
+##############################################################################################
+
+############################## SUBROUTINES ###################################################
+
+
+
+sub parse_file_blast_parsed{
+
+#    read through the signature blastparsed file, fills up a hash with information on queries
+#    (deep sequences) mapping to the current subject (potential precursor) and resolve each
+#    potential precursor in turn
+ 
+    my $file_blast_parsed=shift;
+    
+    open (FILE_BLAST_PARSED, "<$file_blast_parsed") or die "can not open $file_blast_parsed\n";
+    while (my $line=<FILE_BLAST_PARSED>){
+		if($line=~/^(\S+)\s+(\S+)\s+(\d+)\.+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\.+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(.+)$/){
+			my $query=$1;
+			my $query_lng=$2;
+			my $query_beg=$3;
+			my $query_end=$4;
+			my $subject=$5;
+			my $subject_lng=$6;
+			my $subject_beg=$7;
+			my $subject_end=$8;
+			my $e_value=$9;
+			my $pid=$10;
+			my $bitscore=$11;
+			my $other=$12;
+			
+			#if the new line concerns a new subject (potential precursor) then the old subject must be resolved
+			if($subject_old and $subject_old ne $subject){
+				resolve_potential_precursor();
+			}
+
+			#resolve the strand
+			my $strand=find_strand($other);
+
+			#resolve the number of reads that the deep sequence represents
+			my $freq=find_freq($query);
+
+			#read information of the query (deep sequence) into hash
+			$hash_query{$query}{"subject_beg"}=$subject_beg;
+			$hash_query{$query}{"subject_end"}=$subject_end;
+			$hash_query{$query}{"strand"}=$strand;
+			$hash_query{$query}{"freq"}=$freq;
+
+			#save the signature information
+			$lines.=$line;
+
+			$subject_old=$subject;
+		}
+    }
+    resolve_potential_precursor();
+}
+
+sub resolve_potential_precursor{
+    
+#    dissects the potential precursor in parts by filling hashes, and tests if it passes the
+#    initial filter and the scoring filter
+
+#    binary variable whether the potential precursor is still viable
+    my $ret=1;
+#print STDERR ">$subject_old\n";
+
+    fill_structure();
+#print STDERR "\%hash_bp",scalar keys %hash_bp,"\n";
+    fill_pri();
+#print STDERR "\%hash_comp",scalar keys %hash_comp,"\n";
+
+    fill_mature();
+#print STDERR "\%hash_comp",scalar keys %hash_comp,"\n";
+  
+    fill_star();
+#print STDERR "\%hash_comp",scalar keys %hash_comp,"\n";
+
+    fill_loop();
+#print STDERR "\%hash_comp",scalar keys %hash_comp,"\n";
+
+    fill_lower_flanks();
+#print STDERR "\%hash_comp",scalar keys %hash_comp,"\n";
+
+#    do_test_assemble();
+
+#    this is the actual classification
+    unless(pass_filtering_initial() and pass_threshold_score()){$ret=0;}
+
+    print_results($ret);
+    
+    reset_variables();
+    
+    return;
+    
+}
+
+
+
+sub print_results{
+
+    my $ret=shift;
+
+#    print out if the precursor is accepted and accepted precursors should be printed out
+#    or if the potential precursor is discarded and discarded potential precursors should
+#    be printed out
+	
+    if((!$options{t} and $ret) or ($options{t} and !$ret)){
+	#full output
+		unless($options{u}){
+			if($message_filter){print $message_filter;}
+			if($message_score){print $message_score;}
+			print_hash_comp();
+			print $lines,"\n\n";
+			return;
+		}
+		#limited output (only ids)
+		my $id=$hash_comp{"pri_id"};
+		print "$id\n";
+    }    
+}
+
+
+
+
+
+
+
+sub pass_threshold_score{
+
+#    this is the scoring
+
+    #minimum free energy of the potential precursor
+#    my $score_mfe=score_mfe($hash_comp{"pri_mfe"});
+    my $score_mfe=score_mfe($hash_comp{"pri_mfe"},$hash_comp{"pri_end"});
+
+    #count of reads that map in accordance with Dicer processing
+    my $score_freq=score_freq($hash_comp{"freq"});
+#print STDERR "score_mfe: $score_mfe\nscore_freq: $score_freq\n";
+
+    #basic score
+    my $score=$score_mfe+$score_freq;
+
+    #scoring of conserved nucleus/seed (optional)
+    if($options{s}){
+
+	#if the nucleus is conserved
+	if(test_nucleus_conservation()){
+
+	    #nucleus from position 2-8
+	    my $nucleus=substr($hash_comp{"mature_seq"},1,$nucleus_lng);
+
+	    #resolve DNA/RNA ambiguities
+	    $nucleus=~tr/[T]/[U]/;
+
+	    #print score contribution
+	    score_s("score_nucleus\t$score_nucleus");
+
+	    #print the ids of known miRNAs with same nucleus
+	    score_s("$hash_mirs{$nucleus}");
+#print STDERR "score_nucleus\t$score_nucleus\n";
+
+	    #add to score
+	    $score+=$score_nucleus;
+
+	#if the nucleus is not conserved
+	}else{
+	    #print (negative) score contribution
+	    score_s("score_nucleus\t$score_nucleus_not");
+
+	    #add (negative) score contribution
+	    $score+=$score_nucleus_not;
+	}
+    }
+   
+    #if the majority of potential star reads fall as expected from Dicer processing
+    if($hash_comp{"star_read"}){
+	score_s("score_star\t$score_star");
+#print STDERR "score_star\t$score_star\n";
+	$score+=$score_star;
+    }else{
+	score_s("score_star\t$score_star_not");
+#print STDERR "score_star_not\t$score_star_not\n";
+	$score+=$score_star_not;
+    }
+
+    #score lower stems for potential for Drosha recognition (highly optional)
+    if($options{z}){
+	my $stem_bp=$hash_comp{"stem_bp"};
+	my $score_stem=$scores_stem[$stem_bp];
+	$score+=$score_stem;
+	score_s("score_stem\t$score_stem");
+    }
+
+#print STDERR "score_intercept\t$score_intercept\n";
+
+    $score+=$score_intercept;
+
+    #score for randfold (optional)
+    if($options{y}){
+
+#	only calculate randfold value if it can make the difference between the potential precursor
+#	being accepted or discarded
+	if($score+$score_randfold>=$score_min and $score+$score_randfold_not<=$score_min){
+
+	    #randfold value<0.05
+	    if(test_randfold()){$score+=$score_randfold;score_s("score_randfold\t$score_randfold");}
+
+	    #randfold value>0.05
+	    else{$score+=$score_randfold_not;score_s("score_randfold\t$score_randfold_not");}
+	}
+    }
+
+    #round off values to one decimal
+    my $round_mfe=round($score_mfe*10)/10;
+    my $round_freq=round($score_freq*10)/10;
+    my $round=round($score*10)/10;
+
+    #print scores
+    score_s("score_mfe\t$round_mfe\nscore_freq\t$round_freq\nscore\t$round");
+ 
+    #return 1 if the potential precursor is accepted, return 0 if discarded
+    unless($score>=$score_min){return 0;}
+    return 1;
+}
+
+sub test_randfold{
+
+    #print sequence to temporary file, test randfold value, return 1 or 0
+
+#    print_file("pri_seq.fa",">pri_seq\n".$hash_comp{"pri_seq"});
+	#my $tmpfile=$tmpdir.$hash_comp{"pri_id"};
+	#open(FILE, ">$tmpfile");
+	#print FILE ">pri_seq\n",$hash_comp{"pri_seq"};
+	#close FILE;
+
+#	my $p_value=`randfold -s $tmpfile 999 | cut -f 3`;
+	#my $p1=`randfold -s $tmpfile 999 | cut -f 3`;
+	#my $p2=`randfold -s $tmpfile 999 | cut -f 3`;
+	my $p1=&randfold_pvalue($hash_comp{"pri_seq"},999);
+	my $p2=&randfold_pvalue($hash_comp{"pri_seq"},999);
+	my $p_value=($p1+$p2)/2;
+	wait;
+#    system "rm $tmpfile";
+
+    if($p_value<=0.05){return 1;}
+
+    return 0;
+}
+
+sub randfold_pvalue{
+	my $cpt_sup = 0;
+	my $cpt_inf = 0;
+	my $cpt_ega = 1;
+	
+	my ($seq,$number_of_randomizations)=@_;
+	#my $str =$seq;
+	#my $mfe = RNA::fold($seq,$str);
+		my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
+		my @rawfolds=split/\s+/,$rnafold;
+		my $str=$rawfolds[1];
+		my $mfe=$rawfolds[-1];
+		$mfe=~s/\(//;
+		$mfe=~s/\)//;
+
+	for (my $i=0;$i<$number_of_randomizations;$i++) {
+		$seq = shuffle_sequence_dinucleotide($seq);
+		#$str = $seq;
+	
+		#my $rand_mfe = RNA::fold($str,$str);
+		$rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
+		my @rawfolds=split/\s+/,$rnafold;
+		my $str=$rawfolds[1];
+		my $rand_mfe=$rawfolds[-1];
+		$rand_mfe=~s/\(//;
+		$rand_mfe=~s/\)//;
+	
+		if ($rand_mfe < $mfe) {
+			$cpt_inf++;
+		}
+		if ($rand_mfe == $mfe) {
+			$cpt_ega++;
+		}
+		if ($rand_mfe > $mfe) {		
+			$cpt_sup++;
+		}
+	}
+	
+	my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1);
+
+	#print "$name\t$mfe\t$proba\n";
+	return $proba;
+}
+
+sub shuffle_sequence_dinucleotide {
+
+	my ($str) = @_;
+
+	# upper case and convert to ATGC
+	$str = uc($str);
+	$str =~ s/U/T/g;
+	
+	my @nuc = ('A','T','G','C');
+	my $count_swap = 0;
+	# set maximum number of permutations
+	my $stop = length($str) * 10;
+	
+	while($count_swap < $stop) {
+
+		my @pos;
+
+		# look start and end letters
+		my $firstnuc = $nuc[int(rand 4)];
+		my $thirdnuc = $nuc[int(rand 4)];	
+
+		# get positions for matching nucleotides
+		for (my $i=0;$i<(length($str)-2);$i++) {
+			if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) {
+				push (@pos,($i+1));
+				$i++;
+			}
+		}
+	
+		# swap at random trinucleotides
+		my $max = scalar(@pos);
+		for (my $i=0;$i<$max;$i++) {
+			my $swap = int(rand($max));
+			if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) {			
+				$count_swap++;
+				my $w1 = substr($str,$pos[$i],1);
+				my $w2 = substr($str,$pos[$swap],1);			
+				substr($str,$pos[$i],1,$w2);
+				substr($str,$pos[$swap],1,$w1);		
+			}
+		}
+	}
+	return($str);	
+}
+
+sub test_nucleus_conservation{
+
+    #test if nucleus is identical to nucleus from known miRNA, return 1 or 0
+
+    my $nucleus=substr($hash_comp{"mature_seq"},1,$nucleus_lng);
+    $nucleus=~tr/[T]/[U]/;
+    if($hash_nuclei{$nucleus}){return 1;}
+    
+    return 0;
+}
+
+
+
+sub pass_filtering_initial{
+
+    #test if the structure forms a plausible hairpin
+    unless(pass_filtering_structure()){filter_p("structure problem"); return 0;}
+
+    #test if >90% of reads map to the hairpin in consistence with Dicer processing
+    unless(pass_filtering_signature()){filter_p("signature problem");return 0;}
+
+    return 1;
+
+}
+
+
+sub pass_filtering_signature{
+
+    #number of reads that map in consistence with Dicer processing
+    my $consistent=0;
+
+    #number of reads that map inconsistent with Dicer processing
+    my $inconsistent=0;
+   
+#   number of potential star reads map in good consistence with Drosha/Dicer processing
+#   (3' overhangs relative to mature product)
+    my $star_perfect=0;
+
+#   number of potential star reads that do not map in good consistence with 3' overhang
+    my $star_fuzzy=0;
+ 
+
+    #sort queries (deep sequences) by their position on the hairpin
+    my @queries=sort {$hash_query{$a}{"subject_beg"} <=> $hash_query{$b}{"subject_beg"}} keys %hash_query;
+
+    foreach my $query(@queries){
+
+		#number of reads that the deep sequence represents
+		unless(defined($hash_query{$query}{"freq"})){next;}
+		my $query_freq=$hash_query{$query}{"freq"};
+
+		#test which Dicer product (if any) the deep sequence corresponds to
+		my $product=test_query($query);
+
+		#if the deep sequence corresponds to a Dicer product, add to the 'consistent' variable
+		if($product){$consistent+=$query_freq;}
+
+		#if the deep sequence do not correspond to a Dicer product, add to the 'inconsistent' variable
+		else{$inconsistent+=$query_freq;}
+
+		#test a potential star sequence has good 3' overhang
+		if($product eq "star"){
+			if(test_star($query)){$star_perfect+=$query_freq;}
+			else{$star_fuzzy+=$query_freq;}
+		}
+    }
+
+#   if the majority of potential star sequences map in good accordance with 3' overhang
+#    score for the presence of star evidence
+    if($star_perfect>$star_fuzzy){$hash_comp{"star_read"}=1;}
+
+    #total number of reads mapping to the hairpin
+    my $freq=$consistent+$inconsistent;
+    $hash_comp{"freq"}=$freq;
+    unless($freq>0){filter_s("read frequency too low"); return 0;}
+
+    #unless >90% of the reads map in consistence with Dicer processing, the hairpin is discarded
+    my $inconsistent_fraction=$inconsistent/($inconsistent+$consistent);
+    unless($inconsistent_fraction<=0.1){filter_p("inconsistent\t$inconsistent\nconsistent\t$consistent"); return 0;}
+
+    #the hairpin is retained
+    return 1;
+}
+
+sub test_star{
+
+    #test if a deep sequence maps in good consistence with 3' overhang
+
+    my $query=shift;
+
+    #5' begin and 3' end positions
+    my $beg=$hash_query{$query}{"subject_beg"};
+    my $end=$hash_query{$query}{"subject_end"};
+
+    #the difference between observed and expected begin positions must be 0 or 1
+    my $offset=$beg-$hash_comp{"star_beg"};
+    if($offset==0 or $offset==1 or $offset==-1){return 1;}
+
+    return 0;
+}
+
+
+
+sub test_query{
+
+    #test if deep sequence maps in consistence with Dicer processing
+    
+    my $query=shift;
+
+    #begin, end, strand and read count
+    my $beg=$hash_query{$query}{"subject_beg"};
+    my $end=$hash_query{$query}{"subject_end"};
+    my $strand=$hash_query{$query}{"strand"};
+    my $freq=$hash_query{$query}{"freq"};
+
+    #should not be on the minus strand (although this has in fact anecdotally been observed for known miRNAs)
+    if($strand eq '-'){return 0;}
+
+    #the deep sequence is allowed to stretch 2 nt beyond the expected 5' end
+    my $fuzz_beg=2;
+    #the deep sequence is allowed to stretch 5 nt beyond the expected 3' end
+    my $fuzz_end=2;
+
+    #if in accordance with Dicer processing, return the type of Dicer product 
+    if(contained($beg,$end,$hash_comp{"mature_beg"}-$fuzz_beg,$hash_comp{"mature_end"}+$fuzz_end)){return "mature";}
+    if(contained($beg,$end,$hash_comp{"star_beg"}-$fuzz_beg,$hash_comp{"star_end"}+$fuzz_end)){return "star";}
+    if(contained($beg,$end,$hash_comp{"loop_beg"}-$fuzz_beg,$hash_comp{"loop_end"}+$fuzz_end)){return "loop";}
+  
+    #if not in accordance, return 0
+    return 0;
+}
+
+
+sub pass_filtering_structure{
+
+    #The potential precursor must form a hairpin with miRNA precursor-like characteristics
+
+    #return value
+    my $ret=1;
+
+    #potential mature, star, loop and lower flank parts must be identifiable
+    unless(test_components()){return 0;}
+
+    #no bifurcations
+    unless(no_bifurcations_precursor()){$ret=0;}
+
+    #minimum 14 base pairings in duplex
+    unless(bp_duplex()>=15){$ret=0;filter_s("too few pairings in duplex");}
+
+    #not more than 6 nt difference between mature and star length
+    unless(-6<diff_lng() and diff_lng()<6){$ret=0; filter_s("too big difference between mature and star length") }
+
+    return $ret;
+}
+
+
+
+
+
+
+sub test_components{
+
+    #tests whether potential mature, star, loop and lower flank parts are identifiable
+
+    unless($hash_comp{"mature_struct"}){
+	filter_s("no mature");
+#	print STDERR "no mature\n";
+	return 0;
+    }
+
+    unless($hash_comp{"star_struct"}){
+	filter_s("no star");
+#	print STDERR "no star\n";
+	return 0;
+    }
+
+    unless($hash_comp{"loop_struct"}){
+	filter_s("no loop");
+#	print STDERR "no loop\n";
+   	return 0;
+    }
+
+    unless($hash_comp{"flank_first_struct"}){
+	filter_s("no flanks");
+#print STDERR "no flanks_first_struct\n";
+   	return 0;
+    }
+     
+    unless($hash_comp{"flank_second_struct"}){
+	filter_s("no flanks");
+#	print STDERR "no flanks_second_struct\n";
+    	return 0;
+    }
+    return 1;
+}
+
+
+
+
+
+sub no_bifurcations_precursor{
+
+    #tests whether there are bifurcations in the hairpin
+
+    #assembles the potential precursor sequence and structure from the expected Dicer products
+    #this is the expected biological precursor, in contrast with 'pri_seq' that includes
+    #some genomic flanks on both sides
+
+    my $pre_struct;
+    my $pre_seq;
+    if($hash_comp{"mature_arm"} eq "first"){
+	$pre_struct.=$hash_comp{"mature_struct"}.$hash_comp{"loop_struct"}.$hash_comp{"star_struct"};
+	$pre_seq.=$hash_comp{"mature_seq"}.$hash_comp{"loop_seq"}.$hash_comp{"star_seq"};
+    }else{
+	$pre_struct.=$hash_comp{"star_struct"}.$hash_comp{"loop_struct"}.$hash_comp{"mature_struct"};
+	$pre_seq.=$hash_comp{"star_seq"}.$hash_comp{"loop_seq"}.$hash_comp{"mature_seq"};
+    }
+
+    #read into hash
+    $hash_comp{"pre_struct"}=$pre_struct;
+    $hash_comp{"pre_seq"}=$pre_seq;
+
+    #simple pattern matching checks for bifurcations
+    unless($pre_struct=~/^((\.|\()+..(\.|\))+)$/){
+	filter_s("bifurcation in precursor");
+#	print STDERR "bifurcation in precursor\n";
+	return 0;
+    }
+
+    return 1;
+}
+
+sub bp_precursor{
+ 
+    #total number of bps in the precursor
+ 
+    my $pre_struct=$hash_comp{"pre_struct"};
+
+    #simple pattern matching
+    my $pre_bps=0;
+    while($pre_struct=~/\(/g){
+	$pre_bps++;
+    }
+    return $pre_bps;
+}
+
+
+sub bp_duplex{
+
+    #total number of bps in the duplex
+
+    my $duplex_bps=0;
+    my $mature_struct=$hash_comp{"mature_struct"};
+
+    #simple pattern matching
+    while($mature_struct=~/(\(|\))/g){
+	$duplex_bps++;
+    }
+    return $duplex_bps;
+}
+
+sub diff_lng{
+
+    #find difference between mature and star lengths
+
+    my $mature_lng=length $hash_comp{"mature_struct"};
+    my $star_lng=length $hash_comp{"star_struct"};
+    my $diff_lng=$mature_lng-$star_lng;
+    return $diff_lng;
+}
+
+
+
+sub do_test_assemble{
+
+#    not currently used, tests if the 'pri_struct' as assembled from the parts (Dicer products, lower flanks)
+#    is identical to 'pri_struct' before disassembly into parts
+
+    my $assemble_struct;
+
+    if($hash_comp{"flank_first_struct"} and $hash_comp{"mature_struct"} and $hash_comp{"loop_struct"} and $hash_comp{"star_struct"} and $hash_comp{"flank_second_struct"}){
+	if($hash_comp{"mature_arm"} eq "first"){
+	    $assemble_struct.=$hash_comp{"flank_first_struct"}.$hash_comp{"mature_struct"}.$hash_comp{"loop_struct"}.$hash_comp{"star_struct"}.$hash_comp{"flank_second_struct"};
+	}else{
+	    $assemble_struct.=$hash_comp{"flank_first_struct"}.$hash_comp{"star_struct"}.$hash_comp{"loop_struct"}.$hash_comp{"mature_struct"}.$hash_comp{"flank_second_struct"};
+	}
+	unless($assemble_struct eq $hash_comp{"pri_struct"}){
+	    $hash_comp{"test_assemble"}=$assemble_struct;
+	    print_hash_comp();
+	}
+    }
+    return;
+ }
+
+
+
+sub fill_structure{
+
+    #reads the dot bracket structure into the 'bp' hash where each key and value are basepaired
+
+    my $struct=$hash_struct{$subject_old};
+    my $lng=length $struct;
+
+    #local stack for keeping track of basepairings
+    my @bps;
+
+    for(my $pos=1;$pos<=$lng;$pos++){
+		my $struct_pos=excise_struct($struct,$pos,$pos,"+");
+
+		if($struct_pos eq "("){
+			push(@bps,$pos);
+		}
+
+		if($struct_pos eq ")"){
+			my $pos_prev=pop(@bps);
+			$hash_bp{$pos_prev}=$pos;
+			$hash_bp{$pos}=$pos_prev;
+		}
+    }
+    return;
+}
+
+
+
+sub fill_star{
+
+    #fills specifics on the expected star strand into 'comp' hash ('component' hash)
+    
+    #if the mature sequence is not plausible, don't look for the star arm
+    my $mature_arm=$hash_comp{"mature_arm"};
+    unless($mature_arm){$hash_comp{"star_arm"}=0; return;}
+ 
+    #if the star sequence is not plausible, don't fill into the hash
+    my($star_beg,$star_end)=find_star();
+    my $star_arm=arm_star($star_beg,$star_end);
+    unless($star_arm){return;}
+
+    #excise expected star sequence and structure
+    my $star_seq=excise_seq($hash_comp{"pri_seq"},$star_beg,$star_end,"+");
+    my $star_struct=excise_seq($hash_comp{"pri_struct"},$star_beg,$star_end,"+");
+
+    #fill into hash
+    $hash_comp{"star_beg"}=$star_beg;
+    $hash_comp{"star_end"}=$star_end;
+    $hash_comp{"star_seq"}=$star_seq;
+    $hash_comp{"star_struct"}=$star_struct;
+    $hash_comp{"star_arm"}=$star_arm;
+
+    return;
+}
+
+
+sub find_star{
+
+    #uses the 'bp' hash to find the expected star begin and end positions from the mature positions
+
+    #the -2 is for the overhang
+    my $mature_beg=$hash_comp{"mature_beg"};
+    my $mature_end=$hash_comp{"mature_end"}-2;
+    my $mature_lng=$mature_end-$mature_beg+1;
+
+    #in some cases, the last nucleotide of the mature sequence does not form a base pair,
+    #and therefore does not basepair with the first nucleotide of the star sequence.
+    #In this case, the algorithm searches for the last nucleotide of the mature sequence
+    #to form a base pair. The offset is the number of nucleotides searched through.
+    my $offset_star_beg=0;
+    my $offset_beg=0;
+
+    #the offset should not be longer than the length of the mature sequence, then it
+    #means that the mature sequence does not form any base pairs
+    while(!$offset_star_beg and $offset_beg<$mature_lng){
+		if($hash_bp{$mature_end-$offset_beg}){
+			$offset_star_beg=$hash_bp{$mature_end-$offset_beg};
+		}else{
+			$offset_beg++;
+		}
+    }
+    #when defining the beginning of the star sequence, compensate for the offset
+    my $star_beg=$offset_star_beg-$offset_beg;
+
+    #same as above
+    my $offset_star_end=0;
+    my $offset_end=0;
+    while(!$offset_star_end and $offset_end<$mature_lng){
+		if($hash_bp{$mature_beg+$offset_end}){
+			$offset_star_end=$hash_bp{$mature_beg+$offset_end};
+		}else{
+			$offset_end++;
+		}
+    }
+    #the +2 is for the overhang
+    my $star_end=$offset_star_end+$offset_end+2;
+
+    return($star_beg,$star_end);
+}
+
+
+sub fill_pri{
+
+    #fills basic specifics on the precursor into the 'comp' hash
+    
+    my $seq=$hash_seq{$subject_old};
+    my $struct=$hash_struct{$subject_old};
+    my $mfe=$hash_mfe{$subject_old};
+    my $length=length $seq;
+    
+    $hash_comp{"pri_id"}=$subject_old;
+    $hash_comp{"pri_seq"}=$seq;
+    $hash_comp{"pri_struct"}=$struct;
+    $hash_comp{"pri_mfe"}=$mfe;
+    $hash_comp{"pri_beg"}=1;
+    $hash_comp{"pri_end"}=$length;
+    
+    return;
+}
+
+
+sub fill_mature{
+
+    #fills specifics on the mature sequence into the 'comp' hash
+
+    my $mature_query=find_mature_query();
+    my($mature_beg,$mature_end)=find_positions_query($mature_query);
+    my $mature_strand=find_strand_query($mature_query);
+    my $mature_seq=excise_seq($hash_comp{"pri_seq"},$mature_beg,$mature_end,$mature_strand);
+    my $mature_struct=excise_struct($hash_comp{"pri_struct"},$mature_beg,$mature_end,$mature_strand);
+    my $mature_arm=arm_mature($mature_beg,$mature_end,$mature_strand);
+
+    $hash_comp{"mature_query"}=$mature_query;
+    $hash_comp{"mature_beg"}=$mature_beg;
+    $hash_comp{"mature_end"}=$mature_end;
+    $hash_comp{"mature_strand"}=$mature_strand;
+    $hash_comp{"mature_struct"}=$mature_struct;
+    $hash_comp{"mature_seq"}=$mature_seq;
+    $hash_comp{"mature_arm"}=$mature_arm;
+
+    return;
+}
+
+
+
+sub fill_loop{
+
+    #fills specifics on the loop sequence into the 'comp' hash
+
+    #unless both mature and star sequences are plausible, do not look for the loop
+    unless($hash_comp{"mature_arm"} and $hash_comp{"star_arm"}){return;}
+
+    my $loop_beg;
+    my $loop_end;
+
+    #defining the begin and end positions of the loop from the mature and star positions
+    #excision depends on whether the mature or star sequence is 5' of the loop ('first')
+    if($hash_comp{"mature_arm"} eq "first"){
+	$loop_beg=$hash_comp{"mature_end"}+1;
+    }else{
+	$loop_end=$hash_comp{"mature_beg"}-1;
+    }
+    
+    if($hash_comp{"star_arm"} eq "first"){
+	$loop_beg=$hash_comp{"star_end"}+1;
+    }else{
+	$loop_end=$hash_comp{"star_beg"}-1;
+    }
+
+    #unless the positions are plausible, do not fill into hash
+    unless(test_loop($loop_beg,$loop_end)){return;}
+
+    my $loop_seq=excise_seq($hash_comp{"pri_seq"},$loop_beg,$loop_end,"+");
+    my $loop_struct=excise_struct($hash_comp{"pri_struct"},$loop_beg,$loop_end,"+");
+
+    $hash_comp{"loop_beg"}=$loop_beg;
+    $hash_comp{"loop_end"}=$loop_end;
+    $hash_comp{"loop_seq"}=$loop_seq;
+    $hash_comp{"loop_struct"}=$loop_struct;
+
+    return;
+}
+
+
+sub fill_lower_flanks{
+
+    #fills specifics on the lower flanks and unpaired strands into the 'comp' hash
+
+    #unless both mature and star sequences are plausible, do not look for the flanks
+    unless($hash_comp{"mature_arm"} and $hash_comp{"star_arm"}){return;}
+
+    my $flank_first_end;
+    my $flank_second_beg;
+
+    #defining the begin and end positions of the flanks from the mature and star positions
+    #excision depends on whether the mature or star sequence is 5' in the potenitial precursor ('first')
+    if($hash_comp{"mature_arm"} eq "first"){
+	$flank_first_end=$hash_comp{"mature_beg"}-1;
+    }else{
+	$flank_second_beg=$hash_comp{"mature_end"}+1;
+    }
+    
+    if($hash_comp{"star_arm"} eq "first"){
+	$flank_first_end=$hash_comp{"star_beg"}-1;
+    }else{
+	$flank_second_beg=$hash_comp{"star_end"}+1;
+    }   
+
+    #unless the positions are plausible, do not fill into hash
+    unless(test_flanks($flank_first_end,$flank_second_beg)){return;}
+
+    $hash_comp{"flank_first_end"}=$flank_first_end;
+    $hash_comp{"flank_second_beg"}=$flank_second_beg;
+    $hash_comp{"flank_first_seq"}=excise_seq($hash_comp{"pri_seq"},$hash_comp{"pri_beg"},$hash_comp{"flank_first_end"},"+");
+    $hash_comp{"flank_second_seq"}=excise_seq($hash_comp{"pri_seq"},$hash_comp{"flank_second_beg"},$hash_comp{"pri_end"},"+");
+    $hash_comp{"flank_first_struct"}=excise_struct($hash_comp{"pri_struct"},$hash_comp{"pri_beg"},$hash_comp{"flank_first_end"},"+");
+    $hash_comp{"flank_second_struct"}=excise_struct($hash_comp{"pri_struct"},$hash_comp{"flank_second_beg"},$hash_comp{"pri_end"},"+");
+
+    if($options{z}){
+	fill_stems_drosha();
+    }
+
+    return;
+}
+
+
+sub fill_stems_drosha{
+
+    #scores the number of base pairings formed by the first ten nt of the lower stems
+    #in general, the more stems, the higher the score contribution
+    #warning: this options has not been thoroughly tested
+
+    my $flank_first_struct=$hash_comp{"flank_first_struct"};
+    my $flank_second_struct=$hash_comp{"flank_second_struct"};
+    
+    my $stem_first=substr($flank_first_struct,-10);
+    my $stem_second=substr($flank_second_struct,0,10);
+    
+    my $stem_bp_first=0;
+    my $stem_bp_second=0;
+
+    #find base pairings by simple pattern matching
+    while($stem_first=~/\(/g){
+	$stem_bp_first++;
+    }
+    
+    while($stem_second=~/\)/g){
+	$stem_bp_second++;
+    }
+    
+    my $stem_bp=min2($stem_bp_first,$stem_bp_second);
+    
+    $hash_comp{"stem_first"}=$stem_first;
+    $hash_comp{"stem_second"}=$stem_second;
+    $hash_comp{"stem_bp_first"}=$stem_bp_first;
+    $hash_comp{"stem_bp_second"}=$stem_bp_second;
+    $hash_comp{"stem_bp"}=$stem_bp;
+    
+    return;
+}
+
+
+
+
+sub arm_mature{
+ 
+    #tests whether the mature sequence is in the 5' ('first') or 3' ('second') arm of the potential precursor
+
+    my ($beg,$end,$strand)=@_;
+ 
+    #mature and star sequences should alway be on plus strand
+    if($strand eq "-"){return 0;}
+
+    #there should be no bifurcations and minimum one base pairing
+    my $struct=excise_seq($hash_comp{"pri_struct"},$beg,$end,$strand);
+    if(defined($struct) and $struct=~/^(\(|\.)+$/ and $struct=~/\(/){
+	return "first";
+    }elsif(defined($struct) and $struct=~/^(\)|\.)+$/ and $struct=~/\)/){
+	return "second";
+    }
+    return 0;
+}
+
+
+sub arm_star{
+
+    #tests whether the star sequence is in the 5' ('first') or 3' ('second') arm of the potential precursor
+
+    my ($beg,$end)=@_;
+
+    #unless the begin and end positions are plausible, test negative
+    unless($beg>0 and $beg<=$hash_comp{"pri_end"} and $end>0 and $end<=$hash_comp{"pri_end"} and $beg<=$end){return 0;}
+
+    #no overlap between the mature and the star sequence
+    if($hash_comp{"mature_arm"} eq "first"){
+	($hash_comp{"mature_end"}<$beg) or return 0;
+    }elsif($hash_comp{"mature_arm"} eq "second"){
+	($end<$hash_comp{"mature_beg"}) or return 0;
+    }
+
+    #there should be no bifurcations and minimum one base pairing
+    my $struct=excise_seq($hash_comp{"pri_struct"},$beg,$end,"+");
+    if($struct=~/^(\(|\.)+$/ and $struct=~/\(/){
+	return "first";
+    }elsif($struct=~/^(\)|\.)+$/ and $struct=~/\)/){
+	return "second";
+    }
+    return 0;
+}
+
+
+sub test_loop{
+
+    #tests the loop positions
+
+    my ($beg,$end)=@_;
+
+    #unless the begin and end positions are plausible, test negative
+    unless($beg>0 and $beg<=$hash_comp{"pri_end"} and $end>0 and $end<=$hash_comp{"pri_end"} and $beg<=$end){return 0;}
+
+    return 1;
+}
+
+
+sub test_flanks{
+
+    #tests the positions of the lower flanks
+
+    my ($beg,$end)=@_;
+
+    #unless the begin and end positions are plausible, test negative
+    unless($beg>0 and $beg<=$hash_comp{"pri_end"} and $end>0 and $end<=$hash_comp{"pri_end"} and $beg<=$end){return 0;}
+
+    return 1;
+}
+
+
+sub comp{
+
+    #subroutine to retrive from the 'comp' hash
+
+    my $type=shift;
+    my $component=$hash_comp{$type};
+    return $component;
+}
+
+
+sub find_strand_query{
+
+    #subroutine to find the strand for a given query
+
+    my $query=shift;
+    my $strand=$hash_query{$query}{"strand"};
+    return $strand;
+}
+
+
+sub find_positions_query{
+
+    #subroutine to find the begin and end positions for a given query
+
+    my $query=shift;
+    my $beg=$hash_query{$query}{"subject_beg"};
+    my $end=$hash_query{$query}{"subject_end"};
+    return ($beg,$end);
+}
+
+
+
+sub find_mature_query{
+
+    #finds the query with the highest frequency of reads and returns it
+    #is used to determine the positions of the potential mature sequence
+
+    my @queries=sort {$hash_query{$b}{"freq"} <=> $hash_query{$a}{"freq"}} keys %hash_query;
+    my $mature_query=$queries[0];
+    return $mature_query;
+}
+
+
+
+
+sub reset_variables{
+
+    #resets the hashes for the next potential precursor
+
+#    %hash_query=();
+#    %hash_comp=();
+#    %hash_bp=();
+	foreach my $key (keys %hash_query) {delete($hash_query{$key});}
+	foreach my $key (keys %hash_comp) {delete($hash_comp{$key});}
+	foreach my $key (keys %hash_bp) {delete($hash_bp{$key});}
+
+#    $message_filter=();
+#    $message_score=();
+#    $lines=();
+	undef($message_filter);
+	undef($message_score);
+	undef($lines);
+    return;
+}
+
+
+
+sub excise_seq{
+
+    #excise sub sequence from the potential precursor
+
+    my($seq,$beg,$end,$strand)=@_;
+
+    #begin can be equal to end if only one nucleotide is excised
+    unless($beg<=$end){print STDERR "begin can not be smaller than end for $subject_old\n";exit;}
+
+    #rarely, permuted combinations of signature and structure cause out of bound excision errors.
+    #this happens once appr. every two thousand combinations
+    unless($beg<=length($seq)){$out_of_bound++;return 0;}
+
+    #if on the minus strand, the reverse complement should be excised
+    if($strand eq "-"){$seq=revcom($seq);}
+
+    #the blast parsed format is 1-indexed, substr is 0-indexed
+    my $sub_seq=substr($seq,$beg-1,$end-$beg+1);
+
+    return $sub_seq;
+
+}
+
+sub excise_struct{
+
+    #excise sub structure
+
+    my($struct,$beg,$end,$strand)=@_;
+    my $lng=length $struct;
+
+    #begin can be equal to end if only one nucleotide is excised
+    unless($beg<=$end){print STDERR "begin can not be smaller than end for $subject_old\n";exit;}
+
+    #rarely, permuted combinations of signature and structure cause out of bound excision errors.
+    #this happens once appr. every two thousand combinations
+    unless($beg<=length($struct)){return 0;}
+
+    #if excising relative to minus strand, positions are reversed
+    if($strand eq "-"){($beg,$end)=rev_pos($beg,$end,$lng);}
+
+    #the blast parsed format is 1-indexed, substr is 0-indexed
+    my $sub_struct=substr($struct,$beg-1,$end-$beg+1);
+ 
+    return $sub_struct;
+}
+
+
+sub create_hash_nuclei{
+    #parses a fasta file with sequences of known miRNAs considered for conservation purposes
+    #reads the nuclei into a hash
+
+    my ($file) = @_;
+    my ($id, $desc, $sequence, $nucleus) = ();
+
+    open (FASTA, "<$file") or die "can not open $file\n";
+    while (<FASTA>)
+    {
+        chomp;
+        if (/^>(\S+)(.*)/)
+	{
+	    $id       = $1;
+	    $desc     = $2;
+	    $sequence = "";
+	    $nucleus  = "";
+	    while (<FASTA>){
+                chomp;
+                if (/^>(\S+)(.*)/){
+		    $nucleus                = substr($sequence,1,$nucleus_lng);
+		    $nucleus                =~ tr/[T]/[U]/;
+		    $hash_mirs{$nucleus}   .="$id\t";
+		    $hash_nuclei{$nucleus} += 1;
+
+		    $id               = $1;
+		    $desc             = $2;
+		    $sequence         = "";
+		    $nucleus          = "";
+		    next;
+                }
+		$sequence .= $_;
+            }
+        }
+    }
+    $nucleus                = substr($sequence,1,$nucleus_lng);
+    $nucleus                =~ tr/[T]/[U]/;
+    $hash_mirs{$nucleus}   .="$id\t";
+    $hash_nuclei{$nucleus} += 1;
+    close FASTA;
+}
+    
+
+sub parse_file_struct{
+	#parses the output from RNAfoldand reads it into hashes
+	my($file) = @_;
+	my($id,$desc,$seq,$struct,$mfe) = ();
+	open (FILE_STRUCT, "<$file") or die "can not open $file\n";
+	while (<FILE_STRUCT>){
+		chomp;
+		if (/^>(\S+)\s*(.*)/){
+			$id= $1;
+			$desc= $2;
+			$seq= "";
+			$struct= "";
+			$mfe= "";
+			while (<FILE_STRUCT>){
+				chomp;
+				if (/^>(\S+)\s*(.*)/){
+					$hash_desc{$id}   = $desc;
+					$hash_seq{$id}    = $seq;
+					$hash_struct{$id} = $struct;
+					$hash_mfe{$id}    = $mfe;
+					$id          = $1;
+					$desc        = $2;
+					$seq         = "";
+					$struct      = "";
+					$mfe         = "";
+					next;
+				}
+				if(/^\w/){
+					tr/uU/tT/;
+					$seq .= $_;
+					next;
+				}
+				if(/((\.|\(|\))+)/){$struct .=$1;}
+				if(/\((\s*-\d+\.\d+)\)/){$mfe = $1;}
+			}
+        }
+    }
+    $hash_desc{$id}        = $desc;
+    $hash_seq{$id}         = $seq;
+    $hash_struct{$id}      = $struct;
+    $hash_mfe{$id}         = $mfe;
+    close FILE_STRUCT;
+    return;
+}
+
+
+sub score_s{
+
+    #this score message is appended to the end of the string of score messages outputted for the potential precursor
+
+    my $message=shift;
+    $message_score.=$message."\n";;
+    return;
+}
+
+
+
+sub score_p{
+
+   #this score message is appended to the beginning of the string of score messages outputted for the potential precursor
+
+    my $message=shift;
+    $message_score=$message."\n".$message_score;
+    return;
+}
+
+
+
+sub filter_s{
+
+    #this filtering message is appended to the end of the string of filtering messages outputted for the potential precursor
+
+    my $message=shift;
+    $message_filter.=$message."\n";
+    return;
+}
+
+
+sub filter_p{
+
+    #this filtering message is appended to the beginning of the string of filtering messages outputted for the potential precursor
+
+    my $message=shift;
+    if(defined $message_filter){$message_filter=$message."\n".$message_filter;}
+    else{$message_filter=$message."\n";}
+    return;
+}
+
+    
+sub find_freq{
+
+    #finds the frequency of a given read query from its id.
+
+    my($query)=@_;
+
+    if($query=~/x(\d+)/i){
+	my $freq=$1;
+	return $freq;
+    }else{
+	#print STDERR "Problem with read format\n";
+	return 0;
+    }
+}
+
+
+sub print_hash_comp{
+
+    #prints the 'comp' hash
+
+    my @keys=sort keys %hash_comp;
+    foreach my $key(@keys){
+	my $value=$hash_comp{$key};
+	print "$key  \t$value\n";
+    }
+}
+
+
+
+sub print_hash_bp{
+
+    #prints the 'bp' hash
+
+    my @keys=sort {$a<=>$b} keys %hash_bp;
+    foreach my $key(@keys){
+	my $value=$hash_bp{$key};
+	print "$key\t$value\n";
+    }
+    print "\n";
+}
+    
+
+
+sub find_strand{
+
+    #A subroutine to find the strand, parsing different blast formats
+
+    my($other)=@_;
+
+    my $strand="+";
+
+    if($other=~/-/){
+	$strand="-";
+    }
+
+    if($other=~/minus/i){
+	$strand="-";
+    }
+    return($strand);
+}
+
+
+sub contained{
+
+    #Is the stretch defined by the first positions contained in the stretch defined by the second?
+
+    my($beg1,$end1,$beg2,$end2)=@_;
+
+    testbeginend($beg1,$end1,$beg2,$end2);
+
+    if($beg2<=$beg1 and $end1<=$end2){
+	return 1;
+    }else{
+	return 0;
+    }
+}
+
+
+sub testbeginend{
+
+    #Are the beginposition numerically smaller than the endposition for each pair?
+
+    my($begin1,$end1,$begin2,$end2)=@_;
+
+    unless($begin1<=$end1 and $begin2<=$end2){
+	print STDERR "beg can not be larger than end for $subject_old\n";
+	exit;
+    }
+}
+
+
+sub rev_pos{
+
+#   The blast_parsed format always uses positions that are relative to the 5' of the given strand
+#   This means that for a sequence of length n, the first nucleotide on the minus strand base pairs with
+#   the n't nucleotide on the plus strand
+
+#   This subroutine reverses the begin and end positions of positions of the minus strand so that they
+#   are relative to the 5' end of the plus strand	
+   
+    my($beg,$end,$lng)=@_;
+    
+    my $new_end=$lng-$beg+1;
+    my $new_beg=$lng-$end+1;
+    
+    return($new_beg,$new_end);
+}
+
+sub round {
+
+    #rounds to nearest integer
+   
+    my($number) = shift;
+    return int($number + .5);
+    
+}
+
+
+sub rev{
+
+    #reverses the order of nucleotides in a sequence
+
+    my($sequence)=@_;
+
+    my $rev=reverse $sequence;   
+
+    return $rev;
+}
+
+sub com{
+
+    #the complementary of a sequence
+
+    my($sequence)=@_;
+
+    $sequence=~tr/acgtuACGTU/TGCAATGCAA/;   
+ 
+    return $sequence;
+}
+
+sub revcom{
+    
+    #reverse complement
+
+    my($sequence)=@_;
+
+    my $revcom=rev(com($sequence));
+
+    return $revcom;
+}
+
+
+sub max2 {
+
+    #max of two numbers
+    
+    my($a, $b) = @_;
+    return ($a>$b ? $a : $b);
+}
+
+sub min2  {
+
+    #min of two numbers
+
+    my($a, $b) = @_;
+    return ($a<$b ? $a : $b);
+}
+
+
+
+sub score_freq{
+
+#   scores the count of reads that map to the potential precursor
+#   Assumes geometric distribution as described in methods section of manuscript
+
+    my $freq=shift;
+
+    #parameters of known precursors and background hairpins
+    my $parameter_test=0.999;
+    my $parameter_control=0.6;
+
+    #log_odds calculated directly to avoid underflow
+    my $intercept=log((1-$parameter_test)/(1-$parameter_control));
+    my $slope=log($parameter_test/$parameter_control);
+    my $log_odds=$slope*$freq+$intercept;
+
+    #if no strong evidence for 3' overhangs, limit the score contribution to 0
+    unless($options{x} or $hash_comp{"star_read"}){$log_odds=min2($log_odds,0);}
+   
+    return $log_odds;
+}
+
+
+
+##sub score_mfe{
+
+#   scores the minimum free energy in kCal/mol of the potential precursor
+#   Assumes Gumbel distribution as described in methods section of manuscript
+
+##    my $mfe=shift;
+
+    #numerical value, minimum 1
+##    my $mfe_adj=max2(1,-$mfe);
+
+    #parameters of known precursors and background hairpins, scale and location
+##    my $prob_test=prob_gumbel_discretized($mfe_adj,5.5,32);
+##    my $prob_background=prob_gumbel_discretized($mfe_adj,4.8,23);
+
+##    my $odds=$prob_test/$prob_background;
+##    my $log_odds=log($odds);
+
+##    return $log_odds;
+##}
+
+sub score_mfe{
+#	use bignum;
+
+#   scores the minimum free energy in kCal/mol of the potential precursor
+#   Assumes Gumbel distribution as described in methods section of manuscript
+
+    my ($mfe,$mlng)=@_;
+
+    #numerical value, minimum 1
+    my $mfe_adj=max2(1,-$mfe);
+my $mfe_adj1=$mfe/$mlng;
+    #parameters of known precursors and background hairpins, scale and location
+	my $a=1.339e-12;my $b=2.778e-13;my $c=45.834;
+	my $ev=$e**($mfe_adj1*$c);
+	#print STDERR "\n***",$ev,"**\t",$ev+$b,"\t";
+	my $log_odds=($a/($b+$ev));
+
+
+	my $prob_test=prob_gumbel_discretized($mfe_adj,5.5,32);
+    my $prob_background=prob_gumbel_discretized($mfe_adj,4.8,23);
+
+    my $odds=$prob_test/$prob_background;
+    my $log_odds_2=log($odds);
+	#print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n";
+   return $log_odds;
+}
+
+
+
+sub prob_gumbel_discretized{
+
+#   discretized Gumbel distribution, probabilities within windows of 1 kCal/mol
+#   uses the subroutine that calculates the cdf to find the probabilities
+
+    my ($var,$scale,$location)=@_;
+
+    my $bound_lower=$var-0.5;
+    my $bound_upper=$var+0.5;
+
+    my $cdf_lower=cdf_gumbel($bound_lower,$scale,$location);
+    my $cdf_upper=cdf_gumbel($bound_upper,$scale,$location);
+
+    my $prob=$cdf_upper-$cdf_lower;
+
+    return $prob;
+}
+
+
+sub cdf_gumbel{
+
+#   calculates the cumulative distribution function of the Gumbel distribution
+
+    my ($var,$scale,$location)=@_;
+
+    my $cdf=$e**(-($e**(-($var-$location)/$scale)));
+
+    return $cdf;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/miRNA_Express_and_sequence.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,173 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-6-4
+#Modified:
+#Description: solexa miRNA express and sequence
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h");
+if (!(defined $opts{i} and defined $opts{list} and defined $opts{fa} and defined $opts{pre} and defined $opts{tag}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'list'};
+my $out=$opts{'fa'};
+my $preout=$opts{'pre'};
+
+=cut
+my %hash_pri;
+open PRI,"<$opts{p}";
+while (my $aline=<PRI>) {
+	chomp $aline;
+	if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;}
+}
+close PRI;
+=cut
+
+open IN,"<$filein"; #input file  
+open OUT,">$fileout"; #output file  
+open FA ,">$out";
+open PRE,">$preout";
+
+print OUT "#ID\tcoordinate\tpos1\tpos2";
+my @marks=split/\,/,$opts{'tag'};
+foreach  (@marks) {
+	print OUT "\t",$_,"_matureExp";
+}
+foreach  (@marks) {
+	print OUT "\t",$_,"_starExp";
+}
+foreach  (@marks) {
+	print OUT "\t",$_,"_totalExp";
+}
+
+print OUT "\n";
+
+my (%uniq_id,$novel);
+while (my $aline=<IN>) {
+	chomp $aline;
+	until ($aline =~ /^score\s+[-\d\.]+/){
+		$aline = <IN>;
+		if (eof) {last;}
+	}
+	if (eof) {last;}
+########## miRNA ID ################
+	$novel++;
+########### annotate####################
+	do {$aline=<IN>;} until($aline=~/flank_first_end/) ;
+	chomp $aline;
+	my @flank1=split/\t/,$aline;
+	do {$aline=<IN>;} until($aline=~/flank_second_beg/) ;
+	chomp $aline;
+	my @flank2=split/\t/,$aline;
+#		
+########## mature start loop pre ####
+	do {$aline=<IN>;} until($aline=~/mature_beg/) ;
+	chomp $aline;
+	my @start=split/\t/,$aline;
+#	$start[1] -=$flank1[1];
+	do {$aline=<IN>;} until($aline=~/mature_end/) ;
+	chomp $aline;
+	my @end=split/\t/,$aline;
+#	$end[1] -=$flank1[1];
+	do {$aline=<IN>;} until($aline=~/mature_seq/) ;
+	chomp $aline;
+	my @arr1=split/\t/,$aline;
+	do {$aline=<IN>;} until($aline=~/pre_seq/) ;
+	chomp $aline;
+	my @arr2=split/\t/,$aline;
+	do {$aline=<IN>;} until($aline=~/pri_id/) ;
+	chomp $aline;
+	my @pri_id=split/\t/,$aline;
+	do {$aline=<IN>;} until($aline=~/pri_seq/) ;
+	chomp $aline;
+	my @pri_seq=split/\t/,$aline;
+	do {$aline=<IN>;} until($aline=~/star_beg/) ;
+	chomp $aline;
+	my @star_start=split/\t/,$aline;
+#	$star_start[1] -=$flank1[1];
+	do {$aline=<IN>;} until($aline=~/star_end/) ;
+	chomp $aline;
+	my @star_end=split/\t/,$aline;
+#	$star_end[1] -=$flank1[1];
+	do {$aline=<IN>;} until($aline=~/star_seq/) ;
+	chomp $aline;
+	my @arr3=split/\t/,$aline;
+	print OUT "miR-c-$novel\t$pri_id[1]\tmature:$start[1]:$end[1]\tstar:$star_start[1]:$star_end[1]\t";
+	#print OUT "$arr1[1]\t$arr3[1]\t$arr2[1]\t\/\t";
+	print FA ">miR-c-$novel\n$arr1[1]\n";
+	print PRE ">miR-c-$novel\n$pri_seq[1]\n";
+########## reads count #############
+	<IN>;
+	my @count1;my @count2;my @count3;my @count4;
+	$aline=<IN>;
+	do {
+		chomp $aline;
+		my @reads=split/\t/,$aline;
+		my @pos=();
+		$reads[5]=~/(\d+)\.\.(\d+)/;
+#		$pos[0] =$1-$flank1[1];
+#		$pos[1] =$2-$flank1[1];
+		$pos[0]=$1;
+		$pos[1]=$2;
+		$reads[0]=~/:([\d|_]+)_x(\d+)$/;
+		my @ss=split/_/,$1;
+		for (my $i=0;$i<@ss ;$i++) {
+			if (!(defined $count3[$i])) {
+				$count3[$i]=0;
+			}
+			if (!(defined $count4[$i])) {
+				$count4[$i]=0;
+			}
+			$count2[$i]+=$ss[$i];
+
+		}
+#		$count3 +=$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 );
+#		$count4 +=$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 );
+#		$count1 =$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 && $count1<$1);
+#		$count2 =$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 && $count2<$1);
+		if($end[1]-$pos[1]>=-5 && $end[1]-$pos[1]<=5 && $pos[0]-$start[1]>=-3 && $pos[0]-$start[1]<=3 )
+		{
+			for (my $i=0;$i<@ss;$i++) {
+				$count3[$i]+=$ss[$i];
+			}
+		}
+		if($star_end[1]-$pos[1]<=5 && $star_end[1]-$pos[1]>=-5 && $pos[0]-$star_start[1]>=-3 && $pos[0]-$star_start[1]<=3){
+			for (my $i=0;$i<@ss;$i++) {
+				$count4[$i]+=$ss[$i];
+			}
+		} 
+		$aline=<IN>;
+		chomp $aline;
+	} until(length $aline < 1) ;
+	$"="\t";
+	print OUT "@count3\t@count4\t@count2\n";
+	$"=" ";
+}
+
+close IN;
+close OUT;
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -list -fa -pre -tag
+options:
+-i input file,predictions file
+-list output file miRNA list file
+-fa output file ,miRNA sequence fasta file.
+-pre output file, miRNA precursor fasta file.
+-tag string, sample names# eg: samA,samB,samC
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/microRNA.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,253 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-4-22
+#Modified:
+#Description: plant microRNA prediction
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use threads;
+#use threads::shared;
+use File::Path;
+use File::Basename;
+#use RNA;
+#use Term::ANSIColor;
+
+my %opts;
+GetOptions(\%opts,"i=s","fa=s","gfa=s","pre:s","mat:s","dis:i","flank:i","mfe:f","idx:s","mis:i","r:i","e:i","f:i","t:i","o:s","path:s","D","h");
+if (!(defined $opts{i} and defined $opts{gfa}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $time=&Time();
+print "miPlant program start:\n   The time is $time!\n";
+print "Command line:\n   $0 @ARGV\n";
+
+my  $mypath=`pwd`;
+chomp $mypath;
+
+my $dir=defined $opts{'o'} ? $opts{'o'} : "$mypath/miRNA_out/";
+
+
+unless ($dir=~/\/$/) {$dir.="/";}
+if (not -d $dir) {
+	mkdir $dir;
+}
+my $config=$opts{'i'};
+my $data=$opts{'fa'};
+
+my $scipt_path=defined $opts{'path'} ? $opts{'path'} : "/Users/big/galaxy-dist/tools/myTools/";
+
+my $t=1; #threads number
+if (defined $opts{'t'}) {$t=$opts{'t'};}
+
+my $mis=0; #mismatch number for microRNA
+if (defined $opts{'mis'}) {$mis=$opts{'mis'};}
+
+my $hit=25; # maximum reads mapping hits in genome
+if (defined $opts{'r'}) {$hit=$opts{'r'};}
+
+my $upstream = 2; # microRNA 5' extension
+$upstream = $opts{'e'} if(defined $opts{'e'});
+
+my $downstream = 5;# microRNA 3' extension
+$downstream = $opts{'f'} if(defined $opts{'f'});
+
+my $maxd=defined $opts{'dis'} ? $opts{'dis'} : 200;
+my $flank=defined $opts{'flank'} ? $opts{'flank'} :10;
+my $mfe=defined $opts{'mfe'} ? $opts{'mfe'} : -20;
+
+$time=&Time();
+print "$time, Checking input file!\n";
+
+my (@filein,@mark);
+&read_config();
+
+&checkfa($opts{pre}) if(defined $opts{pre});
+&checkfa($opts{mat}) if(defined $opts{mat});
+&checkfa($opts{gfa});
+
+chdir $dir;
+my $data2=$data;
+my $known_result=$dir."known_miRNA_Express";
+if(defined $opts{pre} and defined $opts{mat}){
+	&quantify(); ### known microRAN quantify
+	$data2=$known_result."/mirbase_not_mapped.fa";
+}
+
+my $genome_map=$dir."genome_match";
+&genome($data2);
+
+#my $genome_map=&search($dir,"genome_match_");
+my $mapfile=$genome_map."/genome_mapped.bwt";
+my $mapfa=$genome_map."/genome_mapped.fa";
+my $unmap=$genome_map."/genome_not_mapped.fa";
+
+#$time=Time();
+#print "$time: Novel microRNA prediction!\n\n";
+
+&predict($mapfa);
+
+$time=Time();
+print "$time: Program end!!\n";
+
+############################## sub programs ###################################
+sub predict{
+	my ($file)=@_;
+	$time=&Time();
+	print "$time: Novel microRNA prediction!\n\n";
+	my $predict=$dir."Novel_miRNA_predict";
+	mkdir $predict;
+	chdir $predict;
+	system("perl $scipt_path/precursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe");
+#	print "\nprecursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe\n";
+	
+	system("bowtie-build -f excised_precursor.fa excised_precursor");
+#	print "\nbowtie-build -f excised_precursor.fa excised_precursor\n";
+	
+	system("bowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt 2> run.log");
+#	print "\nbowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt\n";
+	
+	system("perl $scipt_path/convert_bowtie_to_blast.pl  precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst");
+#	print "\nconvert_bowtie_to_blast.pl  precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst\n";
+
+	system("sort -k 4  precursor_mapped.bst  > signatures.bst");
+#	print "\nsort +3 -25  precursor_mapped.bst  > ../signatures.bst\n";
+
+	chdir $dir;
+	system("perl $scipt_path/miRDeep_plant.pl $predict/signatures.bst $predict/excised_precursor_struc.txt novel_tmp_dir -y > microRNA_prediction.mrd");
+#	print "\nmiRDeep_plant.pl $dir/signatures.bst $predict/excised_precursor_struc.txt tmp_dir -y > microRNA_prediction.txt\n";
+	#system("rm novel_tmp_dir -rf");
+	my $tag=join "," ,@mark;
+	system("perl $scipt_path/miRNA_Express_and_sequence.pl -i microRNA_prediction.mrd -list novel_microRNA_express.txt -fa novel_microRNA_mature.fa -pre novel_microRNA_precursor.fa -tag $tag");
+
+	system("perl $scipt_path/non_miRNA_reads.pl -i microRNA_prediction.mrd -fa $file -o non_microRNA_sequence.fa");
+
+}
+
+sub genome{
+	my ($file)=@_;
+	if(defined $opts{'idx'}){
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} ") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time\n";
+	}else{
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir ") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time\n";
+	}
+}
+
+sub quantify{
+	my $tag=join "\\;" ,@mark;
+	system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -mis $mis -t $t -e $upstream -f $downstream -tag $tag");
+	print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n";
+}
+
+sub read_config{
+	open CON,"<$config";
+	while (my $aline=<CON>) {
+		chomp $aline;
+		my @tmp=split/\t/,$aline;
+		push @filein,$tmp[0];
+		push @mark,$tmp[1];
+		#&check_rawdata($tmp[0]);
+	}
+	close CON;
+	if (@filein != @mark) {
+		#&printErr();
+		die "Maybe config file have some wrong!!!\n";
+	}
+}
+sub checkfa{
+	my ($file_reads)=@_;
+	open N,"<$file_reads";
+	my $line=<N>;
+	chomp $line;
+    if($line !~ /^>\S+/){
+        #printErr();
+        die "The first line of file $file_reads does not start with '>identifier'
+Reads file $file_reads is not a valid fasta file\n\n";
+    }
+    if(<N> !~ /^[ACGTNacgtn]*$/){
+        #printErr();
+        die "File $file_reads contains not allowed characters in sequences
+Allowed characters are ACGTN
+Reads file $file_reads is not a fasta file\n\n";
+    }
+	close N;
+}
+sub search{
+	my ($dir,$str)=@_;
+	opendir I,$dir;
+	my @ret;
+	while (my $file=readdir I) {
+		if ($file=~/$str/) {
+			push @ret, $file;
+		}
+	}
+	closedir I;
+	if (@ret != 1) {
+		#&printErr();
+
+		die "Can not find directory or file which name has string: $str !!!\n";
+	}
+	return $ret[0];
+}
+
+
+sub Time{
+        my $time=time();
+        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+        $month++;
+        $year+=1900;
+        if (length($sec) == 1) {$sec = "0"."$sec";}
+        if (length($min) == 1) {$min = "0"."$min";}
+        if (length($hour) == 1) {$hour = "0"."$hour";}
+        if (length($day) == 1) {$day = "0"."$day";}
+        if (length($month) == 1) {$month = "0"."$month";}
+        #print "$year-$month-$day $hour:$min:$sec\n";
+        return("$year-$month-$day $hour:$min:$sec");
+}
+
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+
+$0 -i -fa -gfa -idx -pre -mat -mis -e -f  -t  -o  -path
+options:
+-i input files, # config
+
+-fa ,#fasta sequence file
+
+-path scirpt path
+
+-gfa string,  input file # genome fasta. sequence file
+-idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+
+-pre string,  input file #species specific microRNA precursor sequences
+-mat string,  input file #species specific microRNA mature sequences
+
+-mis [int]     number of allowed mismatches when mapping reads to precursors, default 0
+-e [int]     number of nucleotides upstream of the mature sequence to consider, default 2
+-f [int]     number of nucleotides downstream of the mature sequence to consider, default 5
+-r int       a read is allowed to map up to this number of positions in the genome,default is 25 
+
+-dis <int>   Maximal space between miRNA and miRNA* (200)
+-flank <int>   Flank sequence length of miRNA precursor (10)
+-mfe <folat> Maximal free energy allowed for a miRNA precursor (-20)
+
+-t int,    number of threads [1]
+
+-o output directory# absolute path
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/microRNA.xml	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,113 @@
+<tool id="micrornas_v1" name="microRNA" veision="1.0.0">
+  <description>Program for plant microRNA analysis(miRNA only) </description>
+
+  <requirements>
+	<requirement type="package" version="0.0.13">fastx_toolkit </requirement>
+    <requirement type="package" version="0.12.7">bowtie</requirement>
+    <requirement type="set_environment">SCRIPT_PATH</requirement>
+    <!--requirement type="package" version="3.0.1">R</requirement!-->
+	<requirement type="package" version="2.59">SVG</requirement>
+	<requirement type="package" version="2.1.8">ViennaRNA</requirement>
+  </requirements>
+
+  <!--command interpreter="perl">miPlant.pl -i $input -format $format -gfa $gfa -idx $index -pre $pre -mat $mat -rfam $rfam -idx2 $idx2 -D $D -a $a -M $M -min $min -max $max -mis $mis -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe -t $t -o $output</command-->
+
+  <command interpreter="perl">microRNA.pl 
+   ## Change this to accommodate the number of threads you have available.
+        -t \${GALAXY_SLOTS:-4}
+	-path \$SCRIPT_PATH
+
+
+   ## Do or not annotate known microRNAs
+    #if $mirbase.known_microRNA == "yes":
+	-pre $mirbase.pre -mat $mirbase.mat 
+	#end if
+
+        ## prepare bowtie index
+        #set index_path = ''
+        #if str($reference_genome.source) == "history":
+           ### bowtie-build "$reference_genome.own_file" genome; ln -s "$reference_genome.own_file" genome.fa;
+            #set index_path = $reference_genome.own_file
+			-gfa $index_path
+        #else:
+            #set index_path = $reference_genome.index.fields.path
+			-gfa ${index_path}.fa -idx $index_path
+		#end if
+
+
+     -mis $mismatch  -i $config -fa $reads -e $e -f $f -r $r -dis $dis -flank $flank -mfe $mfe  > run.log
+  </command>
+
+  <inputs>
+	<param name="config" type="data" label="Raw data configs file" />
+	<param name="reads" type="data" label="Input Fasta. file of candidate microRNA sequence" />
+
+	        <!-- reference genome -->
+        <conditional name="reference_genome">
+          <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in index</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+              <options from_data_table="bowtie_indexes">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+            <param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
+          </when>
+        </conditional>
+
+	<conditional name="mirbase">
+		<param name="known_microRNA" type="select" label="Analysis known microRNAs(eg. from mirbase)">
+		  <option value="yes" selected="true">yes</option>
+		  <option value="no">no</option>
+		 </param>
+		 <when value="yes">
+			<param name="mat" type="data" label="mature microRNA sequence file" />
+			<param name="pre" type="data" label="precursor microRNA sequence fie" />
+		 </when>
+    </conditional> <!-- params -->
+
+
+
+	<param name="mismatch" type="integer" value="0" label="number of allowed mismatches when mapping reads to genome" />
+	<param name="e" type="integer" value="2" label="number of nucleotides upstream of the mature sequence to consider" />
+	<param name="f" type="integer" value="5" label="number of nucleotides downstream of the mature sequence to consider" />
+	<param name="r" type="integer" value="25" label="a read is allowed to map up to this number of positions in the genome" />
+	<param name="dis" type="integer" value="200" label="Maximal space between miRNA and miRNA*" />
+	<param name="flank" type="integer" value="10" label="Flank sequence length of miRNA precursor" />
+	<param name="mfe" type="float" value="-30" label="Maximal free energy allowed for a miRNA precursor" />
+
+  </inputs>
+
+  <outputs>
+   <data format="txt" name="known microRNA express list" from_work_dir="miRNA_out/known_microRNA_express.txt" label="${tool.name} on ${on_string}: known microRNA express list">
+   <filter>(mirbase['known_microRNA'] == 'yes')</filter>
+   </data>
+   <data format="txt" name="known microRNA express alignment" from_work_dir="miRNA_out/known_microRNA_express.aln" label="${tool.name} on ${on_string}: known microRNA express alignment">
+   <filter>(mirbase['known_microRNA'] == 'yes')</filter>
+   </data>
+   <data format="txt" name="known microRNA moRs result" from_work_dir="miRNA_out/known_microRNA_express.moRs" label="${tool.name} on ${on_string}: known microRNA moRs result">
+   <filter>(mirbase['known_microRNA'] == 'yes')</filter>
+   </data>
+   <data format="fasta" name="known microRNA precursor file" from_work_dir="miRNA_out/known_microRNA_precursor.fa" label="${tool.name} on ${on_string}: known microRNA precursor file">
+   <filter>(mirbase['known_microRNA'] == 'yes')</filter>
+   </data>
+   <data format="fasta" name="known microRNA mature file" from_work_dir="miRNA_out/known_microRNA_mature.fa" label="${tool.name} on ${on_string}: known microRNA mature file">
+   <filter>(mirbase['known_microRNA'] == 'yes')</filter>
+   </data>
+   <data format="txt" name="novel microRNA express list" from_work_dir="miRNA_out/novel_microRNA_express.txt" label="${tool.name} on ${on_string}: novel microRNA express list"/>
+   <data format="fasta" name="novel microRNA precursor file" from_work_dir="miRNA_out/novel_microRNA_precursor.fa" label="${tool.name} on ${on_string}: novel microRNA precursor file"/>
+   <data format="fasta" name="novel microRNA mature sequence file" from_work_dir="miRNA_out/novel_microRNA_mature.fa" label="${tool.name} on ${on_string}: novel microRNA mature sequence file"/>
+   <data format="fasta" name="non-microRNA sequence FASTA file" from_work_dir="miRNA_out/non_microRNA_sequence.fa" label="${tool.name} on ${on_string}: Sequence FASTA file of non-microRNA tags"/>
+
+  </outputs>
+
+ <help>
+
+ </help>
+ </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/microRNA_pipeline.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,508 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-4-22
+#Modified:
+#Description: plant microRNA prediction
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use threads;
+#use threads::shared;
+use File::Path;
+use File::Basename;
+#use RNA;
+#use Term::ANSIColor;
+
+my %opts;
+GetOptions(\%opts,"i:s@","tag:s@","phred:i","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h");
+if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} and defined $opts{pre} and defined $opts{mat}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $time=&Time();
+print "miPlant program start:\n   The time is $time!\n";
+print "Command line:\n   $0 @ARGV\n";
+
+my $format=$opts{'format'};
+if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { 
+	#&printErr();
+	die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
+}
+
+my $phred_qv=64;
+if (defined $opts{'phred'}) {
+	$phred_qv=$opts{'phred'};
+}
+
+my @inputfiles=@{$opts{'i'}};
+my @inputtags=@{$opts{'tag'}};
+
+my  $mypath=`pwd`;
+chomp $mypath;
+
+my $dir=defined $opts{'o'} ? $opts{'o'} : "$mypath/miRPlant_out/";
+
+
+unless ($dir=~/\/$/) {$dir.="/";}
+if (not -d $dir) {
+	mkdir $dir;
+}
+my $config=$dir."/input_config";
+open CONFIG,">$config";
+	for (my $i=0;$i<@inputfiles;$i++) {
+		print CONFIG $inputfiles[$i],"\t",$inputtags[$i],"\n";
+	}
+close CONFIG;
+
+my $scipt_path=defined $opts{'path'} ? $opts{'path'} : "/Users/big/galaxy-dist/tools/myTools/";
+
+my $a="ATCTCGTATG";  #adapter
+if (defined $opts{'a'}) {$a=$opts{'a'};}
+
+my $m=6;  #adapter minimum mapped nt
+if (defined $opts{'M'}) {$m=$opts{'M'};}
+
+my $t=1; #threads number
+if (defined $opts{'t'}) {$t=$opts{'t'};}
+
+my $min_nt=19; # minimum reads length
+if (defined $opts{'min'}) {$min_nt=$opts{'min'};}
+
+my $max_nt=28; #maximum reads length
+if (defined $opts{'max'}) {$max_nt=$opts{'max'};}
+
+my $mis=0; #mismatch number for microRNA
+if (defined $opts{'mis'}) {$mis=$opts{'mis'};}
+
+my $mis_rfam=0;# mismatch number for rfam
+if (defined $opts{'v'}) {$mis_rfam=$opts{'v'};}
+
+my $hit=25; # maximum reads mapping hits in genome
+if (defined $opts{'r'}) {$hit=$opts{'r'};}
+
+my $upstream = 2; # microRNA 5' extension
+$upstream = $opts{'e'} if(defined $opts{'e'});
+
+my $downstream = 5;# microRNA 3' extension
+$downstream = $opts{'f'} if(defined $opts{'f'});
+
+my $maxd=defined $opts{'dis'} ? $opts{'dis'} : 200;
+my $flank=defined $opts{'flank'} ? $opts{'flank'} :10;
+my $mfe=defined $opts{'mfe'} ? $opts{'mfe'} : -20;
+
+$time=&Time();
+print "$time, Checking input file!\n";
+
+my (@filein,@mark,@clean);
+#&read_config();
+@filein=@inputfiles;
+@mark=@inputtags;
+
+&checkfa($opts{pre});
+&checkfa($opts{mat});
+&checkfa($opts{gfa});
+
+
+##### clip adpter --> clean data  start
+$time=&Time();
+print "$time, Preprocess:\n   trim adapter, reads collapse and filter reads by length.\n";
+
+$time=~s/:/-/g;
+$time=~s/ /-/g;
+my $preprocess=$dir."preProcess/";
+mkdir $preprocess;
+my $can_use_threads = eval 'use threads; 1';
+if ($can_use_threads) {
+# Do processing using threads
+	print "Do processing using threads\n";
+	my @filein1=@filein; my @mark1=@mark;
+	while (@filein1>0) {
+		my @thrs; my @res;
+		for (my $i=0;$i<$t ;$i++) {
+			last if(@filein1==0);
+			my $in=shift @filein1;
+			my $out=shift @mark1;
+			push @clean,$preprocess.$out."_clips_adapter.fq";
+			$thrs[$i]=threads->create(\&clips,$in,$out);
+		}
+		for (my $i=0;$i<@thrs;$i++) {
+			$res[$i]=$thrs[$i]->join();
+		}
+	}
+} else {
+# Do not processing using threads
+	print "Do not processing using threads\n";
+	for (my $i=0;$i<@filein ;$i++) {
+		my $in=$filein[$i];
+		my $out=$mark[$i];
+		push @clean,$preprocess.$out."_clips_adapter.fq";
+		&clips($in,$out);
+	}
+}
+
+##### clip adpter --> clean data  end
+
+my $collapsed=$preprocess."collapse_reads.fa";
+my $data=$preprocess."collapse_reads_${min_nt}_${max_nt}.fa"; ## raw clean data
+my $data2;   ### mirbase not mapped reads
+my $data3;   ### rfam not mapped reads
+&collapse(\@clean,$collapsed);   #collapse reads to tags
+
+&filterbylength();  # filter <$min_nt  && >$max_nt
+
+print "The final clean data file is $data, only contains reads which length is among $min_nt\~$max_nt\n\n";
+
+$time=Time();
+print "$time: known microRNA quantify!\n\n";
+
+chdir $dir;
+
+$time=~s/:/-/g;
+$time=~s/ /-/g;
+my $known_result=$dir."known_miRNA_Express/";
+&quantify(); ### known microRAN quantify
+
+
+#my $miR_exp_dir=&search($known_result,"miRNA_Express_");
+$data2=$known_result."/mirbase_not_mapped.fa";
+
+my $pathfile="$dir/path.txt";
+open PA,">$pathfile";
+print PA "$config\n";
+print PA "$preprocess\n";
+print PA "$known_result\n";
+
+if (defined $opts{'rfam'}) {              #rfam mapping and analysis
+	$time=Time();
+	print "$time: RNA annotate!\n\n";
+	$time=~s/:/-/g;
+	$time=~s/ /-/g;
+	my $rfam_exp_dir=$dir."rfam_match";
+	&rfam();
+	#my $rfam_exp_dir=&search($dir,"rfam_match_");
+	$data3=$rfam_exp_dir."/rfam_not_mapped.fa";
+print PA "$rfam_exp_dir\n";
+
+	my $tag=join "\\;" ,@mark;
+	system("perl $scipt_path/count_rfam_express.pl -i $rfam_exp_dir/rfam_mapped.bwt -tag $tag -o rfam_non-miRNA_annotation.txt");
+}
+
+my $data4=$data;
+if (defined $opts{'D'}) {                 #genome mapping 
+	$data4=$data3;
+}else{
+	$data4=$data2;
+}
+
+$time=Time();
+print "$time: Genome alignment!\n\n";
+$time=~s/:/-/g;
+$time=~s/ /-/g;
+my $genome_map=$dir."genome_match";
+&genome($data4);
+print PA "$genome_map\n";
+#my $genome_map=&search($dir,"genome_match_");
+my $mapfile=$genome_map."/genome_mapped.bwt";
+my $mapfa=$genome_map."/genome_mapped.fa";
+my $unmap=$genome_map."/genome_not_mapped.fa";
+
+#$time=Time();
+#print "$time: Novel microRNA prediction!\n\n";
+
+&predict($mapfa);
+
+close PA;
+system("perl $scipt_path/html_miRPlant.pl -i $pathfile -format $format -o $dir/result.html");
+
+$time=Time();
+print "$time: Program end!!\n";
+
+############################## sub programs ###################################
+sub predict{
+	my ($file)=@_;
+	$time=&Time();
+	print "$time: Novel microRNA prediction!\n\n";
+	$time=~s/:/-/g;
+	$time=~s/ /-/g;
+	my $predict=$dir."novel_miRNA_predict";
+print PA "$predict\n";
+	mkdir $predict;
+	chdir $predict;
+	system("perl $scipt_path/precursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe");
+#	print "\nprecursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe\n";
+	
+	system("bowtie-build -f excised_precursor.fa excised_precursor");
+#	print "\nbowtie-build -f excised_precursor.fa excised_precursor\n";
+	
+	system("bowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt 2> run.log");
+#	print "\nbowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt\n";
+	
+	system("perl $scipt_path/convert_bowtie_to_blast.pl  precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst");
+#	print "\nconvert_bowtie_to_blast.pl  precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst\n";
+
+	system("sort -k 4  precursor_mapped.bst  > signatures.bst");
+#	print "\nsort +3 -25  precursor_mapped.bst  > ../signatures.bst\n";
+
+	chdir $dir;
+	system("perl $scipt_path/miRDeep_plant.pl $predict/signatures.bst $predict/excised_precursor_struc.txt novel_tmp_dir -y > microRNA_prediction.mrd");
+#	print "\nmiRDeep_plant.pl $dir/signatures.bst $predict/excised_precursor_struc.txt tmp_dir -y > microRNA_prediction.txt\n";
+	#system("rm novel_tmp_dir -rf");
+	my $tag=join "," ,@mark;
+	system("perl $scipt_path/miRNA_Express_and_sequence.pl -i microRNA_prediction.mrd -list novel_microRNA_express.txt -fa novel_microRNA_mature.fa -pre novel_microRNA_precursor.fa -tag $tag");
+}
+
+sub genome{
+	my ($file)=@_;
+	if(defined $opts{'idx'}){
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} ") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time\n";
+	}else{
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir ") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time\n";
+	}
+}
+sub rfam{
+	if (defined $opts{'idx2'}) {
+		system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} ");
+#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time\n";
+	}else{
+		system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir");
+#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time\n";
+	}
+}
+sub quantify{
+	my $tag=join "\\;" ,@mark;
+	system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -mis $mis -t $t -e $upstream -f $downstream -tag $tag");
+	print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir  -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n";
+}
+sub filterbylength{
+	my $tmpmark=join ",", @mark;
+	system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark");
+	system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt  -o $preprocess/length.html");
+#	print "\nfilterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark\n";
+
+}
+sub collapse{
+	my ($ins,$data)=@_;
+		my $str="";
+	for (my $i=0;$i<@{$ins};$i++) {
+		$str .="-i $$ins[$i] ";
+	}
+	system ("perl $scipt_path/collapseReads2Tags.pl $str -mark seq -o $data -format $format");
+#	print "\ncollapseReads2Tags.pl $str -mark seq -o $data -format $format\n";
+}
+
+sub clips{
+	my ($in,$out)=@_;
+	my $adapter=$preprocess.$out."_clips_adapter.fq";
+	if($format eq "fq" || $format eq "fastq"){
+		system("fastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter") ;
+		print "\nfastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter\n";
+	}
+	if($format eq "fa" || $format eq "fasta"){
+		system("fastx_clipper -a $a -M $m -i $in -o $adapter") ;
+        #		print "\nfastx_clipper -a $a -M $m -i $in -o $adapter\n";
+	}
+	#my $clean=$preprocess.$out."_clean.fq";
+	#system("filterReadsByLength.pl -i $adapter -o $clean -min $min_nt -max $max_nt ");
+
+	return;
+}
+
+sub read_config{
+	open CON,"<$config";
+	while (my $aline=<CON>) {
+		chomp $aline;
+		my @tmp=split/\t/,$aline;
+		push @filein,$tmp[0];
+		push @mark,$tmp[1];
+		&check_rawdata($tmp[0]);
+	}
+	close CON;
+	if (@filein != @mark) {
+		#&printErr();
+		die "Maybe config file have some wrong!!!\n";
+	}
+}
+sub check_rawdata{
+	my ($fileforcheck)=@_;
+	if (!(-s $fileforcheck)) {
+		#&printErr();
+		die "Can not find $fileforcheck, or file is empty!!!\n";
+	}
+	if ($format eq "fasta" || $format eq "fa") {
+		&checkfa($fileforcheck);
+	}
+	if ($format eq "fastq" || $format eq "fq") {
+		&checkfq($fileforcheck);
+	}
+}
+sub checkfa{
+	my ($file_reads)=@_;
+	open N,"<$file_reads";
+	my $line=<N>;
+	chomp $line;
+    if($line !~ /^>\S+/){
+        #printErr();
+        die "The first line of file $file_reads does not start with '>identifier'
+Reads file $file_reads is not a valid fasta file\n\n";
+    }
+    if(<N> !~ /^[ACGTNacgtn]*$/){
+        #printErr();
+        die "File $file_reads contains not allowed characters in sequences
+Allowed characters are ACGTN
+Reads file $file_reads is not a fasta file\n\n";
+    }
+	close N;
+}
+sub checkfq{
+	my ($file_reads)=@_;
+
+	open N,"<$file_reads";
+	for (my $i=0;$i<10;$i++) {
+		my $a=<N>;
+		my $b=<N>;
+		my $c=<N>;
+		my $d=<N>;
+		chomp $a;
+		chomp $b;
+		chomp $c;
+		chomp $d;
+		if($a!~/^\@/){
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		if($b!~ /^[ACGTNacgtn]*$/){
+			#&printErr();
+			die "File $file_reads contains not allowed characters in sequences
+Allowed characters are ACGTN
+Reads file $file_reads is not a fasta file\n\n";
+		}
+		if ($c!~/^\@/ && $c!~/^\+/) {
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		if ((length $b) != (length $d)) {
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		my @qv=split //,$d;
+		for (my $j=0;$j<@qv ;$j++) {
+			my $q=ord($qv[$j])-64;
+			if($q<0){$phred_qv=33;}
+		}
+	}
+	close N;
+}
+
+sub search{
+	my ($dir,$str)=@_;
+	opendir I,$dir;
+	my @ret;
+	while (my $file=readdir I) {
+		if ($file=~/$str/) {
+			push @ret, $file;
+		}
+	}
+	closedir I;
+	if (@ret != 1) {
+		#&printErr();
+
+		die "Can not find directory or file which name has string: $str !!!\n";
+	}
+	return $ret[0];
+}
+
+=cut
+
+sub printErr{
+    print STDERR color 'bold red';
+    print STDERR "Error: ";
+    print STDERR color 'reset';
+}
+sub Time{
+        my $time=time();
+        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+        $month++;
+        $year+=1900;
+        if (length($sec) == 1) {$sec = "0"."$sec";}
+        if (length($min) == 1) {$min = "0"."$min";}
+        if (length($hour) == 1) {$hour = "0"."$hour";}
+        if (length($day) == 1) {$day = "0"."$day";}
+        if (length($month) == 1) {$month = "0"."$month";}
+        #print "$year-$month-$day $hour:$min:$sec\n";
+        return("$year-$month-$day-$hour-$min-$sec");
+}
+=cut
+sub Time{
+        my $time=time();
+        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+        $month++;
+        $year+=1900;
+        if (length($sec) == 1) {$sec = "0"."$sec";}
+        if (length($min) == 1) {$min = "0"."$min";}
+        if (length($hour) == 1) {$hour = "0"."$hour";}
+        if (length($day) == 1) {$day = "0"."$day";}
+        if (length($month) == 1) {$month = "0"."$month";}
+        #print "$year-$month-$day $hour:$min:$sec\n";
+        return("$year-$month-$day $hour:$min:$sec");
+}
+
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+
+$0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t  -o  -path
+options:
+-i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ...
+-tag string # raw data file names, -tag xxx -tag xxx
+
+-format string,#specific input rawdata file format : fastq|fq|fasta|fa
+
+-path scirpt path
+
+-gfa string,  input file # genome fasta. sequence file
+-idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+
+-pre string,  input file #species specific microRNA precursor sequences
+-mat string,  input file #species specific microRNA mature sequences
+
+-rfam string,  input file# rfam database file, microRNAs must not be contained in this file## if not define, rfam small RNA will not be count.
+-idx2 string,  rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+
+-D      If [-D] is specified,will discard rfam mapped reads(nead -rfam).
+
+-a string,  ADAPTER string. default is ATCTCGTATG.
+-M int,    require minimum adapter alignment length of N. If less than N nucleotides aligned with the adapter - don't clip it. 
+-min int,  reads min length,default is 19.
+-max int,  reads max length,default is 28.
+
+-mis [int]     number of allowed mismatches when mapping reads to precursors, default 0
+-e [int]     number of nucleotides upstream of the mature sequence to consider, default 2
+-f [int]     number of nucleotides downstream of the mature sequence to consider, default 5
+-v <int>     report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment
+-r int       a read is allowed to map up to this number of positions in the genome,default is 25 
+
+-dis <int>   Maximal space between miRNA and miRNA* (200)
+-flank <int>   Flank sequence length of miRNA precursor (10)
+-mfe <folat> Maximal free energy allowed for a miRNA precursor (-20)
+
+-t int,    number of threads [1]
+
+-o output directory# absolute path
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/microRNA_pipeline.xml	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,192 @@
+<tool id="plant_microRNA_v1" name="microRNA_pipeline" veision="1.0.0">
+  <description>Program for plant microRNA analysis (rawdata preprocess -> genome mapping -> non-coding RNA(exclude miRNAs) annotation -> microRNA analysis)</description>
+
+  <requirements>
+	<requirement type="package" version="0.0.13">fastx_toolkit </requirement>
+    <requirement type="package" version="0.12.7">bowtie</requirement>
+    <requirement type="set_environment">SCRIPT_PATH</requirement>
+    <!--requirement type="package" version="3.0.1">R</requirement!-->
+	<requirement type="package" version="1.96">threads</requirement>
+	<requirement type="package" version="2.59">SVG</requirement>
+	<!--requirement type="package" version="0.228">parent</requirement-->
+	<requirement type="package" version="2.1.8">ViennaRNA</requirement>
+  </requirements>
+
+  <!--command interpreter="perl">miPlant.pl -i $input -format $format -gfa $gfa -idx $index -pre $pre -mat $mat -rfam $rfam -idx2 $idx2 -D $D -a $a -M $M -min $min -max $max -mis $mis -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe -t $t -o $output</command-->
+
+  <command interpreter="perl">microRNA_pipeline.pl 
+   ## Change this to accommodate the number of threads you have available.
+        -t \${GALAXY_SLOTS:-4}
+	-path \$SCRIPT_PATH
+
+    #for $j, $s in enumerate( $series )
+    ##rank_of_series=$j
+    -i ${s.input}
+    -tag ${s.tag}
+    #end for
+
+      ## prepare bowtie index
+      #set index_path = ''
+      #if str($reference_genome.source) == "history":
+          #####bowtie-build "$reference_genome.own_file" genome; ln -s "$reference_genome.own_file" genome.fa;
+          #set index_path = $reference_genome.own_file
+		  -gfa $index_path 
+	  #else:
+          #set index_path = $reference_genome.index.fields.path
+		  -gfa ${index_path}.fa -idx $index_path 
+      #end if
+
+
+   ## Do or not annotate rfam non-miRNA RNAs
+    #if $params.annotate_rfam == "yes":
+
+		  ## prepare Rfam bowtie index
+		  #set rfam_index_path = ''
+		  #if str($params.reference_rfam.source) == "history":
+			 ######## bowtie-build "$params.reference_rfam.own_file" rfam; ln -s "$params.reference_rfam.own_file" rfam.fa;
+			  #set rfam_index_path = $params.reference_rfam.own_file
+			 -rfam $rfam_index_path 
+		  #else:
+			#set rfam_index_path = $params.reference_rfam.index.fields.path
+			-rfam ${rfam_index_path}.fa -idx2 $rfam_index_path 
+		  #end if
+
+		-v $params.v 
+	   ## Do or not delete rfam mapped tags
+		#if $params.delete_rfam == "yes":
+		-D 
+		#end if
+	#end if
+
+
+   ## Do or not annotate known microRNAs
+    #if $mirbase.known_microRNA == "yes":
+	-pre $mirbase.pre -mat $mirbase.mat 
+	#end if
+
+
+    -format $format -phred $phred   -a $a -M $mapnt -min $min -max $max -mis $mismatch -e $e -f $f -r $r -dis $dis -flank $flank -mfe $mfe > run.log
+  </command>
+
+  <inputs>
+
+   <repeat name="series" title="Raw sequence data">
+     <param name="input" type="data" label="Raw data"/>
+     <param name="tag" type="text" data_ref="input" label="Sample name of raw data"/>
+   </repeat>
+
+	<param name="format" type="select" label="Raw data format" multiple="false">
+	  <option value="fastq">Raw data is fastq. format</option>
+	  <option value="fasta">Raw data is fasta. format</option>
+	</param>
+
+	<param name="phred" type="select" label="Input quals are Phred+64 or Phred+33" multiple="false">
+	  <option value="64">Phred+64</option>
+	  <option value="33" selected="true">Phred+33</option>
+	</param>
+
+        <conditional name="reference_genome">
+          <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in index</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+              <options from_data_table="bowtie_indexes">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+            <param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
+          </when>
+        </conditional>
+
+	<conditional name="params">
+		<param name="annotate_rfam" type="select" label="annotate rfam nocoding RNAs(excluding miRNA)">
+		  <option value="yes" selected="true">yes</option>
+		  <option value="no">no</option>
+		</param>
+		 <when value="yes">
+			<!--param name="rfam" type="data" label="rfam sequence file" /-->
+			<conditional name="reference_rfam">
+			  <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+				<option value="indexed">Use a built-in index</option>
+				<option value="history">Use one from the history</option>
+			  </param>
+			  <when value="indexed">
+				<param name="index" type="select" label="Select a non-coding RNA reference" help="If your reference of interest is not listed, contact the Galaxy team">
+				  <options from_data_table="rfam_bowtie_indexes">
+					<filter type="sort_by" column="2"/>
+					<validator type="no_options" message="No indexes are available for the selected input dataset"/>
+				  </options>
+				</param>
+			  </when>
+			  <when value="history">
+				<param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select the reference" />
+			  </when>
+			</conditional>
+
+			<param name="v" type="integer" value="0" label="report end-to-end hits less than v mismatches for rfam mapping"/>
+
+			<param name="delete_rfam" type="select" label="delet rfam mapped reads">
+			  <option value="yes" selected="true">yes</option>
+			  <option value="no">no</option>
+			</param>
+		 </when>
+
+    </conditional> 
+
+	<conditional name="mirbase">
+		<param name="known_microRNA" type="select" label="Analysis known microRNAs(eg. from mirbase)">
+		  <option value="yes" selected="true">yes</option>
+		  <option value="no">no</option>
+		 </param>
+		 <when value="yes">
+			<param name="mat" type="data" label="mature microRNA sequence file" />
+			<param name="pre" type="data" label="precursor microRNA sequence file" />
+		 </when>
+
+	</conditional>
+	
+
+	<param name="a" type="text" value="TGGAATTCTCGGGTGCCAAGG" label="3' adapter sequence" />
+	<param name="mapnt" type="integer" value="8" label="minimum adapter map nts" />
+	<param name="min" type="integer" value="19" label="minimum microRNA length" />
+	<param name="max" type="integer" value="28" label="maximum microRNA length" />
+	<param name="mismatch" type="integer" value="0" label="number of allowed mismatches when mapping reads to precursors" />
+	<param name="e" type="integer" value="2" label="number of nucleotides upstream of the mature sequence to consider" />
+	<param name="f" type="integer" value="5" label="number of nucleotides downstream of the mature sequence to consider" />
+	<param name="r" type="integer" value="25" label="a read is allowed to map up to this number of positions in the genome" />
+	<param name="dis" type="integer" value="200" label="Maximal space between miRNA and miRNA*" />
+	<param name="flank" type="integer" value="10" label="Flank sequence length of miRNA precursor" />
+	<param name="mfe" type="float" value="-30" label="Maximal free energy allowed for a miRNA precursor" />
+  </inputs>
+
+  <outputs>
+   <data format="txt" name="known microRNA express list" from_work_dir="miRPlant_out/known_microRNA_express.txt" label="${tool.name} on ${on_string}: known microRNA express list">
+   <filter>(mirbase['known_microRNA'] == 'yes')</filter>
+   </data>
+   <data format="txt" name="known microRNA express alignment" from_work_dir="miRPlant_out/known_microRNA_express.aln" label="${tool.name} on ${on_string}: known microRNA express alignment">
+   <filter>(mirbase['known_microRNA'] == 'yes')</filter>
+   </data>
+   <data format="txt" name="known microRNA moRs result" from_work_dir="miRPlant_out/known_microRNA_express.moRs" label="${tool.name} on ${on_string}: known microRNA moRs result">
+   <filter>(mirbase['known_microRNA'] == 'yes')</filter>
+   </data>
+   <data format="txt" name="known microRNA precursor file" from_work_dir="miRPlant_out/known_microRNA_precursor.fa" label="${tool.name} on ${on_string}: known microRNA precursor file">
+   <filter>(mirbase['known_microRNA'] == 'yes')</filter>
+   </data>
+   <data format="txt" name="known microRNA mature file" from_work_dir="miRPlant_out/known_microRNA_mature.fa" label="${tool.name} on ${on_string}: known microRNA mature file">
+   <filter>(mirbase['known_microRNA'] == 'yes')</filter>
+   </data>
+   <data format="txt" name="novel microRNA express list" from_work_dir="miRPlant_out/novel_microRNA_express.txt" label="${tool.name} on ${on_string}: novel microRNA express list"/>
+   <data format="txt" name="novel microRNA precursor file" from_work_dir="miRPlant_out/novel_microRNA_precursor.fa" label="${tool.name} on ${on_string}: novel microRNA precursor file"/>
+   <data format="txt" name="novel microRNA mature sequence file" from_work_dir="miRPlant_out/novel_microRNA_mature.fa" label="${tool.name} on ${on_string}: novel microRNA mature sequence file"/>
+   <data format="html" name="analysis result" from_work_dir="miRPlant_out/result.html" label="${tool.name} on ${on_string}: analysis result"/>
+  </outputs>
+
+ <help>
+
+ </help>
+ </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nibls.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,319 @@
+#!/usr/bin/perl
+#####################################################################################################
+#LocusPocus is a free script, it is provided with the hope that you will enjoy, you may freely redistribute it at will. We would be greatful if you would keep these acknowledgements with it. 
+#
+# Dan MacLean
+# dan.maclean@sainsbury-laboratory.ac.uk
+#
+# This program is free academic software; academic and non-profit
+# users may redistribute it freely.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
+#
+# This software is released under GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007
+# see included file GPL3.txt
+#
+#
+
+
+###Dont forget you will need ... 
+#####################################################################################################
+# Boost::Graph 
+#Copyright 2005 by David Burdick
+# Available from http://search.cpan.org/~dburdick/Boost-Graph-1.2/Graph.pm
+#Boost::Graph is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
+#####################################################################################################
+
+
+
+use strict;
+use warnings;
+use Boost::Graph;
+use Getopt::Long;
+
+
+my $usage = "usage: $0 -f GFF_FILE [options]\n\n -m minimum inclusion distance (default 5)\n -c clustering coefficient (default 0.6) -b buffer between graphs (default 0) -k sample mark -o output file -t temp output file\n";
+
+my $gff_file ;
+my $min_inc = 5;
+my $clus = 0.6;
+my $buff = 0;
+my $output_file;
+my $temp;
+my $mark;
+
+GetOptions(
+
+	'c=f'          => \$clus,
+	'm=i' => \$min_inc,
+	'f|file=s'     => \$gff_file,
+	'b=i' => \$buff,
+	'o=s' => \$output_file,
+	't=s' => \$temp,
+	'k=s' => \$mark
+) ;
+
+
+die $usage unless $gff_file;
+
+
+my $starttime = time;
+warn "started $starttime\n";
+
+## load in data
+my %molecules; # stores starts and ends of srnas
+open GFF, "<$gff_file";
+
+while (my $entry = <GFF>){
+
+	chomp $entry;
+	next if($entry=~/^\#/);
+	my @data = split(/\t/,$entry);
+	my $chr=shift @data;
+	my $strand=shift @data;
+	my $start=shift @data;
+	my $end=shift @data;
+#	my $length1=$end-$start+1;
+#	if ($length1>30) {
+#		$length1=40;
+#	}
+	my $total;
+	for (my $s=0;$s<@data ;$s++) {
+		$total+=$data[$s];
+	}
+	push @data,$total;
+#	push @data,$length1;
+#	if (defined $molecules{$chr}{$start}{$end}{$strand}) {
+#		my @old_data=split(/;/,$molecules{$chr}{$start}{$end}{$strand});
+#		for (my $i=0;$i<$#old_data ;$i++) {
+#			$data[$i]+=$old_data[$i];
+#		}
+#	}
+	my $data=join ";",@data;
+	$molecules{$chr}{$start}{$end}{$strand} = $data;#chr#start#end#strand#add Tags information
+	#print "$chr\t$start\t$end\n";
+}
+
+close GFF;
+
+warn "Data loaded...\nBuilding graphs and finding loci\nPlease be patient, this can take a while...\n";
+
+my @sample=split/\#/,$mark;
+$mark=join"\"\t\"",@sample;
+open OUT, ">$output_file";
+print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
+open CLUSTER,">$temp";
+print CLUSTER "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";
+foreach my $chromosome (keys %molecules){
+	my $g = new Boost::Graph(directed=>0);
+	my @starts = keys(%{$molecules{$chromosome}} );
+	@starts = sort {$a <=> $b} @starts;  
+
+	while (my $srna_start = shift @starts){   ## work from left most sRNA to right most, add to graph if they close enough
+
+
+		foreach my $srna_end (keys %{$molecules{$chromosome}{$srna_start}}){
+
+
+			###use new graph if the next srna is too far away from this one.. 
+			if(defined $starts[0] and $srna_end + $min_inc + $buff < $starts[0]){
+
+
+				##dump the info from the old graph
+				if (scalar(@{$g->get_nodes()}) > 2){
+
+					my $cluster_coeff = get_cc($g);
+					if ($cluster_coeff >= $clus){
+					 dump_locus($g, $cluster_coeff);
+					}
+				}
+
+
+				$g = new Boost::Graph(directed=>0);
+
+			}
+
+			foreach my $e (keys %{$molecules{$chromosome}{$srna_start}}){   ### extra bit because all loci with same start and different end overlap by definition. but are not collected by main search below
+
+				unless ($e eq $srna_end){
+					my $sn = $chromosome. ':' . $srna_start . ':' . $srna_end;   ## turn coordinate of sRNA inro a node name
+					my $en = $chromosome. ':' . $srna_start . ':' . $e;
+					$g->add_edge(node1=>"$sn", node2=>"$en", weight=>'1');
+				} 
+
+			}
+
+			foreach my $start (@starts){  ##build graph of overlaps
+				my $new = 0;
+				last if $start - $min_inc > $srna_end;
+				if ($start - $min_inc <= $srna_end){
+
+					my $start_node = $chromosome . ':' . $srna_start . ':' . $srna_end;
+					foreach my $end (keys %{$molecules{$chromosome}{$start}}){
+
+						my $end_node = $chromosome . ':' . $start . ':' . $end;
+						$g->add_edge(node1=>"$start_node", node2=>"$end_node", weight=>'1');
+					}
+
+				}
+			}
+		}
+
+		if (!(defined $starts[0])) {
+			##dump the info from the last graph
+			if (scalar(@{$g->get_nodes()}) > 2){
+
+				my $cluster_coeff = get_cc($g);
+				if ($cluster_coeff >= $clus){
+				 dump_locus($g, $cluster_coeff);
+				}
+			}
+		}
+	}
+}
+
+warn "Loci printed\nFinished\n";
+
+my $endtime = time;
+
+my $elapsed = $endtime - $starttime;
+
+warn "Time elapsed = $elapsed s\n";	
+close OUT;
+close CLUSTER;
+#########################################################################################
+sub get_cc{   ## do cluster coeff calculation. No useful method anyway so self implemented NB, this is an undirected graph so k is n(n-1)/2
+
+	my $graph = shift;
+
+	my @component = @{$graph->get_nodes()}; #number of nodes
+	my @clustering_coefficients;
+
+	foreach my $vertex (@component)
+	{
+
+		my @neighbours = @{$graph->neighbors($vertex)};
+
+		my %edges_in_graph;
+
+		my $n = @neighbours; #n = the number of neighbours
+		my $k = ($n * ($n - 1))/2;	#k = total number of possible connections
+
+		my $e= 0; #actual number of connections within sub-graph
+
+		foreach my $neighbour (@neighbours)
+		{
+			foreach my $neighbour_2 (@neighbours)
+			{
+			my $edge1 = "$neighbour\t$neighbour_2";
+			my $edge2 = "$neighbour_2\t$neighbour";
+				unless (exists $edges_in_graph{$edge1} or exists $edges_in_graph{$edge2})
+				{
+					if ($graph->has_edge($neighbour, $neighbour_2) or $graph->has_edge($neighbour_2, $neighbour))
+					{
+						++$e;
+						$edges_in_graph{$edge1}=1;
+						$edges_in_graph{$edge2}=1;
+					}
+				}
+			}
+		}
+
+		if ($k >= 1)
+		{	
+		my $c = $e / $k;
+		push @clustering_coefficients, $c;
+		}
+		else {push @clustering_coefficients, '0';}
+	}
+
+	my $graph_n = scalar(@clustering_coefficients);
+	my $graph_cc = 0;
+	foreach my $cc (@clustering_coefficients){ 
+
+		$graph_cc = $graph_cc + $cc;
+
+	}
+	$graph_cc = $graph_cc / $graph_n;
+
+	return $graph_cc;
+}
+
+############################################################################################################
+
+sub dump_locus{
+
+	my $g = shift;
+	my $cc = shift;
+	my $chr;
+	my $start = 1000000000000000000000000000000000000000000000; 
+	my $end = -1;
+	my @sample;
+	my @tag;
+	foreach my $node (@{$g->get_nodes()}){
+
+		$node =~ m/^(\S+):(\d+):(\d+)$/;
+		my $c=$1;
+		my $s=$2;
+		my $e=$3;
+	#	my @data;
+		foreach my $str (keys %{$molecules{$c}{$s}{$e}}) {
+			my @data=split(/;/,$molecules{$c}{$s}{$e}{$str});
+			push @tag,($s.",".$e.",".$str.",".$data[-1]);
+#			for (my $i=0;$i<$#old_data ;$i++) {
+#				$data[$i]+=$old_data[$i];
+#			}
+			my $length=$e-$s+1;
+			if ($length>30) {
+				$length=40;
+			}
+			push @data,$length;
+			my $data=join ";",@data;#sample_exp/total_exp/length;
+			push @sample,$data;
+		}
+
+		$chr = $c;
+		$start = $s if $s < $start;
+		$end = $e if $e > $end;
+	}
+	my $tag=join";",@tag;
+	my $tag_number=@tag;
+	my ($max_length,$max_p,@cluster_exp)=Max_length(\@sample);
+	if ($max_length==40) {
+		$max_length="\>30";
+	}
+	my $cluster_exp=join"\t",@cluster_exp;
+	my $gff = $chr."\:$start\-$end\t".$max_length."nt\t".$max_p."\t" . $cluster_exp;
+	print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag_number\t$tag\n";
+	print OUT $gff, "\n";  
+}
+
+sub Max_length{
+	my @exp=@{$_[0]};
+	my %sample_length;
+	my $total_exp;
+	my @each;
+	for (my $i=0;$i<=$#exp ;$i++) {
+		my @tag=split/;/,$exp[$i];
+		my $length=pop(@tag);
+		my $exp=pop(@tag);
+		$sample_length{$length}+=$exp;
+		$total_exp+=$exp;
+		for (my $j=0;$j<=$#tag ;$j++) {
+			$each[$j]+=$tag[$j];
+		}
+	}
+	my $max=0;
+	my $max_key;
+	foreach my $key (sort keys %sample_length) {
+		my $p=$sample_length{$key}/$total_exp;
+		if ($p>$max) {
+			$max=$p;
+			$max_key=$key;
+		}
+		$sample_length{$key}=sprintf("%.2f",$p);
+	}
+	return($max_key,$sample_length{$max_key},@each);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/non_miRNA_reads.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2013/7/19
+#Modified:
+#Description: 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","fa=s","o=s","h");
+if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'fa'};
+my $fileout=$opts{'o'};
+
+open IN,"<$filein"; #input file  
+my (%fa,%seq);
+while (my $aline=<IN>){
+	chomp $aline;
+	$aline=~s/^>//;
+	my $seq=<IN>;
+	chomp $seq;
+	#$seq{$seq}=$aline;
+	$fa{$aline}=$seq;
+}
+close IN;
+
+open IN,"<$opts{i}";
+while(my $aline=<IN>){
+	chomp $aline;
+	if($aline=~/^\S+_x\d+/){
+		$aline=~/^(\S+_x\d+)/;
+		my $name=$1;
+		delete($fa{$name});
+	}
+}
+close IN;
+
+open OUT,">$fileout"; #output file  
+foreach my $key (keys %fa) {
+	print OUT ">$key\n$fa{$key}\n";
+}
+close OUT;
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o
+options:
+-i input file
+-o output file
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phased_siRNA.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,254 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2013/7/19
+#Modified:
+#Description: 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+#use Math::Cephes qw(:hypergeometrics); 
+
+my %opts;
+GetOptions(\%opts,"i=s","o=s","h");
+if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'o'};
+
+open IN,"<$filein"; #input file  
+open OUT,">$fileout"; #output file  
+
+while (my $aline=<IN>) {
+	chomp $aline;
+	if ($aline=~/^\#/) {
+		print OUT $aline,"\tp-value\n";
+		next;
+	}
+	my @tmp=split/\t/,$aline;
+	my @pos=split/:|-/,$tmp[0];
+	$tmp[1]=~s/nt//;
+	my $pv=&phase($tmp[1],$pos[1],$pos[2],$tmp[4]);
+	
+	print OUT $aline,"\t",$pv,"\n";
+}
+close IN;
+close OUT;
+
+sub phase{
+	my ($tagL,$start,$end,$tags)=@_;
+	my @tmp=split/\;/,$tags;
+	my %tag;
+	for (my $i=0;$i<@tmp;$i++) {
+		my @aa=split/\,/,$tmp[$i];
+		next if($aa[1]-$aa[0]+1 != $tagL);
+#		$tag{$aa[0].",".$aa[2]}+=$aa[3] if($aa[2] eq "+");
+#		$tag{($aa[1]).",".$aa[2]}+=$aa[3] if($aa[2] eq "-");
+		$tag{$aa[0]}+=$aa[3] if($aa[2] eq "+");
+		$tag{($aa[1]+3)}+=$aa[3] if($aa[2] eq "-");
+	}
+
+	my $pv=&pvalue2(\%tag,$tagL,$start,$end);
+
+	return $pv;
+}
+
+sub pvalue2{
+	my ($tag,$tagL,$start,$end)=@_;
+
+	my $p=1; my $pp=1;
+	foreach my $ccs(keys %{$tag}){
+		my $n=0;
+		my $k=0;
+		my $K=0;
+		my $N=0;
+
+		my $cor= $ccs;
+		my $ss=$cor;
+		my $ee=($cor+$tagL*10-1)<$end ? $cor+$tagL*10-1 : $end;
+		
+		my $max=0;
+		for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand
+		{
+			my $x=$i;
+			if (defined $$tag{$x})
+			{
+			if ($max<$$tag{$x}) {$max=$$tag{$x};}
+			$n +=$$tag{$x};
+			$N++;
+			}
+		}
+		for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand
+		{
+			my $x=$i;
+			if (defined $$tag{$x})
+			{
+			$k +=$$tag{$x};
+			$K++;
+			}
+		}
+
+
+		return $p if($K<3);
+		return $p if($max/$n>0.8);
+
+		my $pn=0; 
+		next if($n==$k);
+		$pn=10*$k/($n-$k)+1;
+		$pn = $pn ** ($K-2);
+		$pn = log($pn);
+		if ($p<$pn) {
+			$p=$pn;
+		}
+
+	}
+	
+	return $p;
+
+}
+
+sub pvalue{
+	my ($tag,$tagL,$start,$end)=@_;
+
+	my $p=1;
+	foreach my $ccs(keys %{$tag}){
+		my $n=-1;
+		my $k=-1;
+
+		my ($cor, $str)=split(/,/, $ccs);
+		if ($str eq "+") # small RNAs on the Watson strand
+		{
+			my $ss=$cor;
+			my $ee=($cor+$tagL*11-1)<$end ? $cor+$tagL*11-1 : $end;
+			for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand
+			{
+				my $x=$i.","."+";
+				if (defined $$tag{$x})
+				{
+				$n=$n+1;
+				}
+			}
+			for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand
+			{
+				my $x=$i.","."+";
+				if (defined $$tag{$x})
+				{
+				$k=$k+1;
+				}
+			}
+
+			for (my $j=$ss-2; $j<=$ee-2; $j++) # calculate n on the antisense strand
+			{
+				my $x=$j.","."-";
+				if (defined $$tag{$x})
+				{
+				$n=$n+1;
+				}
+			}
+
+			for (my $j=$ss+$tagL-2; $j<=$ee-2; $j=$j+$tagL) # calculate k on the antisense strand
+			{
+				my $x=$j.","."-";
+				if (defined $$tag{$x})
+				{
+				$k=$k+1;
+				}
+			}
+		}
+
+		elsif ($str eq "-") # small RNAs on the Crick strand
+		{
+			my $ee=$cor;
+			my $ss=$cor-$tagL*11+1> $start ? $cor-$tagL*11+1 : $start;
+			for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand
+			{
+				my $x=$i.","."-";
+				if (defined $$tag{$x})
+				{
+				$n=$n+1;
+				}
+			}
+			for (my $i=$ss+$tagL-1; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand
+			{
+				my $x=$i.","."-";
+				if (defined $$tag{$x})
+				{
+				$k=$k+1;
+				}
+			}
+
+			for (my $j=$ss+2; $j<=$ee+2; $j++) # calculate n on the antisense strand
+			{
+				my $x=$j.","."+";
+				if (defined $$tag{$x})
+				{
+				$n=$n+1;
+				}
+			}
+			for (my $j=$ss+2; $j<=$ee+2; $j=$j+$tagL) # calculate k on the antisense strand
+			{
+				my $x=$j.","."+";
+				if (defined $$tag{$x})
+				{
+				$k=$k+1;
+				}
+			}
+		}
+
+		next if($k<3);
+
+		my $pn=0; my $N=$tagL*11*2-1; my $M=21;
+		for (my $w=$k; $w<=$M; $w++) # calculate p-value from n and k
+		{
+			my $c=1;
+			my $rr=1;
+			my $rw=1;
+
+			for (my $j=0; $j<=$w-1; $j++)
+			{
+				$c=$c*($M-$j)/($j+1);
+			}
+			for (my $x=0; $x<=$n-$w-1; $x++)
+			{
+				$rr=$rr*($N-$M-$x)/($x+1);
+			}
+			for (my $y=0; $y<=$n-1; $y++)
+			{
+				$rw=$rw*($y+1)/($N-$y);
+			}
+			my $pr=$c*$rr*$rw;
+			
+			$pn=$pn+$pr;
+		}
+
+		$p=$pn<$p ? $pn :$p;
+
+		if ($p<0.001) #select and output small RNA clusters with p<0.001
+
+		{
+
+			return $p;
+
+		}
+
+	}
+	return $p;
+}
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o
+options:
+-i input file
+-o output file
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preProcess.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,385 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-12-2
+#Modified:
+#Description: RNA-seq data pre-process
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use threads;
+#use threads::shared;
+use File::Path;
+use File::Basename;
+#use RNA;
+#use Term::ANSIColor;
+
+my %opts;
+GetOptions(\%opts,"i:s@","tag:s@","format=s","phred:i","gfa=s","rfam:s","idx:s","idx2:s","mis:i","v:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","h");
+if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $time=&Time();
+print "miPlant program start:\n   The time is $time!\n";
+print "Command line:\n   $0 @ARGV\n";
+
+my $format=$opts{'format'};
+if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { 
+	#&printErr();
+	die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
+}
+
+my $phred_qv=64;
+if (defined $opts{'phred'}) {$phred_qv=$opts{'phred'};}
+
+my @inputfiles=@{$opts{'i'}};
+my @inputtags=@{$opts{'tag'}};
+
+my  $mypath=`pwd`;
+chomp $mypath;
+
+my $dir=defined $opts{'o'} ? $opts{'o'} : "$mypath/preProcess/";
+
+
+unless ($dir=~/\/$/) {$dir.="/";}
+if (not -d $dir) {
+	mkdir $dir;
+}
+my $config=$dir."/input_config";
+open CONFIG,">$config";
+	for (my $i=0;$i<@inputfiles;$i++) {
+		print CONFIG $inputfiles[$i],"\t",$inputtags[$i],"\n";
+	}
+close CONFIG;
+
+my $scipt_path=defined $opts{'path'} ? $opts{'path'} : "/Users/big/galaxy-dist/tools/myTools/";
+
+my $a="ATCTCGTATG";  #adapter
+if (defined $opts{'a'}) {$a=$opts{'a'};}
+
+my $m=6;  #adapter minimum mapped nt
+if (defined $opts{'M'}) {$m=$opts{'M'};}
+
+my $t=1; #threads number
+if (defined $opts{'t'}) {$t=$opts{'t'};}
+
+my $min_nt=19; # minimum reads length
+if (defined $opts{'min'}) {$min_nt=$opts{'min'};}
+
+my $max_nt=28; #maximum reads length
+if (defined $opts{'max'}) {$max_nt=$opts{'max'};}
+
+my $mis=0; #mismatch number for microRNA
+if (defined $opts{'mis'}) {$mis=$opts{'mis'};}
+
+my $mis_rfam=0;# mismatch number for rfam
+if (defined $opts{'v'}) {$mis_rfam=$opts{'v'};}
+
+my (@filein,@mark,@clean);
+#&read_config();
+@filein=@inputfiles;
+@mark=@inputtags;
+
+&checkfa($opts{gfa});
+
+
+##### clip adpter --> clean data  start
+my $preprocess=$dir."preProcess_clean/";
+mkdir $preprocess;
+my $can_use_threads = eval 'use threads; 1';
+if ($can_use_threads) {
+# Do processing using threads
+	print "Do processing using threads\n";
+	my @filein1=@filein; my @mark1=@mark;
+	while (@filein1>0) {
+		my @thrs; my @res;
+		for (my $i=0;$i<$t ;$i++) {
+			last if(@filein1==0);
+			my $in=shift @filein1;
+			my $out=shift @mark1;
+			push @clean,$preprocess.$out."_clips_adapter.fq";
+			$thrs[$i]=threads->create(\&clips,$in,$out);
+		}
+		for (my $i=0;$i<@thrs;$i++) {
+			$res[$i]=$thrs[$i]->join();
+		}
+	}
+} else {
+# Do not processing using threads
+	print "Do not processing using threads\n";
+	for (my $i=0;$i<@filein ;$i++) {
+		my $in=$filein[$i];
+		my $out=$mark[$i];
+		push @clean,$preprocess.$out."_clips_adapter.fq";
+		&clips($in,$out);
+	}
+}
+
+##### clip adpter --> clean data  end
+
+my $collapsed=$preprocess."collapse_reads.fa";
+my $data=$preprocess."collapse_reads_${min_nt}_${max_nt}.fa"; ## raw clean data
+&collapse(\@clean,$collapsed);   #collapse reads to tags
+
+&filterbylength();  # filter <$min_nt  && >$max_nt
+
+print "The final clean data file is $data, only contains reads which length is among $min_nt\~$max_nt\n\n";
+
+my $clean_data=$preprocess."clean_data.fa";
+system("ln -s $data $clean_data");
+
+$time=Time();
+print "$time: Genome alignment!\n\n";
+my $genome_map=$dir."genome_match";
+&genome($data);
+#my $genome_map=&search($dir,"genome_match_");
+my $mapfile=$genome_map."/genome_mapped.bwt";
+my $mapfa=$genome_map."/genome_mapped.fa";
+my $unmap=$genome_map."/genome_not_mapped.fa";
+
+chdir $dir;
+my $pathfile="$dir/path.txt";
+open PA,">$pathfile";
+print PA "$config\n";
+print PA "$preprocess\n";
+print PA "$genome_map\n";
+
+if (defined $opts{'rfam'}) {              #rfam mapping and analysis
+	$time=Time();
+	print "$time: RNA annotate!\n\n";
+	$time=~s/:/-/g;
+	$time=~s/ /-/g;
+	my $rfam_exp_dir=$dir."rfam_match";
+	&rfam();
+	#my $rfam_exp_dir=&search($dir,"rfam_match_");
+print PA "$rfam_exp_dir\n";
+
+	my $tag=join "\\;" ,@mark;
+	system("perl $scipt_path/count_rfam_express.pl -i $rfam_exp_dir/rfam_mapped.bwt -tag $tag -o rfam_non-miRNA_annotation.txt");
+}
+
+
+close PA;
+system("perl $scipt_path/html_preprocess.pl -i $pathfile -format $format -min $min_nt -max $max_nt -o $dir/preprocessResult.html");
+
+$time=Time();
+print "$time: Program end!!\n";
+
+############################## sub programs ###################################
+sub genome{
+	my ($file)=@_;
+	if(defined $opts{'idx'}){
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -r 1000 -v $mis -p $t -o $dir -index $opts{idx}") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time\n";
+	}else{
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -r 1000 -v $mis -p $t -o $dir") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time\n";
+	}
+}
+sub rfam{
+	if (defined $opts{'idx2'}) {
+		system("perl $scipt_path/rfam.pl -i $mapfa -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} ");
+#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time\n";
+	}else{
+		system("perl $scipt_path/rfam.pl -i $mapfa -ref $opts{rfam} -v $mis_rfam -p $t -o $dir ");
+#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time\n";
+	}
+}
+sub filterbylength{
+	my $tmpmark=join ",", @mark;
+	system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark");
+	system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt  -o $preprocess/length.html");
+#	print "\nfilterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark\n";
+
+}
+sub collapse{
+	my ($ins,$data)=@_;
+		my $str="";
+	for (my $i=0;$i<@{$ins};$i++) {
+		$str .="-i $$ins[$i] ";
+	}
+	system ("perl $scipt_path/collapseReads2Tags.pl $str -mark seq -o $data -format $format");
+#	print "\ncollapseReads2Tags.pl $str -mark seq -o $data -format $format\n";
+}
+
+sub clips{
+	my ($in,$out)=@_;
+	my $adapter=$preprocess.$out."_clips_adapter.fq";
+	if($format eq "fq" || $format eq "fastq"){
+		system("fastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter") ;
+#		print "\nfastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter\n";
+	}
+	if($format eq "fa" || $format eq "fasta"){
+		system("fastx_clipper -a $a -M $m -i $in -o $adapter") ;
+        #		print "\nfastx_clipper -a $a -M $m -i $in -o $adapter\n";
+	}
+	#my $clean=$preprocess.$out."_clean.fq";
+	#system("filterReadsByLength.pl -i $adapter -o $clean -min $min_nt -max $max_nt ");
+
+	return;
+}
+
+sub read_config{
+	open CON,"<$config";
+	while (my $aline=<CON>) {
+		chomp $aline;
+		my @tmp=split/\t/,$aline;
+		push @filein,$tmp[0];
+		push @mark,$tmp[1];
+		&check_rawdata($tmp[0]);
+	}
+	close CON;
+	if (@filein != @mark) {
+		#&printErr();
+		die "Maybe config file have some wrong!!!\n";
+	}
+}
+sub check_rawdata{
+	my ($fileforcheck)=@_;
+	if (!(-s $fileforcheck)) {
+		#&printErr();
+		die "Can not find $fileforcheck, or file is empty!!!\n";
+	}
+	if ($format eq "fasta" || $format eq "fa") {
+		&checkfa($fileforcheck);
+	}
+	if ($format eq "fastq" || $format eq "fq") {
+		&checkfq($fileforcheck);
+	}
+}
+sub checkfa{
+	my ($file_reads)=@_;
+	open N,"<$file_reads";
+	my $line=<N>;
+	chomp $line;
+    if($line !~ /^>\S+/){
+        #printErr();
+        die "The first line of file $file_reads does not start with '>identifier'
+Reads file $file_reads is not a valid fasta file\n\n";
+    }
+    if(<N> !~ /^[ACGTNacgtn]*$/){
+        #printErr();
+        die "File $file_reads contains not allowed characters in sequences
+Allowed characters are ACGTN
+Reads file $file_reads is not a fasta file\n\n";
+    }
+	close N;
+}
+sub checkfq{
+	my ($file_reads)=@_;
+
+	open N,"<$file_reads";
+	for (my $i=0;$i<10;$i++) {
+		my $a=<N>;
+		my $b=<N>;
+		my $c=<N>;
+		my $d=<N>;
+		chomp $a;
+		chomp $b;
+		chomp $c;
+		chomp $d;
+		if($a!~/^\@/){
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		if($b!~ /^[ACGTNacgtn]*$/){
+			#&printErr();
+			die "File $file_reads contains not allowed characters in sequences
+Allowed characters are ACGTN
+Reads file $file_reads is not a fasta file\n\n";
+		}
+		if ($c!~/^\@/ && $c!~/^\+/) {
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		if ((length $b) != (length $d)) {
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		my @qv=split //,$d;
+		for (my $j=0;$j<@qv ;$j++) {
+			my $q=ord($qv[$j])-64;
+			if($q<0){$phred_qv=33;}
+		}
+	}
+	close N;
+}
+
+sub search{
+	my ($dir,$str)=@_;
+	opendir I,$dir;
+	my @ret;
+	while (my $file=readdir I) {
+		if ($file=~/$str/) {
+			push @ret, $file;
+		}
+	}
+	closedir I;
+	if (@ret != 1) {
+		#&printErr();
+
+		die "Can not find directory or file which name has string: $str !!!\n";
+	}
+	return $ret[0];
+}
+
+sub Time{
+        my $time=time();
+        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+        $month++;
+        $year+=1900;
+        if (length($sec) == 1) {$sec = "0"."$sec";}
+        if (length($min) == 1) {$min = "0"."$min";}
+        if (length($hour) == 1) {$hour = "0"."$hour";}
+        if (length($day) == 1) {$day = "0"."$day";}
+        if (length($month) == 1) {$month = "0"."$month";}
+        #print "$year-$month-$day $hour:$min:$sec\n";
+        return("$year-$month-$day $hour:$min:$sec");
+}
+
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -format -gfa -index -rfam -a -M -min -max -mis -v -t  -o  -path
+options:
+-i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ...
+-tag string # raw data file names, -tag xxx -tag xxx
+
+-format string,#specific input rawdata file format : fastq|fq|fasta|fa
+-phred  int # phred quality number, default is 64
+
+-path scirpt path
+
+-gfa string,  input file # genome fasta. sequence file
+-idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+
+-rfam string,  input file# rfam database file, microRNAs must not be contained in this file## if not define, rfam small RNA will not be count.
+-idx2 string,  rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+
+-a string,  ADAPTER string. default is ATCTCGTATG.
+-M int,    require minimum adapter alignment length of N. If less than N nucleotides aligned with the adapter - don't clip it. 
+-min int,  reads min length,default is 19.
+-max int,  reads max length,default is 28.
+
+-mis [int]     number of allowed mismatches when mapping reads to genome, default 0
+-v <int>     report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment
+
+-t int,    number of threads [1]
+
+-o output directory# absolute path
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preProcess.xml	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,155 @@
+<tool id="preprocess" name="preProcess" veision="1.0.0">
+  <description>Program for Raw data preprocess analysis, including 3' adapter triming, reads collaping, genome mapping and  rfam non-miRNA analysis </description>
+
+  <requirements>
+	<requirement type="package" version="0.0.13">fastx_toolkit </requirement>
+    <requirement type="package" version="0.12.7">bowtie</requirement>
+    <requirement type="set_environment">SCRIPT_PATH</requirement>
+    <!--requirement type="package" version="3.0.1">R</requirement!-->
+	<requirement type="package" version="1.96">threads</requirement>
+	<requirement type="package" version="2.59">SVG</requirement>
+	<requirement type="package" version="2.1.8">ViennaRNA</requirement>
+  </requirements>
+
+  <!--command interpreter="perl">miPlant.pl -i $input -format $format -gfa $gfa -idx $index -pre $pre -mat $mat -rfam $rfam -idx2 $idx2 -D $D -a $a -M $M -min $min -max $max -mis $mis -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe -t $t -o $output</command-->
+
+   <command interpreter="perl">preProcess.pl 
+   ## Change this to accommodate the number of threads you have available.
+        -t \${GALAXY_SLOTS:-4}
+	-path \$SCRIPT_PATH
+
+    #for $j, $s in enumerate( $series )
+    ##rank_of_series=$j
+    -i ${s.input}
+    -tag ${s.tag}
+    #end for
+
+     ## Do or not annotate rfam non-miRNA RNAs
+    #if $nocoding.annotate_rfam == "yes":
+		  ## prepare Rfam bowtie index
+		  #set rfam_index_path = ''
+		  #if str($nocoding.reference_rfam.source) == "history":
+			  -rfam $nocoding.reference_rfam.own_file 
+		  #else:
+			  #set rfam_index_path = $nocoding.reference_rfam.index.fields.path
+		      -rfam ${rfam_index_path}.fa -idx2 $rfam_index_path
+		  #end if
+		 -v $nocoding.v
+	#end if
+
+        ## prepare bowtie index
+        #set index_path = ''
+        #if str($reference_genome.source) == "history":
+            #set index_path = 'genome'
+            -gfa $reference_genome.own_file
+		#else:
+            #set index_path = $reference_genome.index.fields.path
+		   -gfa ${index_path}.fa -idx $index_path
+		#end if
+
+    -format $format -phred $phred -a $a -M $mapnt -min $min -max $max -mis $mismatch  > run.log
+  </command>
+
+  <inputs>
+
+   <repeat name="series" title="Raw sequence data">
+     <param name="input" type="data" label="Raw data"/>
+     <param name="tag" type="text" data_ref="input" label="Sample name of raw data"/>
+   </repeat>
+
+	<!--param name="input" format="tabular"  type="data" label="input config file" /-->
+	
+	<param name="format" type="select" label="Raw data format" multiple="false">
+	  <option value="fastq">Raw data is fastq. format</option>
+	  <option value="fasta">Raw data is fasta. format</option>
+	</param>
+	<param name="phred" type="select" label="Input quals are Phred+64 or Phred+33" multiple="false">
+	  <option value="64">Phred+64</option>
+	  <option value="33" selected="true">Phred+33</option>
+	</param>
+
+	        <!-- reference genome -->
+        <conditional name="reference_genome">
+          <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in index</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+              <options from_data_table="bowtie_indexes">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+            <param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
+          </when>
+        </conditional>
+
+	<!--param name="gfa"  type="data" label="genome sequence fasta file"/-->
+	<!--param type="data" name="index" label="genome sequence bowtie index"/-->
+	<param name="a" type="text" value="TGGAATTCTCGGGTGCCAAGG" label="3' adapter sequence" />
+	<param name="mapnt" type="integer" value="8" label="minimum adapter map nts" />
+	<param name="min" type="integer" value="19" label="Minimum microRNA length" />
+	<param name="max" type="integer" value="28" label="Maximum microRNA length" />
+	<param name="mismatch" type="integer" value="0" label="Number of allowed mismatches when mapping reads to genome" />
+
+	<conditional name="nocoding">
+		<param name="annotate_rfam" type="select" label="Annotate nocoding RNAs(excluding miRNA)">
+		  <option value="yes" selected="true">yes</option>
+		  <option value="no">no</option>
+		 </param>
+		 <when value="yes">
+			<!--param name="rfam" type="data" label="rfam sequence file" /-->
+			<conditional name="reference_rfam">
+			  <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+				<option value="indexed">Use a built-in index</option>
+				<option value="history">Use one from the history</option>
+			  </param>
+			  <when value="indexed">
+				<param name="index" type="select" label="Select a reference" help="If your reference of interest is not listed, contact the Galaxy team">
+				  <options from_data_table="rfam_bowtie_indexes">
+					<filter type="sort_by" column="2"/>
+					<validator type="no_options" message="No indexes are available for the selected input dataset"/>
+				  </options>
+				</param>
+			  </when>
+			  <when value="history">
+				<param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select the reference" />
+			  </when>
+			</conditional>
+
+			<param name="v" type="integer" value="0" label="Report end-to-end hits less than v mismatches for non-coding RNA annotation"/>
+		 </when>
+    </conditional> 
+
+
+
+  </inputs>
+
+  <outputs>
+   <data format="html" name="preprocess result" from_work_dir="preProcess/preprocessResult.html" label="${tool.name} on ${on_string}: preprocess result"/>
+
+   <data format="txt" name="clean FASTA data" from_work_dir="preProcess/preProcess_clean/clean_data.fa" label="${tool.name} on ${on_string}: clean FASTA data"/>
+
+   <data format="txt" name="genome mapping result" from_work_dir="preProcess/genome_match/genome_mapped.bwt" label="${tool.name} on ${on_string}: genome mapping result"/>
+   <data format="txt" name="genome mapped FASTA reads" from_work_dir="preProcess/genome_match/genome_mapped.fa" label="${tool.name} on ${on_string}: genome mapped FASTA reads"/>
+
+   <data format="txt" name="Rfam mapping result" from_work_dir="preProcess/rfam_match/rfam_mapped.bwt" label="${tool.name} on ${on_string}: Rfam mapping result">
+   <filter>(nocoding['annotate_rfam'] == 'yes')</filter>
+   </data>
+   <data format="txt" name="Rfam mapped FASTA file" from_work_dir="preProcess/rfam_match/rfam_mapped.fa" label="${tool.name} on ${on_string}: Rfam mapped FASTA file">
+   <filter>(nocoding['annotate_rfam'] == 'yes')</filter>
+   </data>
+   <data format="txt" name="Rfam not mapped FASTA file" from_work_dir="preProcess/rfam_match/rfam_not_mapped.fa" label="${tool.name} on ${on_string}: Rfam not mapped FASTA file">
+   <filter>(nocoding['annotate_rfam'] == 'yes')</filter>
+   </data>
+   <data format="txt" name="input config" from_work_dir="preProcess/input_config" label="${tool.name} on ${on_string}: input config"/>
+
+  </outputs>
+
+ <help>
+
+ </help>
+ </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/preProcess_core.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,385 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-12-2
+#Modified:
+#Description: RNA-seq data pre-process
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use threads;
+#use threads::shared;
+use File::Path;
+use File::Basename;
+#use RNA;
+#use Term::ANSIColor;
+
+my %opts;
+GetOptions(\%opts,"i:s@","tag:s@","format=s","phred:i","gfa=s","rfam:s","idx:s","idx2:s","mis:i","v:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","h");
+if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $time=&Time();
+print "miPlant program start:\n   The time is $time!\n";
+print "Command line:\n   $0 @ARGV\n";
+
+my $format=$opts{'format'};
+if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { 
+	#&printErr();
+	die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
+}
+
+my $phred_qv=64;
+if (defined $opts{'phred'}) {$phred_qv=$opts{'phred'};}
+
+my @inputfiles=@{$opts{'i'}};
+my @inputtags=@{$opts{'tag'}};
+
+my  $mypath=`pwd`;
+chomp $mypath;
+
+my $dir=defined $opts{'o'} ? $opts{'o'} : "$mypath/preProcess/";
+
+
+unless ($dir=~/\/$/) {$dir.="/";}
+if (not -d $dir) {
+	mkdir $dir;
+}
+my $config=$dir."/input_config";
+open CONFIG,">$config";
+	for (my $i=0;$i<@inputfiles;$i++) {
+		print CONFIG $inputfiles[$i],"\t",$inputtags[$i],"\n";
+	}
+close CONFIG;
+
+my $scipt_path=defined $opts{'path'} ? $opts{'path'} : "/Users/big/galaxy-dist/tools/myTools/";
+
+my $a="ATCTCGTATG";  #adapter
+if (defined $opts{'a'}) {$a=$opts{'a'};}
+
+my $m=6;  #adapter minimum mapped nt
+if (defined $opts{'M'}) {$m=$opts{'M'};}
+
+my $t=1; #threads number
+if (defined $opts{'t'}) {$t=$opts{'t'};}
+
+my $min_nt=19; # minimum reads length
+if (defined $opts{'min'}) {$min_nt=$opts{'min'};}
+
+my $max_nt=28; #maximum reads length
+if (defined $opts{'max'}) {$max_nt=$opts{'max'};}
+
+my $mis=0; #mismatch number for microRNA
+if (defined $opts{'mis'}) {$mis=$opts{'mis'};}
+
+my $mis_rfam=0;# mismatch number for rfam
+if (defined $opts{'v'}) {$mis_rfam=$opts{'v'};}
+
+my (@filein,@mark,@clean);
+#&read_config();
+@filein=@inputfiles;
+@mark=@inputtags;
+
+&checkfa($opts{gfa});
+
+
+##### clip adpter --> clean data  start
+my $preprocess=$dir."preProcess_clean/";
+mkdir $preprocess;
+my $can_use_threads = eval 'use threads; 1';
+if ($can_use_threads) {
+# Do processing using threads
+	print "Do processing using threads\n";
+	my @filein1=@filein; my @mark1=@mark;
+	while (@filein1>0) {
+		my @thrs; my @res;
+		for (my $i=0;$i<$t ;$i++) {
+			last if(@filein1==0);
+			my $in=shift @filein1;
+			my $out=shift @mark1;
+			push @clean,$preprocess.$out."_clips_adapter.fq";
+			$thrs[$i]=threads->create(\&clips,$in,$out);
+		}
+		for (my $i=0;$i<@thrs;$i++) {
+			$res[$i]=$thrs[$i]->join();
+		}
+	}
+} else {
+# Do not processing using threads
+	print "Do not processing using threads\n";
+	for (my $i=0;$i<@filein ;$i++) {
+		my $in=$filein[$i];
+		my $out=$mark[$i];
+		push @clean,$preprocess.$out."_clips_adapter.fq";
+		&clips($in,$out);
+	}
+}
+
+##### clip adpter --> clean data  end
+
+my $collapsed=$preprocess."collapse_reads.fa";
+my $data=$preprocess."collapse_reads_${min_nt}_${max_nt}.fa"; ## raw clean data
+&collapse(\@clean,$collapsed);   #collapse reads to tags
+
+&filterbylength();  # filter <$min_nt  && >$max_nt
+
+print "The final clean data file is $data, only contains reads which length is among $min_nt\~$max_nt\n\n";
+
+my $clean_data=$preprocess."clean_data.fa";
+system("ln -s $data $clean_data");
+
+$time=Time();
+print "$time: Genome alignment!\n\n";
+my $genome_map=$dir."genome_match";
+&genome($data);
+#my $genome_map=&search($dir,"genome_match_");
+my $mapfile=$genome_map."/genome_mapped.bwt";
+my $mapfa=$genome_map."/genome_mapped.fa";
+my $unmap=$genome_map."/genome_not_mapped.fa";
+
+chdir $dir;
+my $pathfile="$dir/path.txt";
+open PA,">$pathfile";
+print PA "$config\n";
+print PA "$preprocess\n";
+print PA "$genome_map\n";
+
+if (defined $opts{'rfam'}) {              #rfam mapping and analysis
+	$time=Time();
+	print "$time: RNA annotate!\n\n";
+	$time=~s/:/-/g;
+	$time=~s/ /-/g;
+	my $rfam_exp_dir=$dir."rfam_match";
+	&rfam();
+	#my $rfam_exp_dir=&search($dir,"rfam_match_");
+print PA "$rfam_exp_dir\n";
+
+	my $tag=join "\\;" ,@mark;
+	system("perl $scipt_path/count_rfam_express.pl -i $rfam_exp_dir/rfam_mapped.bwt -tag $tag -o rfam_non-miRNA_annotation.txt");
+}
+
+
+close PA;
+system("perl $scipt_path/html_preprocess.pl -i $pathfile -format $format -min $min_nt -max $max_nt -o $dir/preprocessResult.html");
+
+$time=Time();
+print "$time: Program end!!\n";
+
+############################## sub programs ###################################
+sub genome{
+	my ($file)=@_;
+	if(defined $opts{'idx'}){
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -r 1000 -v $mis -p $t -o $dir -index $opts{idx}") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time\n";
+	}else{
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -r 1000 -v $mis -p $t -o $dir") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time\n";
+	}
+}
+sub rfam{
+	if (defined $opts{'idx2'}) {
+		system("perl $scipt_path/rfam.pl -i $mapfa -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} ");
+#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time\n";
+	}else{
+		system("perl $scipt_path/rfam.pl -i $mapfa -ref $opts{rfam} -v $mis_rfam -p $t -o $dir ");
+#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time\n";
+	}
+}
+sub filterbylength{
+	my $tmpmark=join ",", @mark;
+	system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark");
+	system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt  -o $preprocess/length.html");
+#	print "\nfilterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark\n";
+
+}
+sub collapse{
+	my ($ins,$data)=@_;
+		my $str="";
+	for (my $i=0;$i<@{$ins};$i++) {
+		$str .="-i $$ins[$i] ";
+	}
+	system ("perl $scipt_path/collapseReads2Tags.pl $str -mark seq -o $data -format $format");
+#	print "\ncollapseReads2Tags.pl $str -mark seq -o $data -format $format\n";
+}
+
+sub clips{
+	my ($in,$out)=@_;
+	my $adapter=$preprocess.$out."_clips_adapter.fq";
+	if($format eq "fq" || $format eq "fastq"){
+		system("fastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter") ;
+#		print "\nfastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter\n";
+	}
+	if($format eq "fa" || $format eq "fasta"){
+		system("fastx_clipper -a $a -M $m -i $in -o $adapter") ;
+        #		print "\nfastx_clipper -a $a -M $m -i $in -o $adapter\n";
+	}
+	#my $clean=$preprocess.$out."_clean.fq";
+	#system("filterReadsByLength.pl -i $adapter -o $clean -min $min_nt -max $max_nt ");
+
+	return;
+}
+
+sub read_config{
+	open CON,"<$config";
+	while (my $aline=<CON>) {
+		chomp $aline;
+		my @tmp=split/\t/,$aline;
+		push @filein,$tmp[0];
+		push @mark,$tmp[1];
+		&check_rawdata($tmp[0]);
+	}
+	close CON;
+	if (@filein != @mark) {
+		#&printErr();
+		die "Maybe config file have some wrong!!!\n";
+	}
+}
+sub check_rawdata{
+	my ($fileforcheck)=@_;
+	if (!(-s $fileforcheck)) {
+		#&printErr();
+		die "Can not find $fileforcheck, or file is empty!!!\n";
+	}
+	if ($format eq "fasta" || $format eq "fa") {
+		&checkfa($fileforcheck);
+	}
+	if ($format eq "fastq" || $format eq "fq") {
+		&checkfq($fileforcheck);
+	}
+}
+sub checkfa{
+	my ($file_reads)=@_;
+	open N,"<$file_reads";
+	my $line=<N>;
+	chomp $line;
+    if($line !~ /^>\S+/){
+        #printErr();
+        die "The first line of file $file_reads does not start with '>identifier'
+Reads file $file_reads is not a valid fasta file\n\n";
+    }
+    if(<N> !~ /^[ACGTNacgtn]*$/){
+        #printErr();
+        die "File $file_reads contains not allowed characters in sequences
+Allowed characters are ACGTN
+Reads file $file_reads is not a fasta file\n\n";
+    }
+	close N;
+}
+sub checkfq{
+	my ($file_reads)=@_;
+
+	open N,"<$file_reads";
+	for (my $i=0;$i<10;$i++) {
+		my $a=<N>;
+		my $b=<N>;
+		my $c=<N>;
+		my $d=<N>;
+		chomp $a;
+		chomp $b;
+		chomp $c;
+		chomp $d;
+		if($a!~/^\@/){
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		if($b!~ /^[ACGTNacgtn]*$/){
+			#&printErr();
+			die "File $file_reads contains not allowed characters in sequences
+Allowed characters are ACGTN
+Reads file $file_reads is not a fasta file\n\n";
+		}
+		if ($c!~/^\@/ && $c!~/^\+/) {
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		if ((length $b) != (length $d)) {
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		my @qv=split //,$d;
+		for (my $j=0;$j<@qv ;$j++) {
+			my $q=ord($qv[$j])-64;
+			if($q<0){$phred_qv=33;}
+		}
+	}
+	close N;
+}
+
+sub search{
+	my ($dir,$str)=@_;
+	opendir I,$dir;
+	my @ret;
+	while (my $file=readdir I) {
+		if ($file=~/$str/) {
+			push @ret, $file;
+		}
+	}
+	closedir I;
+	if (@ret != 1) {
+		#&printErr();
+
+		die "Can not find directory or file which name has string: $str !!!\n";
+	}
+	return $ret[0];
+}
+
+sub Time{
+        my $time=time();
+        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+        $month++;
+        $year+=1900;
+        if (length($sec) == 1) {$sec = "0"."$sec";}
+        if (length($min) == 1) {$min = "0"."$min";}
+        if (length($hour) == 1) {$hour = "0"."$hour";}
+        if (length($day) == 1) {$day = "0"."$day";}
+        if (length($month) == 1) {$month = "0"."$month";}
+        #print "$year-$month-$day $hour:$min:$sec\n";
+        return("$year-$month-$day $hour:$min:$sec");
+}
+
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -format -gfa -index -rfam -a -M -min -max -mis -v -t  -o  -path
+options:
+-i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ...
+-tag string # raw data file names, -tag xxx -tag xxx
+
+-format string,#specific input rawdata file format : fastq|fq|fasta|fa
+-phred  int # phred quality number, default is 64
+
+-path scirpt path
+
+-gfa string,  input file # genome fasta. sequence file
+-idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+
+-rfam string,  input file# rfam database file, microRNAs must not be contained in this file## if not define, rfam small RNA will not be count.
+-idx2 string,  rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+
+-a string,  ADAPTER string. default is ATCTCGTATG.
+-M int,    require minimum adapter alignment length of N. If less than N nucleotides aligned with the adapter - don't clip it. 
+-min int,  reads min length,default is 19.
+-max int,  reads max length,default is 28.
+
+-mis [int]     number of allowed mismatches when mapping reads to genome, default 0
+-v <int>     report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment
+
+-t int,    number of threads [1]
+
+-o output directory# absolute path
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/precursors.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,858 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2013/7/19
+#Modified:
+#Description: 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+#use RNA;
+
+my %opts;
+GetOptions(\%opts,"map=s","g=s","d:i","f:i","o=s","e:f","s=s","h");
+if (!(defined $opts{map} and defined $opts{g} and defined $opts{o} and defined $opts{s} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $checkno=1;
+my $filein=$opts{'map'};
+my $faout=$opts{'o'};
+my $strout=$opts{'s'};
+my $genome= $opts{'g'};
+
+my $maxd=defined $opts{'d'} ? $opts{'d'} : 200;
+my $flank=defined $opts{'f'}? $opts{'f'} : 10;
+
+my $MAX_ENERGY=-18;
+if (defined $opts{'e'}) {$MAX_ENERGY=$opts{'e'};}
+my $MAX_UNPAIR=5;
+my $MIN_PAIR=15;
+my $MAX_SIZEDIFF=4;
+my $MAX_BULGE=2;
+my $ASYMMETRY=5;
+my $MIN_UNPAIR=0;
+my $MIN_SPACE=5;
+my $MAX_SPACE=$maxd;
+my $FLANK=$flank;
+
+######### load in genome sequences start ########
+my %genome;
+my %lng;
+my $name;
+open IN,"<$genome";
+while (my $aline=<IN>) {
+	chomp $aline;
+	next if($aline=~/^\#/);
+	if ($aline=~/^>(\S+)/) {
+		$name=$1;
+		next;
+	}
+	$genome{$name} .=$aline;
+}
+close IN;
+foreach my $key (keys %genome) {
+	$lng{$key}=length($genome{$key});
+}
+####### load in genome sequences end ##########
+
+my %breaks; ### reads number bigger than 3
+open IN,"<$filein"; #input file  
+while (my $aline=<IN>) {
+	chomp $aline;
+	my @tmp=split/\t/,$aline;
+	$tmp[0]=~/_x(\d+)$/;
+	my $no=$1;
+	next if($no<3);
+	#my $trand=&find_strand($tmp[9]);
+	#my @pos=split/\.\./,$tmp[5];
+	my $end=$tmp[3]+length($tmp[4])-1;
+	if($tmp[1] eq "-"){$tmp[4]=revcom($tmp[4]);}
+	push @{$breaks{$tmp[2]}{$tmp[1]}},[$tmp[3],$end,$no,$tmp[4]]; ### 0 base
+}
+close IN;
+
+my %cites; ### peaks
+foreach my $chr (keys %breaks) {
+	foreach my $strand (keys %{$breaks{$chr}}) {
+		my @array=@{$breaks{$chr}{$strand}};
+		@array=sort{$a->[0]<=>$b->[0]} @array;
+		for (my $i=0;$i<@array;$i++) {
+			my $start=$array[$i][0];my $end=$array[$i][1];
+			my @subarray=();
+			push @subarray,$array[$i];
+
+			for (my $j=$i+1;$j<@array;$j++) {
+				if ($start<$array[$j][1] && $end>$array[$j][0]) {   ###overlap
+					push @subarray,$array[$j];
+					($start,$end)=&newpos($start,$end,$array[$j][0],$array[$j][1]);
+				}
+				else{
+					$i=$j-1;
+					&find_cites(\@subarray,$chr,$strand);
+					last;
+				}
+			}
+		}
+	}
+}
+
+my %cluster;
+foreach my $chr (keys %cites) {
+	foreach my $strand (keys %{$cites{$chr}}) {
+		my @array=@{$sites{$chr}{$strand}};
+		@array=sort{$a->[0]<=>$b->[0]} @array;
+		for (my $i=0;$i<@array;$i++) {
+			my $start=$array[$i][0];my $end=$array[$i][1];
+			my @subarray=();
+			push @subarray,$array[$i];
+
+			for (my $j=$i+1;$j<@array;$j++) {
+				if ($end>$array[$j][0]-$maxd) {   ###distance less than 200bp
+					push @subarray,$array[$j];
+					($start,$end)=&newpos($start,$end,$array[$j][0],$array[$j][1]);
+				}
+				else{
+					@{$cluster{$chr}{$strand}{$i}}=@subarray;
+					$i=$j-1;
+					last;
+				}
+			}
+		}
+
+	}
+}
+
+
+open FA,">$faout"; #output file 
+open STR,">$strout";
+foreach my $chr (keys %cluster) {
+	foreach my $strand (keys %{$cluster{$chr}}) {
+		foreach my $no (keys %{$cluster{$chr}{$strand}}) {
+			my @array2=@{$cluster{$chr}{$strand}{$no}};
+			@array2=sort{$a->[0]<=>$b->[0]} @array2;
+			&excise(\@array2,$chr,$strand);
+		}
+	}
+}
+close FA;
+close STR;
+sub oneCiteDn{
+	my ($array,$a,$chr,$strand)=@_;
+
+	my $ss=$$array[$a][0]-$flank;
+	$ss=0 if($ss<0);
+	my $ee=$$array[$a][1]+$maxd+$flank;
+	$ee=$lng{$chr} if($ee>$lng{$chr});
+	
+	my $seq=substr($genome{$chr},$ss,$ee-$ss+1);
+	if($strand eq "-"){$seq=revcom($seq);}
+
+	my $val=&ffw1($seq,$$array[$a][3],$chr,$strand,$ss,$ee);
+	return $val;
+}
+sub oneCiteUp{
+	my ($array,$a,$chr,$strand)=@_;
+
+	my $ss=$$array[$a][0]-$maxd-$flank;
+	$ss=0 if($ss<0);
+	my $ee=$$array[$a][1]+$flank;
+	$ee=$lng{$chr} if($ee>$lng{$chr});
+	
+	my $seq=substr($genome{$chr},$ss,$ee-$ss+1);
+	if($strand eq "-"){$seq=revcom($seq);}
+
+	my $val=&ffw1($seq,$$array[$a][3],$chr,$strand,$ss,$ee);
+	return $val;
+
+}
+
+sub twoCites{
+	my ($array,$a,$b,$chr,$strand)=@_;
+
+	my $ss=$$array[$a][0]-$flank;
+	$ss=0 if($ss<0);
+	my $ee=$$array[$b][1]+$flank;
+	$ee=$lng{$chr} if($ee>$lng{$chr});
+	
+	my $seq=substr($genome{$chr},$ss,$ee-$ss+1);
+	if($strand eq "-"){$seq=revcom($seq);}
+
+#	my( $str,$mfe)=RNA::fold($seq);
+#	return 0 if($mfe>$MAX_ENERGY); ### minimum mfe
+	my $val=&ffw2($seq,$$array[$a][3],$$array[$b][3],$chr,$strand,$ss,$ee);
+	
+	return $val;
+
+}
+sub excise{
+	my ($cluster,$chr,$strand)=@_;
+
+	if(@{$cluster}==1){
+		$ok=&oneCiteDn($cluster,0,$chr,$strand);
+		$ok=&oneCiteUp($cluster,0,$chr,$strand);
+	}else{
+		my $peak_pos=0;
+
+		for (my $i=0;$i<@{$cluster};$i++) {
+			if($$cluster[$i][2]>$$cluster[$peak_pos][2]){$peak_pos=$i;}
+		}
+
+		my $ok=0; 
+		for (my $i=0;$i<@{$cluster};$i++) {
+			next if($i==$peak_pos);
+			if($i<$peak_pos){$ok=&twoCites($cluster,$i,$peak_pos,$chr,$strand);}
+			else{$ok=&twoCites($cluster,$peak_pos,$i,$chr,$strand);}
+			last if($ok);
+		}
+		if (!$ok) {
+			$ok=&oneCiteDn($cluster,$peak_pos,$chr,$strand);
+			$ok=&oneCiteUp($cluster,$peak_pos,$chr,$strand);
+		}
+
+	}
+}
+
+sub ffw2{
+	my ($seq,$tag1,$tag2,$chr,$strand,$ss,$ee)=@_;
+	
+	my $N_count=$seq=~tr/N//;  ## precursor sequence has not more than 5 Ns
+	if ($N_count > 5) {
+		return 0;
+	}
+
+	my $seq_length=length $seq;
+	# position tag1 and tag2
+	my $tag1_beg=index($seq,$tag1,0)+1;
+	if ($tag1_beg < 1) {
+		warn "[ffw2] coordinate error.\n";
+#		$fold->{reason}="coordinate error";
+		return 0;
+	}
+	my $tag2_beg=index($seq,$tag2,0)+1;
+	if ($tag2_beg < 1) {
+		warn "[ffw2] coordinate error.\n";
+#		$fold->{reason}="coordinate error";
+		return 0;
+	}
+	if ($tag2_beg < $tag1_beg) {
+		# swap tag1 and tag2
+		($tag1,$tag2)=($tag2,$tag1);
+		($tag1_beg,$tag2_beg)=($tag2_beg,$tag1_beg);
+	}
+	my $tag1_end=$tag1_beg+length($tag1)-1;
+	my $tag2_end=$tag2_beg+length($tag2)-1;
+	# re-clipping
+	my $beg=$tag1_beg-$FLANK; $beg=1 if $beg < 1;
+	my $end=$tag2_end+$FLANK; $end=$seq_length if $end > $seq_length;
+	$seq=substr($seq,$beg-1,$end-$beg+1);
+	$seq_length=length $seq;
+	# re-reposition
+	$tag1_beg=index($seq,$tag1,0)+1;
+	if ($tag1_beg < 1) {
+		warn "[ffw2] coordinate error.\n";
+#		$fold->{reason}="coordinate error";
+		return 0;
+	}
+	
+	$tag2_beg=index($seq,$tag2,0)+1;
+	if ($tag2_beg < 1) {
+		warn "[ffw2] coordinate error.\n";
+#		$fold->{reason}="coordinate error";
+		return 0;
+	}
+	$tag1_end=$tag1_beg+length($tag1)-1;
+	$tag2_end=$tag2_beg+length($tag2)-1;
+
+	# fold
+	#my ($struct,$mfe)=RNA::fold($seq);
+		my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
+		my @rawfolds=split/\s+/,$rnafold;
+		my $struct=$rawfolds[1];
+		my $mfe=$rawfolds[-1];
+		$mfe=~s/\(//;
+		$mfe=~s/\)//;
+	#$mfe=sprintf "%.2f", $mfe;
+	if ($mfe > $MAX_ENERGY) {return 0;}
+
+	# tag1
+	my $tag1_length=$tag1_end-$tag1_beg+1;
+	my $tag1_struct=substr($struct,$tag1_beg-1,$tag1_length);
+	my $tag1_arm=which_arm($tag1_struct);
+	my $tag1_unpair=$tag1_struct=~tr/.//;
+	my $tag1_pair=$tag1_length-$tag1_unpair;
+	my $tag1_max_bulge=biggest_bulge($tag1_struct);
+	if ($tag1_arm ne "5p") { return 0;} # tag not in stem
+#	if ($tag1_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag1_unpair ($MAX_UNPAIR)"; return $pass}
+	if ($tag1_pair < $MIN_PAIR) {return 0;}
+	if ($tag1_max_bulge > $MAX_BULGE) {return 0;}
+
+	# tag2
+	my $tag2_length=$tag2_end-$tag2_beg+1;
+	my $tag2_struct=substr($struct,$tag2_beg-1,$tag2_length);
+	my $tag2_arm=which_arm($tag2_struct);
+	my $tag2_unpair=$tag2_struct=~tr/.//;
+	my $tag2_pair=$tag2_length-$tag2_unpair;
+	my $tag2_max_bulge=biggest_bulge($tag2_struct);
+	if ($tag2_arm ne "3p") {return 0;} # star not in stem
+#	if ($tag2_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag2_unpair ($MAX_UNPAIR)"; return $pass}
+	if ($tag2_pair < $MIN_PAIR) {return 0;}
+	if ($tag2_max_bulge > $MAX_BULGE) {return 0;}
+
+	# space size between miR and miR*
+	my $space=$tag2_beg-$tag1_end-1;
+	if ($space < $MIN_SPACE) {return 0;}
+	if ($space > $MAX_SPACE) {return 0;}
+	
+	# size diff of miR and miR*
+	my $size_diff=abs($tag1_length-$tag2_length);
+	if ($size_diff > $MAX_SIZEDIFF) {return 0;}
+
+	# build base pairing table
+	my %pairtable;
+	&parse_struct($struct,\%pairtable); # coords count from 1
+
+	my $asy1=get_asy(\%pairtable,$tag1_beg,$tag1_end);
+	my $asy2=get_asy(\%pairtable,$tag2_beg,$tag2_end);
+	my $asy=($asy1 < $asy2) ? $asy1 : $asy2;
+	if ($asy > $ASYMMETRY) {return 0}
+
+	# duplex fold, determine whether two matures like a miR/miR* ike duplex
+	my ($like_mir_duplex1,$duplex_pair,$overhang1,$overhang2)=likeMirDuplex1($tag1,$tag2);
+	# parse hairpin, determine whether two matures form miR/miR* duplex in hairpin context
+	my ($like_mir_duplex2,$duplex_pair2,$overhang_b,$overhang_t)=likeMirDuplex2(\%pairtable,$tag1_beg,$tag1_end,$tag2_beg,$tag2_end);
+	if ($like_mir_duplex1==0 && $like_mir_duplex2==0) {
+		return 0;
+	}
+
+	print FA ">$chr:$strand:$ss..$ee\n$seq\n";
+	print STR ">$chr:$strand:$ss..$ee\n$seq\n$struct\t($mfe)\n";
+	
+	return 1;	
+}
+
+sub ffw1{
+	my ($seq,$tag,$chr,$strand,$ss,$ee)=@_;
+	my $pass=0;
+
+	my $N_count=$seq=~tr/N//;
+	if ($N_count > 5) {
+		return 0;
+	}
+
+	my $seq_length=length $seq;
+	my $tag_length=length $tag;
+
+	# position
+	my $tag_beg=index($seq,$tag,0)+1;
+	if ($tag_beg < 1) {
+		 warn "[ffw1] coordinate error.\n";
+		 return $pass;
+	}
+	my $tag_end=$tag_beg+length($tag)-1;
+	
+	
+	# define candidate precursor by hybrid short arm to long arm, not solid enough
+	my($beg,$end)=define_precursor($seq,$tag);
+	if (not defined $beg) {
+		return $pass;
+	}
+	if (not defined $end) {
+		return $pass;
+	}
+	$seq=substr($seq,$beg-1,$end-$beg+1);
+	$seq_length=length $seq;
+	
+	
+	# fold
+	#my ($struct,$mfe)=RNA::fold($seq);
+		my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
+		my @rawfolds=split/\s+/,$rnafold;
+		my $struct=$rawfolds[1];
+		my $mfe=$rawfolds[-1];
+		$mfe=~s/\(//;
+		$mfe=~s/\)//;
+	
+	if ($mfe > $MAX_ENERGY) {
+		$pass=0;
+		return $pass;
+	}
+
+	# reposition
+	$tag_beg=index($seq,$tag,0)+1;
+	if ($tag_beg < 1) {
+		 warn "[ffw1] coordinate error.\n";
+		 return 0;
+	}
+	$tag_end=$tag_beg+length($tag)-1;
+
+	my $tag_struct=substr($struct,$tag_beg-1,$tag_length);
+	my $tag_arm=which_arm($tag_struct);
+	my $tag_unpair=$tag_struct=~tr/.//;
+	my $tag_pair=$tag_length-$tag_unpair;
+	my $tag_max_bulge=biggest_bulge($tag_struct);
+	if ($tag_arm eq "-") { return $pass;}
+#	if ($tag_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$tag_unpair ($MAX_UNPAIR)"; return $pass}
+	if ($tag_pair < $MIN_PAIR) { return $pass;}
+	if ($tag_max_bulge > $MAX_BULGE) {return $pass;}
+
+	# build base pairing table
+	my %pairtable;
+	&parse_struct($struct,\%pairtable); # coords count from 1
+	
+	# get star
+	my ($star_beg,$star_end)=get_star(\%pairtable,$tag_beg,$tag_end);
+	my $star=substr($seq,$star_beg-1,$star_end-$star_beg+1);
+	my $star_length=$star_end-$star_beg+1;
+	my $star_struct=substr($struct,$star_beg-1,$star_end-$star_beg+1);
+	my $star_arm=which_arm($star_struct);
+	my $star_unpair=$star_struct=~tr/.//;
+	my $star_pair=$star_length-$star_unpair;
+	my $star_max_bulge=biggest_bulge($star_struct);
+	if ($star_arm eq "-") { return $pass;}
+#	if ($star_unpair > $MAX_UNPAIR) {$fold->{reason}="unpair=$star_unpair ($MAX_UNPAIR)"; return $pass}
+	if ($star_pair < $MIN_PAIR) {return $pass;}
+	if ($star_max_bulge > $MAX_BULGE) {return $pass;}
+
+	if ($tag_arm eq $star_arm) {return $pass;}
+	
+	# space size between miR and miR*
+	my $space;
+	if ($tag_beg < $star_beg) {
+		$space=$star_beg-$tag_end-1;
+	}
+	else {
+		$space=$tag_beg-$star_end-1;
+	}
+	if ($space < $MIN_SPACE) { return $pass;}
+	if ($space > $MAX_SPACE) { return $pass;}
+
+	# size diff
+	my $size_diff=abs($tag_length-$star_length);
+	if ($size_diff > $MAX_SIZEDIFF) { return $pass;}
+	
+	# asymmetry
+	my $asy=get_asy(\%pairtable,$tag_beg,$tag_end);
+	if ($asy > $ASYMMETRY) {return $pass;}
+	
+	$pass=1;
+	print FA ">$chr:$strand:$ss..$ee\n$seq\n";
+	print STR ">$chr:$strand:$ss..$ee\n$seq\n$struct\t($mfe)\n";
+	return $pass;
+
+}
+sub get_star {
+	my($table,$beg,$end)=@_;
+	
+	my ($s1,$e1,$s2,$e2); # s1 pair to s2, e1 pair to e2
+	foreach my $i ($beg..$end) {
+		if (defined $table->{$i}) {
+			my $j=$table->{$i};
+			$s1=$i;
+			$s2=$j;
+			last;
+		}
+	}
+	foreach my $i (reverse ($beg..$end)) {
+		if (defined $table->{$i}) {
+			my $j=$table->{$i};
+			$e1=$i;
+			$e2=$j;
+			last;
+		}
+	}
+#	print "$s1,$e1 $s2,$e2\n";
+	
+	# correct terminus
+	my $off1=$s1-$beg;
+	my $off2=$end-$e1;
+	$s2+=$off1;
+	$s2+=2; # 081009
+	$e2-=$off2; $e2=1 if $e2 < 1;
+	$e2+=2; $e2=1 if $e2 < 1; # 081009
+	($s2,$e2)=($e2,$s2) if ($s2 > $e2);
+	return ($s2,$e2);
+}
+
+sub define_precursor {
+	my $seq=shift;
+	my $tag=shift;
+
+	my $seq_length=length $seq;
+	my $tag_length=length $tag;
+	my $tag_beg=index($seq,$tag,0)+1;
+	my $tag_end=$tag_beg+$tag_length-1;
+
+	# split the candidate region into short arm and long arm
+	my $tag_arm;
+        my ($larm,$larm_beg,$larm_end);
+	my ($sarm,$sarm_beg,$sarm_end);
+        if ($tag_beg-1 < $seq_length-$tag_end) { # on 5' arm
+                $sarm=substr($seq,0,$tag_end);
+		$larm=substr($seq,$tag_end);
+		$sarm_beg=1;
+		$sarm_end=$tag_end;
+		$larm_beg=$tag_end+1;
+		$larm_end=$seq_length;
+		$tag_arm="5p";
+        }
+        else {
+            $larm=substr($seq,0,$tag_beg-1); # on 3' arm
+			$sarm=substr($seq,$tag_beg-1);
+			$larm_beg=1;
+			$larm_end=$tag_beg-1;
+			$sarm_beg=$tag_beg;
+			$sarm_end=$seq_length;
+			$tag_arm="3p";
+        }
+	
+#	print "$sarm_beg,$sarm_end $sarm\n";
+#	print "$larm_beg,$larm_end $larm\n";
+
+	# clipping short arm
+	if ($tag_arm eq "5p") {
+		$sarm_beg=$tag_beg-$flank; $sarm_beg=1 if $sarm_beg < 1;
+		$sarm=substr($seq,$sarm_beg-1,$sarm_end-$sarm_beg+1);
+	}
+	else {
+		$sarm_end=$tag_end+$flank; $sarm_end=$seq_length if $sarm_end > $seq_length;
+		$sarm=substr($seq,$sarm_beg-1,$sarm_end-$sarm_beg+1);
+	}
+#	print "$sarm_beg,$sarm_end $sarm\n";
+#	print "$larm_beg,$larm_end $larm\n";
+
+	# define the precursor by hybriding short arm to long arm
+=cut #modify in 2014-10-28
+	my $duplex=RNA::duplexfold($sarm,$larm);
+	my $struct=$duplex->{structure};
+	my $energy=sprintf "%.2f", $duplex->{energy};
+	my ($str1,$str2)=split(/&/,$struct);
+	my $pair=$str1=~tr/(//;
+#	print "pair=$pair\n";
+	my $beg1=$duplex->{i}+1-length($str1);
+	my $end1=$duplex->{i};
+	my $beg2=$duplex->{j};
+	my $end2=$duplex->{j}+length($str2)-1;
+=cut
+###### new codes begin
+	my $duplex=`perl -e 'print "$sarm\n$larm"' | RNAduplex`;
+	#(.(.(((.....(((.&))))))...).).   1,16  :   1,13  (-7.20)
+	my @tmpduplex=split/\s+/,$duplex;
+	my $struct=$tmpduplex[0];
+	$tmpduplex[-1]=~s/[(|)]//g;
+	my $energy=$tmpduplex[-1];
+	my ($str1,$str2)=split(/&/,$struct);
+	my $pair=$str1=~tr/(//;
+	my ($beg1,$end1)=split/,/,$tmpduplex[1];
+	my ($beg2,$end2)=split/,/,$tmpduplex[3];
+######## new codes end
+
+#	print "$beg1:$end1 $beg2:$end2\n";
+	# transform coordinates
+	$beg1=$beg1+$sarm_beg-1;
+	$end1=$end1+$sarm_beg-1;
+	$beg2=$beg2+$larm_beg-1;
+	$end2=$end2+$larm_beg-1;
+#	print "$beg1:$end1 $beg2:$end2\n";
+	
+	my $off5p=$beg1-$sarm_beg;
+	my $off3p=$sarm_end-$end1;
+	$beg2-=$off3p; $beg2=1 if $beg2 < 1;
+	$end2+=$off5p; $end2=$seq_length if $end2 > $seq_length;
+
+#	print "$beg1:$end1 $beg2:$end2\n";
+	
+	my $beg=$sarm_beg < $beg2 ? $sarm_beg : $beg2;
+	my $end=$sarm_end > $end2 ? $sarm_end : $end2;
+	
+	return if $pair < $MIN_PAIR;
+#	print "$beg,$end\n";
+	return ($beg,$end);
+}
+
+
+# duplex fold, judge whether two short seqs like a miRNA/miRNA* duplex
+sub likeMirDuplex1 {
+	my $seq1=shift;
+	my $seq2=shift;
+	my $like_mir_duplex=1;
+	
+	my $length1=length $seq1;
+	my $length2=length $seq2;
+=cut
+	my $duplex=RNA::duplexfold($seq1, $seq2);
+	my $duplex_struct=$duplex->{structure};
+	my $duplex_energy=sprintf "%.2f", $duplex->{energy};
+	my ($str1,$str2)=split(/&/,$duplex_struct);
+	my $beg1=$duplex->{i}+1-length($str1);
+	my $end1=$duplex->{i};
+	my $beg2=$duplex->{j};
+	my $end2=$duplex->{j}+length($str2)-1;
+=cut
+	my $duplex=`perl -e 'print "$seq1\n$seq2"' | RNAduplex`;
+	#(.(.(((.....(((.&))))))...).).   1,16  :   1,13  (-7.20)
+	my @tmpduplex=split/\s+/,$duplex;
+	my $duplex_struct=$tmpduplex[0];
+	$tmpduplex[-1]=~s/[(|)]//g;
+	my $duplex_energy=$tmpduplex[-1];
+	my ($str1,$str2)=split(/&/,$duplex_struct);
+	#my $pair=$str1=~tr/(//;
+	my ($beg1,$end1)=split/,/,$tmpduplex[1];
+	my ($beg2,$end2)=split/,/,$tmpduplex[3];
+
+	# revise beg1, end1, beg2, end2
+	$str1=~/^(\.*)/;
+	$beg1+=length($1);
+	$str1=~/(\.*)$/;
+	$end1-=length($1);
+	$str2=~/^(\.*)/;
+	$beg2+=length($1);
+	$str2=~/(\.*)$/;
+	$end2-=length($1);
+
+	my $pair_num=$str1=~tr/(//;
+	my $overhang1=($length2-$end2)-($beg1-1); # 3' overhang at hairpin bottom
+	my $overhang2=($length1-$end1)-($beg2-1); # 3' overhang at hairpin neck
+#	print $pair_num,"\n";
+#	print $overhang1,"\n";
+#	print $overhang2,"\n";
+	if ($pair_num < 13) {
+		$like_mir_duplex=0;
+	}
+	if ($overhang1 < 0 || $overhang2 < 0 ) {
+		$like_mir_duplex=0;
+	}
+	if ($overhang1 > 4 || $overhang2 > 4) {
+		$like_mir_duplex=0;
+	}
+	return ($like_mir_duplex,$pair_num,$overhang1,$overhang2);
+}
+
+# judge whether two matures form miR/miR* duplex, in hairpin context
+sub likeMirDuplex2 {
+	my ($table,$beg1,$end1,$beg2,$end2)=@_;
+	my $like_mir_duplex=1;
+
+#	   s1         e1
+#   5 ----------------------------3	
+#      | | |||| |||               |
+#3 -------------------------------5
+#      e2         s2
+
+	my $pair_num=0;
+	my $overhang1=0;
+	my $overhang2=0;
+	my ($s1,$e1,$s2,$e2);
+	foreach my $i ($beg1..$end1) {
+		if (defined $table->{$i}) {
+			my $j=$table->{$i};
+			if ($j <= $end2 && $j >= $beg2) {
+				$s1=$i;
+				$e2=$j;
+				last;
+			}
+		}
+	}
+	foreach my $i (reverse ($beg1..$end1)) {
+		if (defined $table->{$i}) {
+			my $j=$table->{$i};
+			if ($j <= $end2 && $j >= $beg2) {
+				$e1=$i;
+				$s2=$j;
+				last;
+			}
+		}
+	}
+
+#	print "$beg1,$end1 $s1,$e1\n";
+#	print "$beg2,$end2 $s2,$e2\n";
+
+	foreach my $i ($beg1..$end1) {
+		if (defined $table->{$i}) {
+			my $j=$table->{$i};
+			if ($j <= $end2 && $j >= $beg2) {
+				++$pair_num;
+			}
+		}
+	}
+	if (defined $s1 && defined $e2) {
+		$overhang1=($end2-$e2)-($s1-$beg1);
+	}
+	if (defined $e1 && defined $s2) {
+		$overhang2=($end1-$e1)-($s2-$beg2);
+	}
+	
+	if ($pair_num < 13) {
+		$like_mir_duplex=0;
+	}
+	if ($overhang1 < 0 && $overhang2 < 0) {
+		$like_mir_duplex=0;
+	}
+	return ($like_mir_duplex,$pair_num,$overhang1,$overhang2);
+}
+sub parse_struct {
+	my $struct=shift;
+	my $table=shift;
+
+	my @t=split('',$struct);
+	my @lbs; # left brackets
+	foreach my $k (0..$#t) {
+		if ($t[$k] eq "(") {
+			push @lbs, $k+1;
+		}
+		elsif ($t[$k] eq ")") {
+			my $lb=pop @lbs;
+			my $rb=$k+1;
+			$table->{$lb}=$rb;
+			$table->{$rb}=$lb;
+		}
+	}
+	if (@lbs) {
+		warn "unbalanced RNA struct.\n";
+	}
+}
+sub which_arm {
+	my $substruct=shift;
+	my $arm;
+	if ($substruct=~/\(/ && $substruct=~/\)/) {
+		$arm="-";
+	}
+	elsif ($substruct=~/\(/) {
+		$arm="5p";
+	}
+	else {
+		$arm="3p";
+	}
+	return $arm;
+}
+sub biggest_bulge {
+	my $struct=shift;
+	my $bulge_size=0;
+	my $max_bulge=0;
+	while ($struct=~/(\.+)/g) {
+		$bulge_size=length $1;
+		if ($bulge_size > $max_bulge) {
+			$max_bulge=$bulge_size;
+		}
+	}
+	return $max_bulge;
+}
+sub get_asy {
+	my($table,$a1,$a2)=@_;
+	my ($pre_i,$pre_j);
+	my $asymmetry=0;
+	foreach my $i ($a1..$a2) {
+		if (defined $table->{$i}) {
+			my $j=$table->{$i};
+			if (defined $pre_i && defined $pre_j) {
+				my $diff=($i-$pre_i)+($j-$pre_j);
+				$asymmetry += abs($diff);
+			}
+			$pre_i=$i;
+			$pre_j=$j;
+		}
+	}
+	return $asymmetry;
+}
+
+sub peaks{
+	my @cluster=@{$_[0]};
+
+	return if(@cluster<1);
+
+	my $max=0; my $index=-1;
+	for (my $i=0;$i<@cluster;$i++) {
+		if($cluster[$i][2]>$max){
+			$max=$cluster[$i][2];
+			$index=$i;
+		}
+	}
+#	&excise(\@cluster,$index,$_[1],$_[2]);
+	return($index);
+}
+
+sub find_cites{
+	my @tmp=@{$_[0]};
+	my $i=&peaks(\@tmp);
+	
+	my $start=$tmp[$i][0];
+	my $total=0; my $node5=0;
+	for (my $j=0;$j<@tmp ;$j++) {
+		$total+=$tmp[$j][2];
+		$node5 +=$tmp[$j][2] if($tmp[$j][0]-$start<=2 && $tmp[$j][0]-$start>=-2);
+	}
+	push @{$cites{$_[1]}{$_[2]}},$tmp[$i] if($node5/$total>0.80 && $tmp[$i][2]/$node5>0.5);
+}
+
+sub newpos{
+	my ($a,$b,$c,$d)=@_;
+	my $s= $a>$c ? $c : $a;
+	my $e=$b>$d ? $b : $d;
+	return($s,$e);
+}
+
+sub rev{
+
+    my($sequence)=@_;
+
+    my $rev=reverse $sequence;   
+
+    return $rev;
+}
+
+sub com{
+
+    my($sequence)=@_;
+
+    $sequence=~tr/acgtuACGTU/TGCAATGCAA/;   
+ 
+    return $sequence;
+}
+
+sub revcom{
+
+    my($sequence)=@_;
+
+    my $revcom=rev(com($sequence));
+
+    return $revcom;
+}
+
+sub find_strand{
+
+    #A subroutine to find the strand, parsing different blast formats
+    my($other)=@_;
+
+    my $strand="+";
+
+    if($other=~/-/){
+	$strand="-";
+    }
+
+    if($other=~/minus/i){
+	$strand="-";
+    }
+
+    return($strand);
+}
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -map -g -d -f -o -s -e
+options:
+  -map input file# align result # bst. format
+  -g input file # genome sequence fasta format
+  -d <int>   Maximal space between miRNA and miRNA* (200)
+  -f <int>   Flank sequence length of miRNA precursor (10)
+  -o output file# percursor fasta file
+  -s output file# precursor structure file
+  -e <folat> Maximal free energy allowed for a miRNA precursor (-18 kcal/mol)
+
+  -h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/quantify.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,502 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2013/7/19
+#Modified:
+#Description: 
+my $version=1.00;
+
+use File::Path;
+use strict;
+use File::Basename;
+#use Getopt::Std;
+use Getopt::Long;
+#use RNA;
+
+my %opts;
+GetOptions(\%opts,"r=s","p=s","m=s","mis:i","t:i","e:i","f:i","tag:s","o=s","h");
+if (!(defined $opts{r} and defined $opts{p} and defined $opts{m} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $read=$opts{'r'};
+my $pre=$opts{'p'};
+my $mature=$opts{'m'};
+
+my $dir=$opts{'o'};
+unless ($dir=~/\/$/) {$dir .="/";}
+if (not -d $dir) {
+	mkdir $dir;
+}
+
+my $threads=defined $opts{'t'} ? $opts{'t'} : 1;
+my $mismatch=defined $opts{'mis'} ?  $opts{'mis'} : 0;
+
+my $upstream = 2;
+my $downstream = 5;
+
+$upstream = $opts{'e'} if(defined $opts{'e'});
+$downstream = $opts{'f'} if(defined $opts{'f'});
+
+my $marks=defined $opts{'tag'} ? $opts{'tag'} : "";
+
+my $time=Time();
+
+my $tmpdir="${dir}/known_miRNA_Express";
+if(not -d $tmpdir){
+	mkdir($tmpdir);
+}
+chdir $tmpdir;
+
+`cp $pre ./`;
+my $pre_file_name=basename($pre);
+
+&mapping(); # matures align to precursors && reads align to precursors;
+
+my %pre_mature; # $pre_mature{pre_id}{matre_ID}{"mature"}[0]->start; $pre_mature{pre_id}{matre_ID}{"mature"}[1]->end;
+&maturePosOnPre(); # acknowledge mature positions on precursor 
+
+my %pre_read;
+&readPosOnPre(); # acknowledge reads positions on precursors
+
+if(!(defined $opts{'tag'})){
+	foreach my $key (keys %pre_read) {
+		$pre_read{$key}[0][0]=~/:([\d|_]+)_x(\d+)$/;
+		my @ss=split/_/,$1;
+		for (my $i=1;$i<=@ss;$i++) {
+			$marks .="Smp$i;";
+		}
+		last;
+	}
+}
+
+my %pre;## read in precursor sequences #$pre{pre_id}="CGTA...."
+&attachPre();
+
+my $preno=scalar (keys %pre);
+print "Total Precursor Number is $preno !!!!\n";
+
+my %struc; #mature  star  loop; $struc{$key}{"struc"}=$str; $struc{$key}{"mfe"}=$mfe;
+&structure();
+
+
+##### analysis and print out  && moRs
+my $aln=$dir."known_microRNA_express.aln";
+my $list=$dir."known_microRNA_express.txt";
+my $moRs=$dir."known_microRNA_express.moRs";
+
+system("ln -s $mature $dir/known_microRNA_mature.fa ");
+system("ln -s $pre $dir/known_microRNA_precursor.fa ");
+
+open ALN,">$aln";
+open LIST,">$list";
+open MORS,">$moRs";
+
+$"="\t"; ##### @array print in \t
+
+my @marks=split/\;/,$marks;
+#print LIST "#matueID\tpreID\tpos1\tpos2\tmatureExp\tstarExp\ttotalExp\n";
+print LIST "#matueID\tpreID\tpos1\tpos2";
+for (my $i=0;$i<@marks;$i++) {
+	print LIST "\t",$marks[$i],"_matureExp";
+}
+for (my $i=0;$i<@marks;$i++) {
+	print LIST "\t",$marks[$i],"_starExp";
+}
+for (my $i=0;$i<@marks;$i++) {
+	print LIST "\t",$marks[$i],"_totalExp";
+}
+print LIST "\n";
+print ALN "#>precursor ID \n#precursor sequence\n#precursor structure (mfe)\n#RNA_seq\t@marks\ttotal\n";
+print MORS "#>precursor ID\tstrand\texpress_reads\texpress_reads\/total_reads\tblock_number\tprecursor_sequence\n#\tblock_start\tblock_end\t@marks\ttotal\ttag_number\tsequence\n";
+my %moRs;
+
+foreach my $key (keys %pre) {
+	print ALN ">$key\n$pre{$key}\n$struc{$key}{struc}  ($struc{$key}{mfe})\n";
+	next if(! (exists $pre_read{$key}));
+	my @array=@{$pre_read{$key}};
+	@array=sort{$a->[3]<=> $b->[3]} @array;
+	
+	my $length=length($pre{$key});
+
+	my $maxline=-1;my $max=0; ### storage the maxinum express read line
+	my $totalReadsNo=0; 
+	my @not_over=(); ### new read format better for moRs analysis
+
+####print out Aln file start
+	for (my $i=0;$i<@array;$i++) {
+		my $maps=$array[$i][3]+1;
+		my $mape=$array[$i][3]+length($array[$i][4]);
+		my $str="";
+		$str .= "." x ($maps-1);
+		$str .=$array[$i][4];
+		$str .="." x ($length-$mape);
+		$str .="        ";
+
+		$array[$i][0]=~/:([\d|_]+)_x(\d+)$/;
+		my @sample=split /\_/,$1;
+		my $total=$2;
+		print ALN $str,"@sample","\t",$total,"\n";
+
+		if($total>$max){$max=$total; $maxline=$i;}
+		$totalReadsNo+=$total;
+		
+		push @not_over,[$key,$maps,$mape,$array[$i][0],$total,"+"];
+	}
+####print out Aln file end
+
+#### express list start
+	my ($ms,$me,$ss,$se);
+	if (!(exists($pre_mature{$key}))) {
+		$ms=$array[$maxline][3]+1;
+		$me=$array[$maxline][3]+length($array[$maxline][4]);
+		($ss,$se)=&other_pair($ms,$me,$struc{$key}{'struc'});
+	
+		my ($mexp,$sexp,$texp)=&express($ms-$upstream,$me+$downstream,$ss-$upstream,$se+$downstream,\@array);
+		print LIST "$key\t$key\tmature:$ms..$me\tstar:$ss..$se\t@$mexp\t@$sexp\t@$texp\n";
+	}
+	else{
+		foreach my $maID (keys %{$pre_mature{$key}}) {
+			$ms=$pre_mature{$key}{$maID}{"mature"}[0];
+			$me=$pre_mature{$key}{$maID}{"mature"}[1];
+			$ss=$pre_mature{$key}{$maID}{"star"}[0];
+			$se=$pre_mature{$key}{$maID}{"star"}[1];
+			my ($mexp,$sexp,$texp)=&express($ms-$upstream,$me+$downstream,$ss-$upstream,$se+$downstream,\@array);
+			print LIST "$maID\t$key\tmature:$ms..$me\tstar:$ss..$se\t@$mexp\t@$sexp\t@$texp\n";
+		}
+	}
+#### express list end
+
+#### analysis moRs start
+	my @result; my @m_texp;my $m_texp=0; ### moRs informations
+
+	while (@not_over>0) {
+		my @over=@not_over;
+		@not_over=();
+
+#·á¶È×î¸ßtag
+		my $m_max=0;my $m_maxline=-1;my $m_start=0;my $m_end=0;my $m_exp=0;my @m_exp;my $m_no=1;
+		for (my $i=0;$i<@over;$i++) {
+			my @m_array=@{$over[$i]};
+			if ($m_max<$m_array[4]) {
+				$m_max=$m_array[4];
+				$m_maxline=$i;
+			}
+		}
+		$m_start=$over[$m_maxline][1];
+		$m_end=$over[$m_maxline][2];
+		$m_exp=$m_max;
+		$over[$m_maxline][3]=~/:([\d|_]+)_x(\d+)$/;
+		my @m_nums=split/_/,$1;
+		for (my $j=0;$j<@m_nums;$j++) {
+			$m_exp[$j]=$m_nums[$j];
+		}
+
+#ͳ¼ÆÒÔ·á¶È×î¸ßtagΪ×ø±êµÄreads, Á½¶ËλÖòîÒì²»³¬¹ý3nt
+		for (my $i=0;$i<@over;$i++) {
+			next if($i==$m_maxline);
+			my @m_array=@{$over[$i]};
+			if (abs($m_array[1]-$m_start)<=3 && abs($m_array[2]-$m_end)<=3) {
+				$m_exp+=$m_array[4];
+				$m_no++;
+				$m_array[3]=~/:([\d|_]+)_x(\d+)$/;
+				my @m_nums=split/_/,$1;
+				for (my $j=0;$j<@m_nums;$j++) {
+					$m_exp[$j] +=$m_nums[$j];
+				}
+			}
+			elsif($m_array[1]>=$m_end || $m_array[2]<=$m_start){push @not_over,[@{$over[$i]}];} #È¥³ý¿çÔ½blockµÄreads
+		}
+		if($m_exp>5){### 5¸öreads
+			$m_texp+=$m_exp;
+			for (my $j=0;$j<@m_exp;$j++) {
+				$m_texp[$j]+=$m_exp[$j];
+			}
+			my $string=&subseq($pre{$key},$m_start,$m_end,"+");
+			push @result,"\t$m_start\t$m_end\t@m_exp\t$m_exp\t$m_no\t$string" ;
+		}
+	}
+
+	my $str=scalar @result;
+	my $percent=sprintf("%.2f",$m_texp/$totalReadsNo);
+	$str=">$key\t+\t$m_texp\t$percent\t".$str."\t$pre{$key}";
+	@{$moRs{$str}}=@result;
+
+#### analysis moRs end
+}
+
+##### moRs print out start
+foreach my $key (keys %moRs) {
+	my @tmp=split/\t/,$key;
+	next if ($tmp[4]<=2);
+	next if($tmp[3]<0.95);
+	my @over;
+	for (my $i=0;$i<@{$moRs{$key}};$i++) {
+		my @arrayi=split/\t/,$moRs{$key}[$i];
+		for (my $j=0;$j<@{$moRs{$key}};$j++) {
+			next if($i==$j);
+			my @arrayj=split/\t/,$moRs{$key}[$j];
+			if ((($arrayj[1]-$arrayi[2]>=0 && $arrayj[1]-$arrayi[2] <=3) || ($arrayj[1]-$arrayi[2]>=18 && $arrayj[1]-$arrayi[2] <=25) )||(($arrayi[1]-$arrayj[2]>=0 && $arrayi[1]-$arrayj[2] <=3)||($arrayi[1]-$arrayj[2]>=18 && $arrayi[1]-$arrayj[2] <=25))) {
+				push @over,$moRs{$key}[$i];
+			}
+		}
+	}
+	if (@over>0) {
+		print MORS "$key\n";
+		foreach  (@{$moRs{$key}}) {
+			print MORS "$_\n";
+		}
+	}
+}
+###### moRs print out end
+close ALN;
+close LIST;
+close MORS;
+
+$"=" ";##### reset
+
+
+################### Sub programs #################
+sub express{
+	my ($ms,$me,$ss,$se,$read)=@_;
+	my (@mexp,@sexp,@texp);
+	$$read[0][0]=~/:([_|\d]+)_x(\d+)$/;
+	my @numsample=split/_/,$1;
+	for (my $i=0;$i<@numsample;$i++) {
+		$mexp[$i]=0;
+		$sexp[$i]=0;
+		$texp[$i]=0;
+	}
+
+	for (my $i=0;$i<@{$read};$i++) {
+		my $start=$$read[$i][3]+1;
+		my $end=$$read[$i][3]+length($$read[$i][4]);
+		$$read[$i][0]=~/:([_|\d]+)_x(\d+)$/;
+		my $expresses=$1;
+		my @nums=split/_/,$expresses;
+		
+		for (my $j=0;$j<@nums;$j++) {
+			$texp[$j]+=$nums[$j];
+		}
+		if ($start>=$ms && $end<=$me) {
+			for (my $j=0;$j<@nums;$j++) {
+				$mexp[$j]+=$nums[$j];
+			}
+		}
+		if ($start>=$ss && $end<=$se) {
+			for (my $j=0;$j<@nums;$j++) {
+				$sexp[$j]+=$nums[$j];
+			}
+		}
+	}
+	return(\@mexp,\@sexp,\@texp);
+}
+
+sub structure{
+	foreach my $key (keys %pre_mature) {
+		if (!(defined $pre{$key})){die "!!!!! No precursor sequence $key, please check it!\n";}
+		#my ($str,$mfe)=RNA::fold($pre{$key});
+		my $rnafold=`perl -e 'print "$pre{$key}"' | RNAfold --noPS`;
+		my @rnafolds=split/\s+/,$rnafold;
+		my $str=$rnafolds[1];
+		my $mfe=$rnafolds[-1];
+		$mfe=~s/\(//;
+		$mfe=~s/\)//;
+
+		$struc{$key}{"struc"}=$str;
+		#$struc{$key}{"mfe"}=sprintf ("%.2f",$mfe);
+		$struc{$key}{"mfe"}=$mfe;
+
+		foreach my $id (keys %{$pre_mature{$key}}) {
+			($pre_mature{$key}{$id}{"star"}[0],$pre_mature{$key}{$id}{"star"}[1])=&other_pair($pre_mature{$key}{$id}{"mature"}[0],$pre_mature{$key}{$id}{"mature"}[1],$str);
+		}
+=cut
+##### Nucleotide complementary 
+		my @tmp=split//,$str;
+		my %a2b;
+		my @bps;
+		for (my $i=0;$i<@tmp;$i++) {
+			if ($tmp[$i] eq "("){push @bps,$i+1 ; next;}
+			if ($tmp[$i] eq ")") {
+				my $up=pop @bps;
+				$a2b{$i+1}=$up;
+				$a2b{$up}=$i+1;
+			}
+		}
+
+##### search star position
+		foreach my $id (keys %{$pre_mature{$key}}) {
+			my $n=0;
+			for (my $i=$pre_mature{$key}{$id}{"mature"}[0];$i<=$pre_mature{$key}{$id}{"mature"}[1] ; $i++) {				
+				if (defined $a2b{$i}) {
+					my $a=$i; my $b=$a2b{$i};
+					if($a>$b){
+						$pre_mature{$key}{$id}{"star"}[0]=$b-$n+2;
+						$pre_mature{$key}{$id}{"star"}[1]=$b-$n+2+($pre_mature{$key}{$id}{"mature"}[1]-$pre_mature{$key}{$id}{"mature"}[0]);
+					}
+					if($a<$b{
+						$pre_mature{$key}{$id}{"star"}[1]=$b+$n+2;
+						$pre_mature{$key}{$id}{"star"}[0]=$b+$n+2-($pre_mature{$key}{$id}{"mature"}[1]-$pre_mature{$key}{$id}{"mature"}[0]);
+					}
+					last;
+				}
+				$n++;
+			}
+		}
+=cut
+	}
+}
+sub other_pair{
+	my ($start,$end,$structure)=@_;
+	##### Nucleotide complementary 
+	my @tmp=split//,$structure;
+	my %a2b;	my @bps;
+	for (my $i=0;$i<@tmp;$i++) {
+		if ($tmp[$i] eq "("){push @bps,$i+1 ; next;}
+		if ($tmp[$i] eq ")") {
+			my $up=pop @bps;
+			$a2b{$i+1}=$up;
+			$a2b{$up}=$i+1;
+		}
+	}
+##### search star position
+	my $n=0;my $startpos; my $endpos;
+	for (my $i=$start;$i<=$end ; $i++) {				
+		if (defined $a2b{$i}) {
+			my $a=$i; my $b=$a2b{$i};
+#			if($a>$b){
+#				$startpos=$b-$n+2;
+#				$endpos=$b-$n+2+($end-$start);
+#			}
+#			if($a<$b){
+				$endpos=$b+$n+2;
+				if($endpos>length($structure)){$endpos=length($structure);}
+				$startpos=$b+$n+2-($end-$start);
+				if($startpos<1){$startpos=1;}
+#			}
+			last;
+		}
+		$n++;
+	}
+	return ($startpos,$endpos);
+}
+sub attachPre{
+	open IN, "<$pre_file_name";
+	my $name;
+	while (my $aline=<IN>) {
+		chomp $aline;
+		if ($aline=~/^>(\S+)/) {
+			$name=$1;
+			next;
+		}
+		$pre{$name} .=$aline;
+	}
+	close IN;
+}
+sub readPosOnPre{
+	open IN,"<read_mapped.bwt";
+	while (my $aline=<IN>) {
+		chomp $aline;
+		my @tmp=split/\t/,$aline;
+		my $id=lc($tmp[2]);
+		push @{$pre_read{$tmp[2]}},[@tmp];
+	}
+	close IN;
+}
+sub maturePosOnPre{
+	open IN,"<mature_mapped.bwt";
+	while (my $aline=<IN>) {
+		chomp $aline;
+		my @tmp=split/\t/,$aline;
+		my $mm=$tmp[0];
+#		$mm=~s/\-3P|\-5P//i;
+		$mm=lc($mm);
+		my $pm=$tmp[2];
+		$pm=lc($pm);
+
+#		next if ($mm ne $pm);### stringent mapping let7a only allowed to map pre-let7a
+		next if($mm!~/$pm/);
+#		print "$tmp[2]\t$tmp[0]\n";
+#		$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=$tmp[3]-$upstream;
+#		$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=0 if($pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]<0);
+#		$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[1]=$tmp[3]+length($tmp[4])-1+$downstream;
+		$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[0]=$tmp[3]+1;
+		$pre_mature{$tmp[2]}{$tmp[0]}{"mature"}[1]=$tmp[3]+length($tmp[4]);
+	}
+	close IN;
+}
+sub mapping{
+    my $err;
+## build bowtie index
+    #print STDERR "building bowtie index\n";
+    $err = `bowtie-build $pre_file_name miRNA_precursor`;
+
+## map mature sequences against precursors
+    #print STDERR "mapping mature sequences against index\n";
+	$err = `bowtie -p $threads -f -v 0 -a --best --strata --norc miRNA_precursor $mature > mature_mapped.bwt 2> run.log`;
+
+## map reads against precursors
+    #print STDERR "mapping read sequences against index\n";
+    $err=`bowtie -p $threads -f -v $mismatch -a --best --strata --norc miRNA_precursor $read --al mirbase_mapped.fa --un mirbase_not_mapped.fa > read_mapped.bwt 2> run.log`;
+
+}
+
+sub subseq{
+	my $seq=shift;
+	my $beg=shift;
+	my $end=shift;
+	my $strand=shift;
+	
+	my $subseq=substr($seq,$beg-1,$end-$beg+1);
+	if ($strand eq "-") {
+		$subseq=revcom($subseq);
+	}
+	return uc $subseq;
+}
+
+sub revcom{
+	my $seq=shift;
+	$seq=~tr/ATCGatcg/TAGCtagc/;
+	$seq=reverse $seq;
+	return uc $seq;
+}
+
+sub Time{
+        my $time=time();
+        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+        $month++;
+        $year+=1900;
+        if (length($sec) == 1) {$sec = "0"."$sec";}
+        if (length($min) == 1) {$min = "0"."$min";}
+        if (length($hour) == 1) {$hour = "0"."$hour";}
+        if (length($day) == 1) {$day = "0"."$day";}
+        if (length($month) == 1) {$month = "0"."$month";}
+        #print "$year-$month-$day $hour:$min:$sec\n";
+        return("$year-$month-$day-$hour-$min-$sec");
+}
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -r -p -m -mis -t -e -f -tag -o -time
+mandatory parameters:
+-p precursor.fa  miRNA precursor sequences from miRBase # must be absolute path
+-m mature.fa     miRNA sequences from miRBase # must be absolute path
+-r reads.fa      your read sequences #must be absolute path
+
+-o output directory
+
+options:
+-mis [int]     number of allowed mismatches when mapping reads to precursors, default 0
+-t [int]     threads number,default 1
+-e [int]     number of nucleotides upstream of the mature sequence to consider, default 2
+-f [int]     number of nucleotides downstream of the mature sequence to consider, default 5
+-tag [string] sample marks# eg. sampleA;sampleB;sampleC
+-time sting #make directory time,default is the local time
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/quantify_siRNA.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,64 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: chentt
+#Email:
+#Date: 2012-4-6
+#Modified:
+#Description:
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","o=s","d=s","h");
+if (!(defined $opts{i} and defined $opts{d} and defined $opts{o}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $input=$opts{'i'};
+my $output=$opts{'o'};
+my $depth=$opts{'d'};
+
+open (IN,"<$input")||die"$!";
+open OUT,">$output";
+#my @Total=qw(15797079 18042650 17455254 17295526 18791753 16719596 15150009 18451484 17402501 17729362 19347595 17518516 15699663 16589265 15442892 14012264 14190746 17280260 13213117 12390121 14874304 );
+my @Total=split/\,/,$depth;
+#print OUT "#clusterID\tmajor_length\tpercent\n";
+while (my $aline=<IN>) {
+	chomp $aline;
+	if ($aline=~/^\"/){
+		my @title=split/\t/,$aline;
+		for (my $i=0;$i<@title ;$i++) {
+			$title[$i]=~s/^\"(\S+)\"$/$1/;
+		}
+		my $title=join "\t",@title;
+		print OUT "\#$title\n";
+		next;
+	}
+	my @temp=split/\t/,$aline;
+	print OUT "$temp[0]\t$temp[1]\t$temp[2]";
+	my @id=split/:/,$temp[0];
+	my @posi=split/-/,$id[1];
+	for (my $i=3;$i<@temp;$i++) {
+		my $rpkm=sprintf("%.2f",$temp[$i]/($posi[1]-$posi[0]+1)/$Total[$i-3]*1000000000);
+		print OUT "\t$rpkm";
+	}
+	print OUT "\n";
+}
+close IN;
+close OUT;
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o -h
+options:
+-i input cluster file
+-o output file
+-d depth
+-h help
+USAGE
+exit(1);
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rfam.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,100 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2013/7/19
+#Modified:
+#Description: 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+my %opts;
+GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","h");
+if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $dir=$opts{'o'};
+unless ($dir=~/\/$/) {$dir.="/";}
+my $rfam=$opts{'ref'};
+my $mis=defined $opts{'v'}? $opts{'v'} : 0;
+my $index=defined $opts{'index'} ? $opts{'index'} : "";
+my $threads=defined $opts{'p'} ? $opts{'p'} : 1;
+
+if (not -d $dir) {
+	mkdir $dir;
+}
+
+
+my $time=Time();
+
+my $mapdir=$dir."/rfam_match";
+if(not -d $mapdir){
+	mkdir $mapdir;
+}
+chdir $mapdir;
+###check genome index
+if (-s $index.".1.ebwt") {
+}else{
+	&checkACGT($rfam);
+	`bowtie-build $rfam $rfam`;
+	$index="$rfam";
+}
+### genome mapping
+`bowtie -v $mis -f -p $threads -k 1 $index $filein --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt 2> run.log`;
+
+sub checkACGT{
+	my $string;
+	open IN,"<$rfam";
+	while (my $aline=<IN>) {
+		if ($aline!~/^>/) {
+			$aline=~s/U/T/gi;
+		}
+		$string .=$aline;	
+	}
+	close IN;
+	$rfam=basename($rfam);
+	open OUT, ">$rfam";
+	print OUT $string;
+	close OUT;
+}
+
+sub Time{
+        my $time=time();
+        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+        $month++;
+        $year+=1900;
+        if (length($sec) == 1) {$sec = "0"."$sec";}
+        if (length($min) == 1) {$min = "0"."$min";}
+        if (length($hour) == 1) {$hour = "0"."$hour";}
+        if (length($day) == 1) {$day = "0"."$day";}
+        if (length($month) == 1) {$month = "0"."$month";}
+        #print "$year-$month-$day $hour:$min:$sec\n";
+        return("$year-$month-$day-$hour-$min-$sec");
+}
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o
+options:
+-i input file# input reads fasta/fastq file
+-ref input file# rfam file, which do not contain miRNAs
+-index file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+-v <int>           report end-to-end hits w/ <=v mismatches; ignore qualities,default 0;
+
+-p/--threads <int> number of alignment threads to launch (default: 1)
+
+-o output directory 
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sRNA_plot.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,411 @@
+#!/usr/bin/perl -w
+#==========================================================================================
+# Date: 
+# Title: 
+# Comment: Program to plot gene structure
+# Input: 1. 
+#        2. 
+#        3. 
+# Output: output file of gene structure graph by html or svg formt
+# Test Usage: 
+#========================================================================================
+#use strict;
+my $version=1.00;
+use SVG;
+use Getopt::Long;
+my %opt;
+GetOptions(\%opt,"g=s","l=s","span=s","c=s","o=s","out=s","cen:s","mark=s","h");
+if (!( defined $opt{o}) || defined $opt{h}) {
+&usage;
+}
+my $span=$opt{span};
+#my $sample_cloumn=$opt{n};
+my $mark=$opt{mark};
+my @mark=split/\#/,$mark;
+my $genelist=$opt{g};
+#===============================Define Attribute==========================================
+my %attribute=(
+	canvas=>{
+		'width'=>1500,
+		'height'=>1800
+	},
+	text=>{
+		'stroke'=>"#000000",
+		'fill'=>"none",
+		'stroke-width'=>0.5
+	},
+	line=>{
+		'stroke'=>"black",
+		'stroke-width'=>1
+	},
+	csv=>{
+		'stroke'=>"red",
+		'stroke-width'=>0.5
+	},
+	exon=>{
+		'stroke'=>"black",
+		'stroke-width'=>1
+	},
+	intron=>{
+		'stroke'=>"black",
+		'stroke-width'=>1.5
+	},
+	font=>{
+		'fill'=>"#000000",
+		'font-size'=>12,
+		'font-size2'=>10,
+		#'font-weight'=>'bold',
+		'font-family'=>"Arial"
+		#'font-family'=>"ArialNarrow-bold"
+	},
+	rect=>{
+		'fill'=>"lightgreen",
+		'stroke'=>"black",
+		'stroke-width'=>0.5
+	},
+	readwidth=>0.5
+);
+#############################s#define start coordinate and scale
+open(TXT,">$opt{out}");
+open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}";
+my %length;
+while (my $aline=<LENGTH>) {
+	chomp $aline;
+	next if($aline=~/^\#/);
+	my @temp=split/\t/,$aline;
+	$temp[0]=~s/^c/C/;
+	$length{$temp[0]}=$temp[1];
+}
+close LENGTH;
+#---------------------------------------------------------------
+open(GENE,"$opt{g}")||die"cannot open the file $opt{g}";
+my %genelist;
+while (my $aline=<GENE>) {
+	chomp $aline;#LOC_Os01g01280  Chr1    133291  134685  +
+	next if($aline=~/^\#/);
+	my @temp=split/\t/,$aline;
+	if ($temp[1]=~/^Chr(\d)$/) {
+		$temp[1]="Chr0$1";
+	}
+	push @{$genelist{$temp[1]}},[$temp[0],$temp[2],$temp[3]];
+
+}
+close GENE;
+#my %have_gene;
+#foreach my $chr (sort keys %genelist) {
+#	my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}};
+#	my $start=$genelist[0][1];
+#	my $end=$genelist[0][2];
+#	for (my $i=0;$i<@genelist ;$i++) {
+#		if ($gene) {
+#		}
+#	}
+#}
+
+my %gene_desity;
+foreach my $chr (sort keys %genelist) {
+	my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}};
+	for (my $i=0;$i<@genelist ;$i++) {
+		my $start=int($genelist[$i][1]/$span);
+		my $end=int($genelist[$i][2]/$span);
+		#my @t_rpkm=split/\t/,$target_rpkm{$genelist[$i][0]};
+		if ($start==$end) {
+			$gene_desity{$chr}[$start]++;
+		}
+		else{
+			for (my $k=$start;$k<=$end ;$k++) {
+				$gene_desity{$chr}[$k]++;
+			}
+		}
+	}
+}
+#------------------------------------------region_gene_number-------------------------
+my $max_gene_number=0;
+my $total=0;
+foreach my $chr (sort keys %genelist) {
+	for (my $i=0;$i<@{$gene_desity{$chr}} ;$i++) {
+		if (!(defined($gene_desity{$chr}[$i]))) {
+			$gene_desity{$chr}[$i]=0;
+		}
+		if ($gene_desity{$chr}[$i]>$max_gene_number) {
+			$max_gene_number=$gene_desity{$chr}[$i];
+			#print "$gene_desity{$chr}[$i]\n";
+		}
+		#print TXT "$i\t$gene_desity[$i]\n";
+		$total+=$gene_desity{$chr}[$i];
+		#print "$chr\t$i\t$gene_desity{$chr}[$i]\n";
+	}
+}
+#print "Gene max:$max_gene_number\ntotal:$total\n";
+
+#---------------------------------------------------------------
+my %centromere;
+if (defined($opt{cen})) {
+	open CEN,"$opt{cen}";
+	while (my $aline=<CEN>) {
+		chomp $aline;
+		next if($aline=~/^\#/);
+		my @temp=split/\t/,$aline;
+		$temp[0]=~s/^c/C/;
+		$centromere{$temp[0]}[0]=$temp[1];
+		$centromere{$temp[0]}[1]=$temp[2];
+	}
+	close CEN;
+}
+
+#---------------------------------------------------------------
+my $max_length=0;
+foreach my $chr (keys %length) {
+	if ($max_length<$length{$chr}) {
+		$max_length=$length{$chr};
+	}
+	print "$chr\n";
+}
+#====================================cluster data=======================================
+open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}";
+my %cluster;
+my %cluster_density;
+#my @sample=qw(39B3 3PA3 3LC3);
+my @cluster_non_add;
+while (my $aline=<CLUSTER>) {
+	next if($aline=~/^\#/);
+	chomp $aline;##Chr	MajorLength	Percent	end	19B1
+	my @temp=split/\t/,$aline;
+	my @ID=split/\:/,$temp[0];
+	my @posi=split/\-/,$ID[1];
+	my @all_rpkm=@temp;
+	shift @all_rpkm;
+	shift @all_rpkm;
+	shift @all_rpkm;
+#	for (my $s=0;$s<@all_rpkm ;$s++) {#log transfer
+#		$all_rpkm[$s]=log2($all_rpkm[$s]);
+#	}
+	push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],@all_rpkm];#ID	start	end	rpkm(19B1,1PA1,1LC1);
+}
+close CLUSTER;
+my %max_cluster;
+my $chr_number=0;
+print "@mark\n$mark\n";
+foreach my $chr (sort keys %cluster) {
+	for (my $i=0;$i<@mark ;$i++) {
+		$max_cluster{$chr}[$i]=0;
+	}	
+	$chr_number++;
+}
+foreach my $chr (sort keys %cluster) {
+	@{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}};
+	for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
+		for (my $s=0;$s<@mark;$s++) {
+			if ($cluster{$chr}[$i][3+$s]>$max_cluster{$chr}) {
+				$max_cluster{$chr}[$s]=$cluster{$chr}[$i][3+$s];
+			}
+		}
+	}
+
+}
+foreach my $chr (sort keys %max_cluster) {
+	for (my $s=0; $s<@mark;$s++) {
+	#	print "$max_cluster{$chr}[$s]\n";
+	}
+}
+#---------------------------------------------------------------------------------------
+foreach my $chr(keys %cluster) {
+	for(my $i=0;$i<$#{$cluster{$chr}};$i++) {
+		my $start=int($cluster{$chr}[$i][1]/$span);
+		my $end=int($cluster{$chr}[$i][2]/$span);
+		if ($start==$end) {
+			for (my $s=0;$s<@mark ;$s++) {
+				$cluster_density{$chr}[$start][$s]+=$cluster{$chr}[$i][3+$s];
+			}
+		
+		}
+		else{
+			for (my $m=$start;$m<=$end ;$m++) {
+				for (my $s=0;$s<@mark ;$s++) {
+					$cluster_density{$chr}[$m][$s]+=$cluster{$chr}[$i][3+$s];
+				}
+			}
+		}
+	}
+}
+my %max_cluster_density;
+my $max_all_density=0;
+foreach my $chr (sort keys %cluster) {#
+	for (my $s=0;$s<@mark ;$s++) {
+		for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) {
+			$max_cluster_density{$chr}[$s]=0;
+		}
+	}	
+	
+}
+foreach my $chr (sort keys %cluster_density) {
+	print "$#{$cluster_density{$chr}}\n";
+	for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) {
+		print TXT "$chr\t$k";
+		for (my $s=0;$s<@mark;$s++) {
+			if (!(defined($cluster_density{$chr}[$k][$s]))) {
+				$cluster_density{$chr}[$k][$s]=0;
+			}
+			if ($cluster_density{$chr}[$k][$s]>$max_cluster_density{$chr}[$s]) {
+				$max_cluster_density{$chr}[$s]=$cluster_density{$chr}[$k][$s];
+			}
+			if ($cluster_density{$chr}[$k][$s]>$max_all_density) {
+				$max_all_density=$cluster_density{$chr}[$k][$s];
+			}
+			print TXT "\t$cluster_density{$chr}[$k][$s]";
+		}	
+		print TXT "\n";
+	}
+}
+print "max density: $max_all_density\n";
+#--------------------------------------------------------------------
+my $top_margin=30;
+my $tail_margin=30;
+my $XOFFSET=50;
+my $YOFFSET=60;
+my $chr_length=600;
+my $Xscale=$chr_length/$max_length;#¶¨ÒåXÖá±ÈÀý³ß 1:1000 xÖáµÄ×ø±ê³¤¶È¶¼Òª°´Õմ˱ÈÀý³ß»»Ëã
+#my $high_cov=$high_cov9B1=0.5;#¶¨Òå·åͼ×î¸ß·å
+#my $Yscale=1/$high_cov;#¶¨ÒåYÖá±ÈÀý³ß 1:60 yÖáµÄ×ø±ê³¤¶È¶¼Òª°´Õմ˱ÈÀý³ß»»Ëã
+#========================================New canvas============================
+####    Starting    ####
+#н¨»­²¼
+my $width=1000;
+my $heigth=100+130*$chr_number;
+my $svg=SVG->new(width=>$width, height=>$heigth);
+#»­Í¼Æðʼµã
+my $canvas_start_x=$XOFFSET;
+my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#°´ÕÕ±ÈÀý³ß »­Ïß
+my $canvas_start_y=$YOFFSET;
+my $canvas_end_y=$YOFFSET;
+my $chr_heigth=$heigth-$YOFFSET-$tail_margin;
+print "chr number:$chr_number\n";
+my $one_chr_heigth=$chr_heigth/$chr_number;
+my $Yscale=($one_chr_heigth-15)/$max_all_density;
+#my $chr_width=$YOFFSET;
+#my $chr_start_y;
+#my $chr_end_y;
+#my $Yscale=0.01;
+#=======================================title of the graph===============================
+#my $span_k=$span/1000;
+#$svg->text('x',$width/2,'y',$YOFFSET-20,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Clusters rpkm/"."$span_k"."kb Distribution");
+#=======================================the top max chr line=============================
+$svg->line(id=>'l1',x1=>$canvas_start_x,y1=>$canvas_start_y,x2=>$canvas_end_x,y2=>$canvas_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+$long_scale=int ($max_length/10);#Ê®µÈ·Ö ´ó¿Ì¶È
+#´ó×ø±ê¿Ì¶È
+for ($i=0;$i<=10;$i++) {
+	my $long_x_start=$XOFFSET+$long_scale*$i*$Xscale;
+	my $long_x_end=$long_x_start;
+	my $long_y_start=$YOFFSET;
+	my $long_y_end=$YOFFSET-5;
+	$svg->line('x1',$long_x_start,'y1',$long_y_start,'x2',$long_x_end,'y2',$long_y_end,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+	my $Bscale=$long_scale*$i;
+	my $cdata=int ($Bscale/1000000);
+	$svg->text('x',$long_x_start,'y',$long_y_start-10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$cdata."M");
+}
+#=========================================================================================
+my $cc=1;
+foreach my $chr (sort keys %length) {
+	my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale;
+	my $chr_start_x=$XOFFSET;
+	my $chr_start_y=$YOFFSET+$cc*$one_chr_heigth;
+	my $chr_end_y=$chr_start_y;
+	#$chr_start_y+=$chr_width;
+	#$chr_end_y+=$chr_width;
+#	for (my $i=0;$i<@{$gene_desity{$chr}};$i++) {
+#		print "$chr\t$i\t$gene_desity{$chr}[$i]\n";
+#		my $red=$gene_desity{$chr}[$i]/$max_gene_number*255;
+#		my $green=$gene_desity{$chr}[$i]/$max_gene_number*255;
+#		print "$red\t$green\t0\n";
+#		$svg->rect('x',$chr_start_x+$i*$span*$Xscale,'y',$chr_start_y,'width',$span*$Xscale,'height',8,'stroke',"rgb($red,$green,0)",'stroke-width',0.1,'fill',"rgb($red,$green,0)");
+#	}
+
+	$svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+	$svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr);
+	my $m_length=$length{$chr}%1000000;
+	$svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$m_length."M");
+	
+
+	if (defined($centromere{$chr}[0])) {
+		$svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y-2,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
+	}
+	for (my $s=0;$s<@mark ;$s++) {
+		for (my $i=0;$i<$#{$cluster_density{$chr}}-1 ;$i++) {
+			#if ($cluster_density{$chr}[$i]*$Yscale>40) {
+				#$cluster_density{$chr}[$i]=40/$Yscale;
+				#$svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green");
+			#}
+			#print "$i\t$cluster_density{$chr}[$i][$s]\t$cluster_density{$chr}[$i+1][$s]\n";
+			my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale;
+			my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale;
+			my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i][$s]*$Yscale;
+			my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][$s]*$Yscale;
+			my $c_red=($s+1)/@mark*255;
+			$svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"rgb($c_red,125,0)",'stroke-width',0.3);
+		}
+
+	}
+	#=======Y axis
+	$svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_start_x,y2=>$chr_start_y-$one_chr_heigth+15,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+	#=======Y axis ===>3 xiaoge
+	my $s10=1;
+	my $e10=0;
+	my $chr_max=$max_all_density;
+	while ($chr_max>10) {
+		$chr_max=int($chr_max/10);
+		$s10=$s10*10;
+		$e10++;
+	}
+	$chr_max=$chr_max/2;
+	#print "*****$max_all_density\t$chr_max\t$s10\n";
+	for (my $i=1;$i<3 ;$i++) {
+		my $y1=$chr_start_y-$chr_max*$s10*$Yscale*$i;
+		my $xiaoge_Y=$chr_max*$i;
+		$svg->line('x1',$chr_start_x,'y1',$y1,'x2',$chr_start_x+3,'y2',$y1,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+		$svg->text('x',$chr_start_x-26,'y',$y1+4,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',8,'font-family',$attribute{font}{'font-family'},'-cdata',$xiaoge_Y."e".$e10);
+	}
+	$cc++;
+}
+
+for (my $s=0;$s<@mark ;$s++) {
+	my $c_red=($s+1)/@mark*255;
+	print "**$c_red\n";
+	$svg->line('x1',$canvas_end_x+100,'y1',$YOFFSET+$s*20+30,'x2',$canvas_end_x+130,'y2',$YOFFSET+$s*20+30,'stroke',"rgb($c_red,125,0)",'stroke-width',1);
+	$svg->text('x',$canvas_end_x+150,'y',$YOFFSET+$s*20+5+30,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$s]);
+}
+#
+#
+if (defined($opt{cen})) {
+	$svg->rect('x',$canvas_end_x+100,'y',$YOFFSET+@mark*20+30,'width',30,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue");
+	$svg->text('x',$canvas_end_x+150,'y',$YOFFSET+@mark*20+30+5,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere");
+}
+
+close TXT;
+
+open (OUT,">$opt{o}");
+print OUT $svg->xmlify();
+
+sub log2 {
+	my $n = shift;
+	return log($n)/log(2);
+}
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 
+options:
+-g genelist
+-span
+-n sample cloumn
+-mark sample name
+-o output graph file name with html or svg extension
+-c cluster file input
+-out txt output
+-l length of chr
+-cen centromere
+-h help
+USAGE
+exit(1);
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sam2Bed_bowtie.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,74 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2011/11/7
+#Modified:
+#Description: sam2BED 
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","mark=s","o=s","h");
+if (!(defined $opts{i} and defined $opts{o}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'o'};
+my $mark=$opts{'mark'};
+my @sample=split/\#/,$mark;
+$mark=join"\t",@sample;
+open OUT,">$fileout"; #output file  
+print OUT "#chr\tstrand\tstart\tend\t$mark\n";
+
+open IN,"<$filein"; #input file  
+my $Tags_num=0;
+my @read_num;
+#print OUT "#chr\tstart\tend\tnum\t<=20\t21\t22\t23\t24\t>=25\n";
+while (my $aline=<IN>) {
+	chomp $aline;
+	next if($aline=~/^\@/);
+	my @tmp=split/\t/,$aline;
+	my $strand=$tmp[1];
+	my $start=$tmp[3]+1;
+	my $length=length($tmp[4]);
+	my $end=$start+$length-1;
+	my $hit=$tmp[6]+1;
+	#======express caculate weighted===================================
+		my $exp;
+		my @tempID=split/\:/,$tmp[0];
+		my @exp=split/\_/,$tempID[1];
+		pop @exp;
+		for (my $j=0;$j<@exp ;$j++) {
+			#my @tempID1=split/\=/,$tempID[$j];
+			$exp[$j]=sprintf("%.2f",$exp[$j]/$hit);
+			$read_num[$j]+=$exp[$j];
+			#print OUT "\t$exp";
+		}
+		$exp=join "\t",@exp;
+		print OUT $tmp[2],"\t",$strand,"\t",$start,"\t",$end,"\t",$exp,"\n";
+		$Tags_num++;
+
+}
+print "Total Tags numer: $Tags_num\n";
+my $read_number=join "\t",@read_num;
+print "Each sample numer: $read_number\n";
+close IN;
+close OUT;
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -o
+options:
+-i input file
+-mark sampleA	sampleB	sampleC.....
+-o output file
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/siRNA.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,402 @@
+#!/usr/bin/perl -w
+my $version=1.00;
+use strict;
+use warnings;
+use Getopt::Long;
+use Getopt::Std;
+use threads;
+#use threads::shared;
+use Parallel::ForkManager;
+#use lib '/leofs/biotrans/chentt/perl_module/';
+#perl ../siRNA.pl -i config -g /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome.fa -f /share_bio/hs4/disk3-4/Reference/Plants/Rice_TIGR/Reference/TIGR/version_6.1/all.dir/all.gff3 -path /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/ -o /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test -t 3 -rfam /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/Rfam.fasta -idx /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome -idx2 /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/rfam -deg deg -n 25 -nat class/nat_1 -repeat class/repeat_1 -cen centromere_TIGR.txt -format fastq 
+print "
+#####################################
+#                                   # 
+#          sRNA cluster             # 
+#                                   #
+#####################################
+";
+###########################################################################################
+my $usage="$0
+Options:
+-i input file# fasta
+-config input file 
+-g  genome file
+-f  gff file
+
+-o  workdir file
+-path  script path
+-t  int,    number of threads [1]
+-format  fastq, fq, fasta or fa
+-idx  string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
+		string must be the prefix of the bowtie index. For instance, if
+		the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+		the prefix is 'h_sapiens_37_asm'.##can be null
+-mis  int     number of allowed mismatches when mapping reads to genome, default 0
+
+-n  int max hits number,default 25; used in genome alignment
+-d  int distance of tag to merged a cluster; default 100
+-p  cluster method F :conventional default is F
+				   T :NIBLES 
+-l  int the length of the upstream and downstream,default 1000;used in position annotate
+
+-nat  natural antisense transcripts file
+-repeat  repeat information file out of Repeatmasker
+-deg  file config of de sample
+-cen  centromere file input
+-span  plot span, default 50000
+";
+
+my %options;
+GetOptions(\%options,"i:s","config=s","g=s","f=s","o=s","path:s","p=s","format=s","nat:s","repeat:s","deg:s","n:i","mis:i","t:i","d:i","l:i","idx:s","cen:s","span:s","h");
+#print help if that option is used
+if($options{h}){die $usage;}
+
+my $filein=$options{'i'};
+
+#my $config=$options{'i'};
+my $genome_fa=$options{'g'};
+my $gff=$options{'f'};
+
+
+##########################################################################################
+my $predir=`pwd`;
+chomp $predir;
+my $workdir=defined($options{'o'}) ? $options{'o'}:$predir;
+
+my $path=$options{'path'};
+
+my $t=defined($options{'t'})? $options{'t'}:1; #threads number
+
+my $mis=defined $options{'mis'} ? $options{'mis'}:0;
+
+
+my $hit=defined $options{'n'}?$options{'n'}:25;
+
+my $distance_of_merged_tag=defined $options{'d'} ? $options{'d'}:100;
+
+my $up_down_dis=defined $options{'l'} ?$options{'l'}:1000;
+
+my $cluster_mothod=defined $options{'p'}?$options{'p'}:"F";
+
+my $format=$options{'format'};
+#if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { 
+#	die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
+#}
+
+
+
+my $sample_number;
+my ($dir,$dir_tmp);
+################################  MAIN  ##################################################
+print "\ncluster program start:";
+my $time=Time();
+make_dir_tmp();
+
+my $mark;
+my $sample_mark;
+
+my $config=$options{'config'};
+my (@filein,@mark);
+&read_config();
+$sample_number=@mark;
+$mark=join "\t",@mark;
+$sample_mark=join "\#",@mark;
+
+
+
+my $data3=$filein;   ### rfam not mapped reads
+genome();
+
+my $bed=$dir."cluster\/"."sample\.bed";
+my $read=$dir."cluster\/"."sample_reads\.cluster";
+my $read_txt=$dir."cluster\/"."cluster\.txt";
+my $rpkm=$dir."cluster\/"."sample_rpkm\.cluster";
+my $preprocess;
+my $cluster_file;
+my $annotate_dir;
+my $deg_dir;
+my $plot_dir;
+my %id;
+for (my $i=0;$i<@mark ;$i++) {
+	$id{$mark[$i]}=$i+4;
+}
+
+
+my @map_read;
+my $map_tag=0;
+
+bwt2bed();
+
+cluster();
+
+quantify();
+
+phase();
+
+if (defined($options{'nat'})&&defined($options{'repeat'})) {
+	class();
+}
+else{
+	get_genelist();
+}
+
+annotate();
+
+genome_length();
+
+plot();
+
+my @pairdir;
+if (defined($options{'deg'})) {
+	dec();
+	infor_merge();
+}
+else{infor_merge_no_dec()}
+html();
+print "\ncluster program end:";
+Time();
+############################sub program###################################################
+sub make_dir_tmp{
+
+	#make temporary directory
+	if(not -d "$workdir\/cluster_runs"){
+		mkdir("$workdir\/cluster_runs");
+		mkdir("$workdir\/cluster_runs\/ref\/");
+	}
+
+	$dir="$workdir\/cluster_runs\/";
+	#print STDERR "mkdir $dir\n\n";
+	return;
+}
+
+sub genome{
+	if(defined $options{'idx'}){
+		system("perl $path\/matching.pl -i $data3 -g $genome_fa -v $mis -p $t -r $hit -o $dir -index $options{idx}") ;
+	}else{
+		system("perl $path\/matching.pl -i $data3 -g $genome_fa -v $mis -p $t -r $hit -o $dir ") ;
+	}
+	#=================== mapping sta ===================================================
+	my $map_file=$dir."genome_match\/genome_mapped\.fa";
+	open (MAP,"<$map_file")||die"$!";
+	print "\n#each sample mapping reads sta:\n\n";
+	print "#$mark\ttotal\n";
+	while (my $ID=<MAP>) {
+		chomp $ID;
+		my @tmp=split/\:/,$ID;
+		my @exp=split/\_/,$tmp[1];
+		$exp[-1] =~ s/^x//;
+		for (my $i=0;$i<@exp ;$i++) {
+			$map_read[$i]+=$exp[$i];
+		}
+		$map_tag++;
+		my $seq=<MAP>;
+	}
+	my $map_read=join"\t",@map_read;
+	print "$map_read\n\n";
+	print "#total mapped tags:$map_read\n\n";
+	close MAP;
+	return 0;
+}
+
+sub bwt2bed{
+	$cluster_file=$dir."cluster\/";
+	mkdir ("$cluster_file");
+	print "sam file changed to bed file\n";
+	my ($file) = $dir."genome_match\/genome_mapped\.bwt";
+
+	my $sam2bed=`perl $path\/sam2Bed_bowtie.pl -i $file -mark $sample_mark -o $bed `;
+	print "perl $path\/sam2Bed_bowtie.pl -i $file -mark $sample_mark -o $bed\n\n";
+	return 0;
+}
+
+sub cluster{
+	print "tags is ready to merged clusters\n\n";
+	my ($file) =$bed;
+	if ($cluster_mothod eq "F") {
+		my $cluster=`perl $path\/conventional.pl -i $file -d $distance_of_merged_tag -n $sample_number -mark $sample_mark -o $read -t $read_txt`;
+		print "Using converntional method\n perl $path\/conventional.pl -i $file -d $distance_of_merged_tag -n $sample_number -mark $sample_mark -o $read -t $read_txt\n\n";
+	}
+	elsif($cluster_mothod eq "T"){
+		my $cluster=`perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $read_txt -k $sample_mark`;
+		print "Using nibls method\n perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $dir\/cluster.txt -k $sample_mark\n\n";
+	}
+	else{print "\-p is wrong!\n\n";}
+	return 0;
+}
+
+
+sub quantify{
+	print "clusters is ready to quantified\n\n";
+	my @depth=@map_read;
+	pop @depth;
+	my $depth=join ",",@depth;
+	my $quantify=`perl $path\/quantify_siRNA.pl -i $read -d $depth -o $rpkm`;
+	print "perl $path\/quantify_siRNA.pl -i $read -d $depth -o $rpkm\n\n\n";
+	return 0;
+}
+
+sub phase{
+	$annotate_dir=$dir."annotate\/";
+	mkdir ("$annotate_dir");
+	print "clusters is to predict phase siRNA\n";
+	my $phase=`perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out`;
+	print "perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out\n\n\n";
+	return 0;
+}
+
+sub class{
+	print "clusters is ready to annotate by sources\n\n";
+	my $nat=$options{'nat'};
+	my $repeat=$options{'repeat'};
+	my $class=`perl $path\/ClassAnnotate.pl -i $rpkm -g $gff  -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt`;
+	print "perl $path\/ClassAnnotate.pl -i $rpkm -g $gff  -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt\n\n";
+}
+
+sub annotate{
+	print "clusters is ready to annotate by gff file\n\n";
+	my $file;
+	if (defined($options{'nat'})&&defined($options{'repeat'})) {
+		$file="$annotate_dir\/sample_class.anno";
+	}
+	else{
+		$file=$rpkm;
+	}
+	my $annotate=`perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno`;
+	print "perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno\n\n";
+	return 0;
+}
+sub get_genelist{
+
+	my $get_genelist=`perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt`;
+	print "perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt";
+}
+sub dec{
+	print "deg reading\n\n";
+	my $deg_file=$options{'deg'};
+	open IN,"<$deg_file";
+	my @deg;
+	my $s=0;
+		while (my $aline=<IN>) {
+		chomp $aline;
+		next if($aline=~/^\#/);
+		$deg[$s]=$aline;
+		my @ea=split/\s+/,$aline;
+		push @pairdir,"$ea[0]_VS_$ea[1]\/";
+		#print "$deg[$s]\n";
+		$s++;
+	}
+	close IN;
+	$deg_dir=$dir."deg\/";
+	mkdir ("$deg_dir");
+	my $max_process = 10;
+	my $pm = new Parallel::ForkManager( $max_process );
+	my $number=@deg-1;
+	foreach(0..$number){
+		$pm->start and next;
+		&dec_pel($deg[$_]);
+		$pm->finish;
+	}
+	$pm->wait_all_children;
+}
+
+sub dec_pel{
+	print "\n******************\nstart:\n";
+	Time();
+	my $sample=shift(@_);
+	my @each=split/\s+/,$sample;
+	print "$each[0]\t$each[1]\n";
+	my $deg_sample_dir=$deg_dir."$each[0]_VS_$each[1]\/";
+	mkdir ("$deg_sample_dir");
+	print "read: $read\n";
+	print "deg_sample_dir: $deg_sample_dir\n";
+	print "$id{$each[0]}\t$each[0]\n";
+	print "$id{$each[1]}\t$each[1]\n";
+	my $deg=`perl $path\/DEGseq_2.pl -i $read -outdir $deg_sample_dir -column1 $id{$each[0]} -mark1 $each[0] -column2 $id{$each[1]} -mark2 $each[1]`; #-depth1 -depth2
+	my $time2=time();
+	print "end:\n*************************\n";
+	Time();
+	sleep 1;
+}
+
+sub infor_merge{
+	my ($input,$mark);
+	foreach (@pairdir) {
+		print "@pairdir\n";
+		$mark.=" -mark $_ ";
+		$input.=" -i $dir/deg\/$_\/output_score\.txt ";
+		print "$input\n$mark\n";
+	}
+	my $infor_merge=`perl $path\/SampleDEGseqMerge.pl $input $mark -f $annotate_dir\/sample_c_p.anno -n $sample_number -o $dir\/total.result `;
+	print "perl $path\/SampleDEGseqMerge.pl $input $mark -f $annotate_dir\/sample_c_p.anno -n $sample_number -o $dir\/total.result\n\n";
+}
+
+sub infor_merge_no_dec{
+	my $infor_merge_no_dec=`cp $annotate_dir\/sample_c_p.anno $dir\/total.result`;
+}
+
+sub genome_length{
+	my $length=`perl $path\/count_ref_length.pl -i $genome_fa -o $dir\/ref\/genome\.length`;
+	print "perl $path\/count_ref_length.pl -i $genome_fa -o $dir\/ref\/genome\.length\n\n"
+
+}
+
+sub plot{
+	$plot_dir="$dir\/plot\/";
+	mkdir ("$plot_dir");
+	my $span=defined($options{span})?$options{span}:50000;
+	my $cen="";
+	if (defined $options{cen}) {
+		$cen="-cen $options{cen}";
+	}
+	my $plot=`perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome\.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt `;
+	"print perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt \n";
+
+}
+
+sub html{
+	my $pathfile="$dir/path.txt";
+	open PA,">$pathfile";
+	print PA "$config\n";
+	print PA "$preprocess\n";
+	print PA "$dir"."rfam_match\n";
+	print PA "$dir"."genome_match\n";
+	print PA "$cluster_file\n";
+	print PA "$annotate_dir\n";
+	print PA "$plot_dir\n";
+	if (defined($deg_dir)) {
+			print PA "$deg_dir\n";
+	}
+	close PA;
+	#my $html=`perl $path\/html.pl -i $pathfile -format $format -o $dir/result.html`;
+}
+
+sub Time{
+	my $time=time();
+	my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+	$month++;
+	$year+=1900;
+	if (length($sec) == 1) {$sec = "0"."$sec";}
+	if (length($min) == 1) {$min = "0"."$min";}
+	if (length($hour) == 1) {$hour = "0"."$hour";}
+	if (length($day) == 1) {$day = "0"."$day";}
+	if (length($month) == 1) {$month = "0"."$month";}
+	print "$year-$month-$day $hour:$min:$sec\n";
+	return("$year-$month-$day-$hour-$min-$sec");
+}
+#################################################################################
+sub read_config{
+	open CON,"<$config";
+	while (my $aline=<CON>) {
+		chomp $aline;
+		my @tmp=split/\t/,$aline;
+		push @filein,$tmp[0];
+		push @mark,$tmp[1];
+		#&check_rawdata($tmp[0]);
+	}
+	close CON;
+	if (@filein != @mark) {
+		#&printErr();
+		die "Maybe config file have some wrong!!!\n";
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/siRNA.xml	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,181 @@
+<tool id="plant_sirna_v1" name="siRNA" veision="1.0.0">
+  <description>Program for plant siRNA analysis (siRNA only)</description>
+
+  <requirements>
+    <requirement type="set_environment">SCRIPT_PATH</requirement>
+    <requirement type="package" version="0.12.7">bowtie</requirement>
+    <requirement type="package" version="3.0.1">R</requirement>
+	<requirement type="package" version="0.0.13">fastx_toolkit </requirement>
+	<requirement type="package" version="1.96">threads</requirement>
+	<requirement type="package" version="1.06">Parallel-ForkManager</requirement>
+	<requirement type="package" version="2.59">SVG</requirement>
+	<requirement type="package" version="1.4_001">Boost-Graph</requirement>
+  </requirements>
+
+  <command interpreter="perl">siRNA.pl 
+   ## Change this to accommodate the number of threads you have available.
+        -t \${GALAXY_SLOTS:-4}
+
+	-path \$SCRIPT_PATH
+
+      ## prepare bowtie index
+      #set index_path = ''
+      #if str($reference_genome.source) == "history":
+         #### bowtie-build "$reference_genome.own_file" genome; ln -s "$reference_genome.own_file" genome.fa;
+          #set index_path = $reference_genome.own_file
+		-g $index_path 
+
+	  #else:
+          #set index_path = $reference_genome.index.fields.path
+		-g ${index_path}.fa -idx $index_path 
+	  #end if
+
+
+   ## Do or not annotate siRNAs by function
+    #if $params.function_anno == "yes":
+
+      ## prepare bowtie index
+      #set nat_path = ''
+      #if str($params.nat_file.source) == "history":
+          #set nat_path = $params.nat_file.nat
+
+	  #else:
+          #set nat_path = $params.nat_file.index.fields.path
+	  #end if
+
+      ## prepare bowtie index
+      #set repeat_path = ''
+      #if str($params.repeat_file.source) == "history":
+          #set repeat_path = $params.repeat_file.repeat
+
+	  #else:
+          #set repeat_path = $params.repeat_file.index.fields.path
+	  #end if
+
+
+	 -nat $nat_path -repeat $repeat_path
+	#end if
+   
+   ## Do or not DEG
+    #if $degseq.degseq_analysis == "yes" :
+    -deg $degseq.deg
+    #end if
+
+  -i $reads -config $config -n $mapnt -format $format -f $gff -mis $mis -d $d -p $p -l $l  -cen $cen -span $span > run.log
+
+  </command>
+
+  <inputs>
+
+	<param name="config" type="data" label="Raw data configs file" />
+	<param name="reads" type="data" label="Input Fasta. file of candidate microRNA sequence" />
+
+	<param name="format" type="select" lable=" Data format" multiple="false">
+	  <option value="fastq">Input data is fastq. format</option>
+	  <option value="fasta">Input data is fasta. format</option>
+	</param>
+	
+
+        <conditional name="reference_genome">
+          <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in index</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+              <options from_data_table="bowtie_indexes">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+            <param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
+          </when>
+        </conditional>	<!--param type="data" name="index" label="genome sequence bowtie index"/-->
+
+
+	<param name="gff" type="data" label="gff file" />
+	<param name="mis" type="integer" value="0" label="Number of allowed mismatches when mapping reads to genome" />
+	<param name="mapnt" type="integer" value="25" label="A read is allowed to map up to this number of positions in the genome" />
+	<param name="d" type="integer" value="100" label="Distance of tag to merged a cluster" />
+
+	<param name="p" type="select" lable="siRNA cluster method" multiple="false">
+	  <option value="F">conventional</option>
+	  <option value="T">NIBLES</option>
+	</param>
+	<param name="l" type="integer" value="1000" label="The length of the upstream and downstream,used in position annotate" />
+
+
+	<conditional name="params">
+		<param name="function_anno" type="select" label="Do or not annotate siRNAs by function">
+		  <option value="no" selected="true">no</option>
+		  <option value="yes">yes</option>
+		 </param>
+		 <when value="yes">
+
+
+        <conditional name="nat_file">
+          <param name="source" type="select" label="Will you select a atural antisense transcripts file from your history ?" help="down load from ***">
+            <option value="indexed">Use a built-in file</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a atural antisense transcripts file" help="If your species of interest is not listed, contact the Galaxy team">
+              <options from_data_table="nat_annotate">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No files are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+            <param name="nat" type="data" format="txt" label="atural antisense transcripts file" />
+		  </when>
+        </conditional>	<!--param type="data" name="index" label="genome sequence bowtie index"/-->
+
+        <conditional name="repeat_file">
+          <param name="source" type="select" label="Will you select a repeat information file from your history ?" help="down load from ***">
+            <option value="indexed">Use a built-in file</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a repeat information file" help="If your species of interest is not listed, contact the Galaxy team">
+              <options from_data_table="repeat_annotate">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No files are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+			<param name="repeat" type="data" label="repeat information file out of Repeatmasker" />
+		  </when>
+        </conditional>	<!--param type="data" name="index" label="genome sequence bowtie index"/-->
+		 </when>
+    </conditional> <!-- params -->
+	
+	<param name="cen" type="data" label="centromere file input" />
+	<param name="span" type="integer" value="50000" label="plot span" />
+	
+    <conditional name="degseq">
+       <param name="degseq_analysis" type="select" label="Do or not identify Difference Expression Clusters">
+		  <option value="no" selected="true">no</option>
+		  <option value="yes">yes</option>
+       </param>
+       <when value="yes">
+			<param name="deg" type="data" label="file config of de sample" />
+        </when>
+    </conditional>
+
+	
+	</inputs>
+
+  <outputs>
+   <data format="txt" name="siRNA cluster" from_work_dir="cluster_runs/total.result" label="${tool.name} on ${on_string}: siRNA cluster"/>
+   <data format="html" name="analysis result" from_work_dir="cluster_runs/result.html" label="${tool.name} on ${on_string}: analysis result"/>
+
+  </outputs>
+
+ <help>
+
+ </help>
+ </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/siRNA_pipeline.pl	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,524 @@
+#!/usr/bin/perl -w
+my $version=1.00;
+use strict;
+use warnings;
+use Getopt::Long;
+use Getopt::Std;
+use threads;
+#use threads::shared;
+use Parallel::ForkManager;
+#use lib '/leofs/biotrans/chentt/perl_module/';
+#perl ../siRNA.pl -i config -g /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome.fa -f /share_bio/hs4/disk3-4/Reference/Plants/Rice_TIGR/Reference/TIGR/version_6.1/all.dir/all.gff3 -path /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/ -o /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test -t 3 -rfam /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/Rfam.fasta -idx /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome -idx2 /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/rfam -deg deg -n 25 -nat class/nat_1 -repeat class/repeat_1 -cen centromere_TIGR.txt -format fastq 
+print "
+#####################################
+#                                   # 
+#          sRNA cluster             # 
+#                                   #
+#####################################
+";
+###########################################################################################
+my $usage="$0
+Options:
+-i input file# raw data file
+-tag string #raw data sample name
+-g  genome file
+-f  gff file
+
+-o  workdir file
+-path  script path
+-t  int,    number of threads [1]
+-format  fastq, fq, fasta or fa
+-idx  string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
+		string must be the prefix of the bowtie index. For instance, if
+		the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+		the prefix is 'h_sapiens_37_asm'.##can be null
+-mis  int     number of allowed mismatches when mapping reads to genome, default 0
+-rfam  string,  input file# rfam database file.
+-idx2  string,  rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter
+		string must be the prefix of the bowtie index. For instance, if
+		the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+		the prefix is 'h_sapiens_37_asm'.##can be null
+
+-v  int report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment
+
+-a string,  ADAPTER string. default is ATCTCGTATG.
+-n  int max hits number,default 25; used in genome alignment
+-d  int distance of tag to merged a cluster; default 100
+-p  cluster method F :conventional default is F
+				   T :NIBLES 
+-l  int the length of the upstream and downstream,default 1000;used in position annotate
+
+-nat  natural antisense transcripts file
+-repeat  repeat information file out of Repeatmasker
+-deg  file config of de sample
+-cen  centromere file input
+-span  plot span, default 50000
+";
+
+my %options;
+GetOptions(\%options,"i:s@","tag:s@","g=s","phred:i","f=s","o=s","a:s","path:s","p=s","format=s","nat:s","repeat:s","deg:s","n:i","mis:i","rfam:s","t:i","v:i","d:i","l:i","idx:s","idx2:s","cen:s","span:s","h");
+#print help if that option is used
+if($options{h}){die $usage;}
+
+my @filein=@{$options{'i'}};
+my @mark=@{$options{'tag'}};
+
+#my $config=$options{'i'};
+my $genome_fa=$options{'g'};
+my $gff=$options{'f'};
+
+
+##########################################################################################
+my $predir=`pwd`;
+chomp $predir;
+my $workdir=defined($options{'o'}) ? $options{'o'}:$predir;
+
+my $path=$options{'path'};
+
+my $t=defined($options{'t'})? $options{'t'}:1; #threads number
+
+my $mis=defined $options{'mis'} ? $options{'mis'}:0;
+
+my $mis_rfam=defined $options{'v'} ? $options{'v'}:0;
+
+my $hit=defined $options{'n'}?$options{'n'}:25;
+
+my $distance_of_merged_tag=defined $options{'d'} ? $options{'d'}:100;
+
+my $up_down_dis=defined $options{'l'} ?$options{'l'}:1000;
+
+my $cluster_mothod=defined $options{'p'}?$options{'p'}:"F";
+
+my $format=$options{'format'};
+#if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { 
+#	die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
+#}
+
+my $adpter="ATCTCGTATG";  #adapter
+if (defined $options{'a'}) {$adpter=$options{'a'};}
+
+
+my $phred_qv=64;
+if(defined $options{'phred'}){$phred_qv=$options{'phred'};}
+my $sample_number;
+my ($dir,$dir_tmp);
+################################  MAIN  ##################################################
+print "\ncluster program start:";
+my $time=Time();
+make_dir_tmp();
+
+my @clip;
+my $mark;
+my $sample_mark;
+
+my $config=$dir."/input_config";
+open CONFIG,">$config";
+	for (my $i=0;$i<@filein;$i++) {
+		print CONFIG $filein[$i],"\t",$mark[$i],"\n";
+	}
+close CONFIG;
+if (@filein != @mark) {
+	die "Maybe config file have some wrong!!!\n";
+}
+$sample_number=@mark;
+$mark=join "\t",@mark;
+$sample_mark=join "\#",@mark;
+
+
+#read_config();
+
+trim_adapter_and_filter();
+
+my $filter_out=$dir."preProcess\/"."collapse_reads_out.fa";## raw clean data
+my $data2=$filter_out;   ### mirbase not mapped reads
+my $data3=$dir."\/rfam_match\/rfam_not_mapped\.fa";   ### rfam not mapped reads
+my $bed=$dir."cluster\/"."sample\.bed";
+my $read=$dir."cluster\/"."sample_reads\.cluster";
+my $read_txt=$dir."cluster\/"."cluster\.txt";
+my $rpkm=$dir."cluster\/"."sample_rpkm\.cluster";
+my $preprocess;
+my $cluster_file;
+my $annotate_dir;
+my $deg_dir;
+my $plot_dir;
+my %id;
+for (my $i=0;$i<@mark ;$i++) {
+	$id{$mark[$i]}=$i+4;
+}
+
+print "\n######## tiandm test start ###########\n";
+print "\@mark: @mark\n\%id keys number:";
+print scalar keys %id;
+print "\n";
+foreach my $kyess (keys %id){
+	print $kyess," --> $id{$kyess}\n";	
+}
+print "\n######## tiandm test end ############\n";
+group_and_filter();   #collapse reads to tags
+
+rfam();
+
+my @map_read;
+my $map_tag=0;
+genome();
+
+bwt2bed();
+
+cluster();
+
+quantify();
+
+phase();
+
+if (defined($options{'nat'})&&defined($options{'repeat'})) {
+	class();
+}
+else{
+	get_genelist();
+}
+
+annotate();
+
+genome_length();
+
+plot();
+
+my @pairdir;
+if (defined($options{'deg'})) {
+	dec();
+	infor_merge();
+}
+else{infor_merge_no_dec()}
+html();
+print "\ncluster program end:";
+Time();
+############################sub program###################################################
+sub make_dir_tmp{
+
+	#make temporary directory
+	if(not -d "$workdir\/cluster_runs"){
+		mkdir("$workdir\/cluster_runs");
+		mkdir("$workdir\/cluster_runs\/ref\/");
+	}
+
+	$dir="$workdir\/cluster_runs\/";
+	#print STDERR "mkdir $dir\n\n";
+	return;
+}
+
+#sub read_config{
+#	open IN,"<$config";
+#	while (my $aline=<IN>) {
+#		chomp $aline;
+#		my @tmp=split/\t/,$aline;
+#		push @filein,$tmp[0];
+#		push @mark,$tmp[1];
+#	}
+#	close IN;
+#	if (@filein != @mark) {
+#		die "Maybe config file have some wrong!!!\n";
+#	}
+#	$sample_number=@mark;
+#	$mark=join "\t",@mark;
+#	$sample_mark=join "\#",@mark;
+#}
+
+
+sub trim_adapter_and_filter{
+	my $time=time();
+	$preprocess=$dir."preProcess/";
+	mkdir $preprocess;
+	my $can_use_threads = eval 'use threads; 1';
+	if ($can_use_threads) {
+	# Do processing using threads
+		my @filein1=@filein; my @mark1=@mark;
+		while (@filein1>0) {
+			my @thrs; my @res;
+			for (my $i=0;$i<$t ;$i++) {
+				last if(@filein1==0);
+				my $in=shift @filein1;
+				my $out=shift @mark1;
+				push @clip,$dir."preProcess\/$out\_clip\.fq";
+				$thrs[$i]=threads->create(\&clips,$in,$out);
+			}
+			for (my $i=0;$i<@thrs;$i++) {
+				$res[$i]=$thrs[$i]->join();
+			}
+		}
+	} 
+	else {
+# Do not processing using threads
+		for (my $i=0;$i<@filein ;$i++) {
+			my $in=$filein[$i];
+			my $out=$mark[$i];
+			push @clip,$dir."preProcess\/$out\_clip\.fq";
+			&clips($in,$out);
+		}
+	}
+}
+
+sub clips{
+	my ($filein,$fileout)=@_;
+	my $adapter=$dir."preProcess\/$fileout\_clip\.fq";
+	if($format eq "fq" || $format eq "fastq"){
+		my $clip=`fastx_clipper -a $adpter -M 6  -Q $phred_qv -i $filein -o $adapter`;
+	}
+	if($format eq "fa" || $format eq "fasta"){
+		my $clip=`fastx_clipper -a $adpter -M 6 -i $filein -o $adapter`;
+	}
+	#my $clean=$dir."preProcess\/$fileout\_clean.fq";
+	#my $filter=`filterReadsByLength.pl -i $adapter -o $clean -min 18 -max 40 `;
+	return $fileout;
+}	
+
+sub group_and_filter{
+	#my ($ins,$data)=@_;
+	my @ins=@clip;
+	my $str="";
+	my $group_out_file=$dir."preProcess\/"."collapse_reads.fa";
+	#print "$$ins[0]\t$$ins[0]\n";
+	for (my $i=0;$i<@clip;$i++) {
+		$str .="-i $clip[$i] ";
+		#print "$$ins[$i]\n";
+	}
+	my $group=`perl $path\/collapseReads2Tags.pl $str -mark seq -o $group_out_file -format $format`;
+	print "perl $path\/collapseReads2Tags.pl $str -mark seq -o $group_out_file -format $format\n\n";
+
+	my $l_out=$dir."preProcess\/"."collapse_reads_18-40.fa";
+	my $tmpmark=join ",", @mark;
+
+	my $length_f=`perl $path\/filterReadsByLength.pl -i $group_out_file -o $l_out -min 18 -max 40 -mark $tmpmark`;
+	print "perl $path\/filterReadsByLength.pl -i $group_out_file -o $l_out -min 18 -max 40 -mark $tmpmark\n\n";
+	my $cout_f=`perl $path\/filterReadsByCount.pl -i $l_out -o $filter_out -mark $sample_mark`;
+	print "perl $path\/filterReadsByCount.pl -i $l_out -o $filter_out -mark $sample_mark\n\n";
+	my $plot_l_D=`perl $path/Length_Distibution.pl -i $dir/preProcess/reads_length_distribution_after_count_filter.txt -o $dir/preProcess/length.html `;
+	print "perl $path\/Length_Distibution.pl -i $dir\/preProcess\/reads_length_distribution_after_count_filter.txt -o $dir\/preProcess\/length\.html\n\n";
+	return 0;
+}
+
+sub rfam{
+	if (defined $options{'idx2'}) {
+		system("perl $path\/rfam.pl -i $data2 -ref $options{rfam} -v $mis_rfam -p $t -o $dir -index $options{idx2}");
+	}else{
+		system("perl $path\/rfam.pl -i $data2 -ref $options{rfam} -v $mis_rfam -p $t -o $dir");
+	}
+	my $tag=join "\\;" ,@mark;
+	my $rfam_count=`perl $path\/count_rfam_express.pl -i $dir\/rfam_match\/rfam_mapped.bwt -tag $tag -o $dir\/rfam_match\/rfam_non-miRNA_annotation.txt`;
+	return 0;
+}
+sub genome{
+	if(defined $options{'idx'}){
+		system("perl $path\/matching.pl -i $data3 -g $genome_fa -v $mis -p $t -r $hit -o $dir -index $options{idx}") ;
+	}else{
+		system("perl $path\/matching.pl -i $data3 -g $genome_fa -v $mis -p $t -r $hit -o $dir ") ;
+	}
+	#=================== mapping sta ===================================================
+	my $map_file=$dir."genome_match\/genome_mapped\.fa";
+	open (MAP,"<$map_file")||die"$!";
+	print "\n#each sample mapping reads sta:\n\n";
+	print "#$mark\ttotal\n";
+	while (my $ID=<MAP>) {
+		chomp $ID;
+		my @tmp=split/\:/,$ID;
+		my @exp=split/\_/,$tmp[1];
+		$exp[-1] =~ s/^x//;
+		for (my $i=0;$i<@exp ;$i++) {
+			$map_read[$i]+=$exp[$i];
+		}
+		$map_tag++;
+		my $seq=<MAP>;
+	}
+	my $map_read=join"\t",@map_read;
+	print "$map_read\n\n";
+	print "#total mapped tags:$map_read\n\n";
+	close MAP;
+	return 0;
+}
+
+sub bwt2bed{
+	$cluster_file=$dir."cluster\/";
+	mkdir ("$cluster_file");
+	print "sam file changed to bed file\n";
+	my ($file) = $dir."genome_match\/genome_mapped\.bwt";
+
+	my $sam2bed=`perl $path\/sam2Bed_bowtie.pl -i $file -mark $sample_mark -o $bed `;
+	print "perl $path\/sam2Bed_bowtie.pl -i $file -mark $sample_mark -o $bed\n\n";
+	return 0;
+}
+
+sub cluster{
+	print "tags is ready to merged clusters\n\n";
+	my ($file) =$bed;
+	if ($cluster_mothod eq "F") {
+		my $cluster=`perl $path\/conventional.pl -i $file -d $distance_of_merged_tag -n $sample_number -mark $sample_mark -o $read -t $read_txt`;
+		print "Using converntional method\n perl $path\/conventional.pl -i $file -d $distance_of_merged_tag -n $sample_number -mark $sample_mark -o $read -t $read_txt\n\n";
+	}
+	elsif($cluster_mothod eq "T"){
+		my $cluster=`perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $read_txt -k $sample_mark`;
+		print "Using nibls method\n perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $dir\/cluster.txt -k $sample_mark\n\n";
+	}
+	else{print "\-p is wrong!\n\n";}
+	return 0;
+}
+
+
+sub quantify{
+	print "clusters is ready to quantified\n\n";
+	my @depth=@map_read;
+	pop @depth;
+	my $depth=join ",",@depth;
+	my $quantify=`perl $path\/quantify_siRNA.pl -i $read -d $depth -o $rpkm`;
+	print "perl $path\/quantify_siRNA.pl -i $read -d $depth -o $rpkm\n\n\n";
+	return 0;
+}
+
+sub phase{
+	$annotate_dir=$dir."annotate\/";
+	mkdir ("$annotate_dir");
+	print "clusters is to predict phase siRNA\n";
+	my $phase=`perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out`;
+	print "perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out\n\n\n";
+	return 0;
+}
+
+sub class{
+	print "clusters is ready to annotate by sources\n\n";
+	my $nat=$options{'nat'};
+	my $repeat=$options{'repeat'};
+	my $class=`perl $path\/ClassAnnotate.pl -i $rpkm -g $gff  -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt`;
+	print "perl $path\/ClassAnnotate.pl -i $rpkm -g $gff  -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt\n\n";
+}
+
+sub annotate{
+	print "clusters is ready to annotate by gff file\n\n";
+	my $file;
+	if (defined($options{'nat'})&&defined($options{'repeat'})) {
+		$file="$annotate_dir\/sample_class.anno";
+	}
+	else{
+		$file=$rpkm;
+	}
+	my $annotate=`perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno`;
+	print "perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno\n\n";
+	return 0;
+}
+sub get_genelist{
+
+	my $get_genelist=`perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt`;
+	print "perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt";
+}
+
+sub dec{
+	print "deg reading\n\n";
+	my $deg_file=$options{'deg'};
+	open IN,"<$deg_file";
+	my @deg;
+	my $s=0;
+		while (my $aline=<IN>) {
+		chomp $aline;
+		next if($aline=~/^\#/);
+		$deg[$s]=$aline;
+		my @ea=split/\s+/,$aline;
+		push @pairdir,"$ea[0]_VS_$ea[1]\/";
+		#print "$deg[$s]\n";
+		$s++;
+	}
+	close IN;
+	$deg_dir=$dir."deg\/";
+	mkdir ("$deg_dir");
+	my $max_process = 10;
+	my $pm = new Parallel::ForkManager( $max_process );
+	my $number=@deg-1;
+	foreach(0..$number){
+		$pm->start and next;
+		&dec_pel($deg[$_]);
+		$pm->finish;
+	}
+	$pm->wait_all_children;
+}
+
+sub dec_pel{
+	print "\n******************\nstart:\n";
+	Time();
+	my $sample=shift(@_);
+	my @each=split/\s+/,$sample;
+	print "$each[0]\t$each[1]\n";
+	my $deg_sample_dir=$deg_dir."$each[0]_VS_$each[1]\/";
+	mkdir ("$deg_sample_dir");
+	print "read: $read\n";
+	print "deg_sample_dir: $deg_sample_dir\n";
+	print "$id{$each[0]}\t$each[0]\n";
+	print "$id{$each[1]}\t$each[1]\n";
+	my $deg=`perl $path\/DEGseq_2.pl -i $read -outdir $deg_sample_dir -column1 $id{$each[0]} -mark1 $each[0] -column2 $id{$each[1]} -mark2 $each[1]`; #-depth1 -depth2
+	my $time2=time();
+	print "end:\n*************************\n";
+	Time();
+	sleep 1;
+}
+
+sub infor_merge{
+	my ($input,$mark);
+	foreach (@pairdir) {
+		print "@pairdir\n";
+		$mark.=" -mark $_ ";
+		$input.=" -i $dir/deg\/$_\/output_score\.txt ";
+		print "$input\n$mark\n";
+	}
+	my $infor_merge=`perl $path\/SampleDEGseqMerge.pl $input $mark -f $annotate_dir\/sample_c_p.anno -n $sample_number -o $dir\/total.result `;
+	print "perl $path\/SampleDEGseqMerge.pl $input $mark -f $annotate_dir\/sample_c_p.anno -n $sample_number -o $dir\/total.result\n\n";
+}
+
+sub infor_merge_no_dec{
+	my $infor_merge_no_dec=`cp $annotate_dir\/sample_c_p.anno $dir\/total.result`;
+}
+
+sub genome_length{
+	my $length=`perl $path\/count_ref_length.pl -i $genome_fa -o $dir\/ref\/genome\.length`;
+	print "perl $path\/count_ref_length.pl -i $genome_fa -o $dir\/ref\/genome\.length\n\n"
+
+}
+
+sub plot{
+	$plot_dir="$dir\/plot\/";
+	mkdir ("$plot_dir");
+	my $span=defined($options{span})?$options{span}:50000;
+	my $cen="";
+	if (defined $options{cen}) {
+		$cen="-cen $options{cen}";
+	}
+	my $plot=`perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome\.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt `;
+	"print perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt \n";
+
+}
+
+sub html{
+	my $pathfile="$dir/path.txt";
+	open PA,">$pathfile";
+	print PA "$config\n";
+	print PA "$preprocess\n";
+	print PA "$dir"."rfam_match\n";
+	print PA "$dir"."genome_match\n";
+	print PA "$cluster_file\n";
+	print PA "$annotate_dir\n";
+	print PA "$plot_dir\n";
+	if (defined($deg_dir)) {
+			print PA "$deg_dir\n";
+	}
+	close PA;
+	my $html=`perl $path\/html_siRNA.pl -i $pathfile -format $format -o $dir/result.html`;
+}
+
+sub Time{
+	my $time=time();
+	my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+	$month++;
+	$year+=1900;
+	if (length($sec) == 1) {$sec = "0"."$sec";}
+	if (length($min) == 1) {$min = "0"."$min";}
+	if (length($hour) == 1) {$hour = "0"."$hour";}
+	if (length($day) == 1) {$day = "0"."$day";}
+	if (length($month) == 1) {$month = "0"."$month";}
+	print "$year-$month-$day $hour:$min:$sec\n";
+	return("$year-$month-$day-$hour-$min-$sec");
+}
+#################################################################################
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/siRNA_pipeline.xml	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,221 @@
+<tool id="plant_sirna_pipeline_v1" name="siRNA_pipeline" veision="1.0.0">
+  <description>Program for plant siRNA analysis (rawdata preprocess -> genome alignment -> non-coding annotate ->  siRNA analysis)</description>
+
+  <requirements>
+    <requirement type="set_environment">SCRIPT_PATH</requirement>
+    <requirement type="package" version="0.12.7">bowtie</requirement>
+    <requirement type="package" version="3.0.1">R</requirement>
+	<requirement type="package" version="0.0.13">fastx_toolkit </requirement>
+	<requirement type="package" version="1.96">threads</requirement>
+	<requirement type="package" version="1.06">Parallel-ForkManager</requirement>
+	<requirement type="package" version="2.59">SVG</requirement>
+	<requirement type="package" version="1.4_001">Boost-Graph</requirement>
+  </requirements>
+
+  <command interpreter="perl">siRNA_pipeline.pl 
+   ## Change this to accommodate the number of threads you have available.
+        -t \${GALAXY_SLOTS:-4}
+
+	-path \$SCRIPT_PATH
+
+    #for $j, $s in enumerate( $series )
+    ##rank_of_series=$j
+    -i ${s.input}
+    -tag ${s.tag}
+    #end for
+
+      ## prepare bowtie index
+      #set index_path = ''
+      #if str($reference_genome.source) == "history":
+          #set index_path = $reference_genome.own_file
+		-g  $index_path 
+	  #else:
+          #set index_path = $reference_genome.index.fields.path
+		-g ${index_path}.fa -idx $index_path 
+	  #end if
+
+
+	  ## prepare Rfam bowtie index
+	  #set rfam_index_path = ''
+	  #if str($reference_rfam.source) == "history":
+		  #set rfam_index_path = $reference_rfam.own_file
+		-rfam $rfam_index_path -v $v 
+	  #else:
+		  #set rfam_index_path = $reference_rfam.index.fields.path
+		-rfam ${rfam_index_path}.fa -idx2 $rfam_index_path -v $v 
+	  #end if
+
+
+
+   ## Do or not annotate siRNAs by function
+    #if $params.function_anno == "yes":
+
+      ## prepare bowtie index
+      #set nat_path = ''
+      #if str($params.nat_file.source) == "history":
+          #set nat_path = $params.nat_file.nat
+
+	  #else:
+          #set nat_path = $params.nat_file.index.fields.path
+	  #end if
+
+      ## prepare bowtie index
+      #set repeat_path = ''
+      #if str($params.repeat_file.source) == "history":
+          #set repeat_path = $params.repeat_file.repeat
+
+	  #else:
+          #set repeat_path = $params.repeat_file.index.fields.path
+	  #end if
+
+
+	 -nat $nat_path -repeat $repeat_path
+	#end if
+   
+   ## Do or not DEG
+    #if $degseq.degseq_analysis == "yes" :
+    -deg $degseq.deg
+    #end if
+
+  -format $format -phred $phred -f $gff -mis $mis -a $a -n $mapnt -d $d -p $p -l $l  -cen $cen -span $span > run.log
+
+  </command>
+
+  <inputs>
+
+   <repeat name="series" title="Raw sequence data file">
+     <param name="input" type="data" label="Raw data file"/>
+     <param name="tag" type="text" data_ref="input" label="Sample name of raw data"/>
+   </repeat>
+
+	<param name="format" type="select" label="raw data format" multiple="false">
+	  <option value="fastq">Raw data is fastq. format</option>
+	  <option value="fasta">Raw data is fasta. format</option>
+	</param>
+	
+	<param name="phred" type="select" label="input quals are Phred+64 or Phred+33" multiple="false">
+	  <option value="64">Phred+64</option>
+	  <option value="33" selected="true">Phred+33</option>
+	</param>
+
+        <conditional name="reference_genome">
+          <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in index</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+              <options from_data_table="bowtie_indexes">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+            <param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
+          </when>
+        </conditional>	<!--param type="data" name="index" label="genome sequence bowtie index"/-->
+
+	        <conditional name="reference_rfam">
+          <param name="source" type="select" label="Will you select a rfam reference  from your history or use a built-in index?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in index</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a non-coding reference " help="If your rfam of interest is not listed, contact the Galaxy team">
+              <options from_data_table="rfam_bowtie_indexes">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+            <param name="own_file" type="data" format="fasta" metadata_name="dbkey" label="Select the reference" />
+          </when>
+        </conditional>
+
+	<param name="gff" type="data" label="gff file" />
+	<param name="mis" type="integer" value="0" label="number of allowed mismatches when mapping reads to genome" />
+	<param name="v" type="integer" value="0" label="report end-to-end hits less than v mismatches for non-coding alignment"/>
+	<param name="a" type="text" value="TGGAATTCTCGGGTGCCAAGG" label="3' adapter sequence" />
+	<param name="mapnt" type="integer" value="25" label="a read is allowed to map up to this number of positions in the genome" />
+	<param name="d" type="integer" value="100" label="distance of tag to merged a cluster" />
+
+	<param name="p" type="select" label="siRNA cluster identify method" multiple="false">
+	  <option value="F">conventional</option>
+	  <option value="T">NIBLES</option>
+	</param>
+	<param name="l" type="integer" value="1000" label="the length of the upstream and downstream,used in position annotate" />
+
+
+	<conditional name="params">
+		<param name="function_anno" type="select" label="Do or not annotate siRNAs by function">
+		  <option value="no" selected="true">no</option>
+		  <option value="yes">yes</option>
+		 </param>
+		 <when value="yes">
+
+
+        <conditional name="nat_file">
+          <param name="source" type="select" label="Will you select a atural antisense transcripts file from your history ?" help="down load from ***">
+            <option value="indexed">Use a built-in file</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a atural antisense transcripts file" help="If your species of interest is not listed, contact the Galaxy team">
+              <options from_data_table="nat_annotate">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No files are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+            <param name="nat" type="data" format="txt" metadata_name="dbkey" label="atural antisense transcripts file" />
+		  </when>
+        </conditional>	<!--param type="data" name="index" label="genome sequence bowtie index"/-->
+
+        <conditional name="repeat_file">
+          <param name="source" type="select" label="Will you select a repeat information file from your history ?" help="down load from ***">
+            <option value="indexed">Use a built-in file</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a repeat information file" help="If your species of interest is not listed, contact the Galaxy team">
+              <options from_data_table="repeat_annotate">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No files are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>
+          <when value="history">
+			<param name="repeat" type="data" metadata_name="dbkey" label="repeat information file out of Repeatmasker" />
+		  </when>
+        </conditional>	<!--param type="data" name="index" label="genome sequence bowtie index"/-->
+		 </when>
+    </conditional> <!-- params -->
+	
+	<param name="cen" type="data" label="centromere file input" />
+	<param name="span" type="integer" value="50000" label="plot span" />
+	
+    <conditional name="degseq">
+       <param name="degseq_analysis" type="select" label="Do or not identify Difference Expression Clusters">
+		  <option value="no" selected="true">no</option>
+		  <option value="yes">yes</option>
+       </param>
+       <when value="yes">
+			<param name="deg" type="data" label="file config of de sample" />
+        </when>
+    </conditional>
+    
+  </inputs>
+
+  <outputs>
+   <data format="txt" name="siRNA cluster" from_work_dir="cluster_runs/total.result" label="${tool.name} on ${on_string}: siRNA cluster"/>
+   <data format="html" name="analysis result" from_work_dir="cluster_runs/result.html" label="${tool.name} on ${on_string}: analysis result"/>
+
+  </outputs>
+
+ <help>
+
+ </help>
+ </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bowtie_indices.loc.sample	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had hg18 indexed stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18	hg18	hg18	/depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon			hg18	hg18 Canonical	/depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full			hg18	hg18 Full		/depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19	hg19	hg19			/depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/nat_annotate.loc.sample	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had hg18 indexed stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18	hg18	hg18	/depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon			hg18	hg18 Canonical	/depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full			hg18	hg18 Full		/depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19	hg19	hg19			/depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/repeat_annotate.loc.sample	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had hg18 indexed stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18	hg18	hg18	/depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon			hg18	hg18 Canonical	/depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full			hg18	hg18 Full		/depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19	hg19	hg19			/depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/rfam_bowtie_indices.loc.sample	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had hg18 indexed stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18	hg18	hg18	/depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon			hg18	hg18 Canonical	/depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full			hg18	hg18 Full		/depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19	hg19	hg19			/depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,24 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of indexes in the Bowtie mapper format -->
+    <table name="bowtie_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie_indices.loc" />
+    </table>
+    <!-- Locations of indexes in the Bowtie mapper format -->
+    <table name="rfam_bowtie_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/rfam_bowtie_indices.loc" />
+    </table>
+    <!-- Locations of repeat annotation file -->
+    <table name="repeat_annotate" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/repeat_annotate.loc" />
+    </table>
+    <!-- Locations of nat annotation file -->
+    <table name="nat_annotate" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/nat_annotate.loc" />
+    </table>
+
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Sun Jan 04 02:47:25 2015 -0500
@@ -0,0 +1,134 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="fastx_toolkit" version="0.0.13">
+        <repository changeset_revision="ec66ae4c269b" name="package_fastx_toolkit_0_0_13" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="bowtie" version="0.12.7">
+        <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <set_environment version="1.0">
+        <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable>
+    </set_environment>
+	<!--package name="R" version="3.0.1">
+	   <repository name="package_r_3_0_1" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+	</package!-->
+
+	<package name="ViennaRNA" version="2.1.8">
+	   <install version="1.0">
+		   <actions>
+		     <action type="download_by_url">http://www.tbi.univie.ac.at/RNA/packages/source/ViennaRNA-2.1.8.tar.gz</action>
+			 <action type="shell_command">./configure --prefix=$INSTALL_DIR </action>
+			 <action type="shell_command">make</action>
+			 <action type="shell_command">make install</action>
+			 <action type="set_environment">
+			   <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+			 </action>
+		   </actions>
+	   </install>
+	</package>
+	<package name="R" version="3.0.1">
+	   <repository changeset_revision="c5ff6dd33c79" name="package_r_3_0_1" owner="iuc" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu" />
+
+		<install version="1.0">
+		   <actions>
+		      <action type="set_environment_for_install">
+                    <repository changeset_revision="c5ff6dd33c79" name="package_r_3_0_1" owner="iuc" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu">
+                        <package name="R" version="3.0.1" />
+                    </repository>
+              </action>
+		      <action type="shell_command">R CMD BATCH $REPOSITORY_INSTALL_DIR/install_DEG.R </action>
+		      <action type="shell_command">echo "export PATH=$PATH" &gt; $INSTALL_DIR/env.sh </action>
+			  <action type="shell_command">chmod 755 $INSTALL_DIR/env.sh </action>
+
+		 </actions>
+		</install>
+	</package>
+
+	<package name="threads" version="1.96">
+	   <install version="1.0">
+		   <actions>
+		     <action type="download_by_url">http://www.cpan.org/authors/id/J/JD/JDHEDDEN/threads-1.96.tar.gz</action>
+			<action type="make_directory">$INSTALL_DIR/lib/perl5</action>
+			<action type="shell_command">
+				perl Makefile.PL INSTALL_BASE=$INSTALL_DIR &amp;&amp;
+				make &amp;&amp;
+				make install 
+			</action>
+			<action type="set_environment">
+				<environment_variable action="append_to" name="PERL5LIB">$INSTALL_DIR/lib/perl5/:$INSTALL_DIR/lib/perl5/x86_64-linux-gnu-thread-multi/</environment_variable>	
+			</action>
+		   </actions>
+	   </install>
+	</package>
+
+
+	<package name="Parallel-ForkManager" version="1.06">
+	   <install version="1.0">
+		   <actions>
+		     <action type="download_by_url">http://www.cpan.org/authors/id/S/SZ/SZABGAB/Parallel-ForkManager-1.06.tar.gz</action>
+			<action type="make_directory">$INSTALL_DIR/lib/perl5</action>
+			<action type="shell_command">
+				perl Makefile.PL INSTALL_BASE=$INSTALL_DIR &amp;&amp;
+				make &amp;&amp;
+				make install 
+			</action>
+			<action type="set_environment">
+				<environment_variable action="append_to" name="PERL5LIB">$INSTALL_DIR/lib/perl5/:$INSTALL_DIR/lib/perl5/x86_64-linux-gnu-thread-multi/</environment_variable>	
+			</action>
+		   </actions>
+	   </install>
+	</package>
+
+	<package name="Boost-Graph" version="1.4_001">
+	   <install version="1.0">
+		   <actions>
+		     <action type="download_by_url">http://www.cpan.org/authors/id/D/DU/DUFFEE/Boost-Graph-1.4_001.tar.gz</action>
+			<action type="make_directory">$INSTALL_DIR/lib/perl5</action>
+			<action type="shell_command">
+				perl Makefile.PL INSTALL_BASE=$INSTALL_DIR &amp;&amp;
+				ls &amp;&amp;
+				make &amp;&amp;
+				make install 
+			</action>
+			<action type="set_environment">
+				<environment_variable action="append_to" name="PERL5LIB">$INSTALL_DIR/lib/perl5/:$INSTALL_DIR/lib/perl5/x86_64-linux-gnu-thread-multi/</environment_variable>	
+			</action>
+		   </actions>
+	   </install>
+	</package>
+
+	<package name="SVG" version="2.59">
+	   <install version="1.0">
+		   <actions>
+		     <action type="download_by_url">http://www.cpan.org/authors/id/S/SZ/SZABGAB/SVG-2.59.tar.gz</action>
+			<action type="make_directory">$INSTALL_DIR/lib/perl5</action>
+			<action type="shell_command">
+				perl Makefile.PL INSTALL_BASE=$INSTALL_DIR &amp;&amp;
+				make &amp;&amp;
+				make install 
+			</action>
+			<action type="set_environment">
+				<environment_variable action="append_to" name="PERL5LIB">$INSTALL_DIR/lib/perl5/:$INSTALL_DIR/lib/perl5/x86_64-linux-gnu-thread-multi/</environment_variable>	
+			</action>
+		   </actions>
+	   </install>
+	</package>
+
+	<!--package name="parent" version="0.228">
+	   <install version="1.0">
+		   <actions>
+		     <action type="download_by_url">http://www.cpan.org/authors/id/C/CO/CORION/parent-0.228.tar.gz</action>
+			<action type="make_directory">$INSTALL_DIR/lib/perl5</action>
+			<action type="shell_command">
+				perl Makefile.PL INSTALL_BASE=$INSTALL_DIR &amp;&amp;
+				make &amp;&amp;
+				make install 
+			</action>
+			<action type="set_environment">
+				<environment_variable action="append_to" name="PERL5LIB">$INSTALL_DIR/lib/perl5/:$INSTALL_DIR/lib/perl5/x86_64-linux-gnu-thread-multi/</environment_variable>	
+			</action>
+		   </actions>
+	   </install>
+	</package-->
+
+</tool_dependency>