changeset 49:f008ab2cadc6 draft

Uploaded
author big-tiandm
date Wed, 03 Dec 2014 02:03:27 -0500
parents 28ad3b598670
children 7b5a48b972e9
files miRPlant.pl miRPlant.xml tool-data/bowtie_indices.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 5 files changed, 681 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/miRPlant.pl	Wed Dec 03 02:03:27 2014 -0500
@@ -0,0 +1,506 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-4-22
+#Modified:
+#Description: plant microRNA prediction
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use threads;
+#use threads::shared;
+use File::Path;
+use File::Basename;
+#use RNA;
+#use Term::ANSIColor;
+
+my %opts;
+GetOptions(\%opts,"i:s@","tag:s@","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h");
+if (!(defined $opts{i} and defined $opts{format} and defined $opts{gfa} and defined $opts{pre} and defined $opts{mat}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $time=&Time();
+print "miPlant program start:\n   The time is $time!\n";
+print "Command line:\n   $0 @ARGV\n";
+
+my $format=$opts{'format'};
+if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { 
+	#&printErr();
+	die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
+}
+
+my $phred_qv=64;
+
+
+my @inputfiles=@{$opts{'i'}};
+my @inputtags=@{$opts{'tag'}};
+
+my  $mypath=`pwd`;
+chomp $mypath;
+
+my $dir=defined $opts{'o'} ? $opts{'o'} : "$mypath/miRPlant_out/";
+
+
+unless ($dir=~/\/$/) {$dir.="/";}
+if (not -d $dir) {
+	mkdir $dir;
+}
+my $config=$dir."/input_config";
+open CONFIG,">$config";
+	for (my $i=0;$i<@inputfiles;$i++) {
+		print CONFIG $inputfiles[$i],"\t",$inputtags[$i],"\n";
+	}
+close CONFIG;
+
+my $scipt_path=defined $opts{'path'} ? $opts{'path'} : "/Users/big/galaxy-dist/tools/myTools/";
+
+my $a="ATCTCGTATG";  #adapter
+if (defined $opts{'a'}) {$a=$opts{'a'};}
+
+my $m=6;  #adapter minimum mapped nt
+if (defined $opts{'M'}) {$m=$opts{'M'};}
+
+my $t=1; #threads number
+if (defined $opts{'t'}) {$t=$opts{'t'};}
+
+my $min_nt=19; # minimum reads length
+if (defined $opts{'min'}) {$min_nt=$opts{'min'};}
+
+my $max_nt=28; #maximum reads length
+if (defined $opts{'max'}) {$max_nt=$opts{'max'};}
+
+my $mis=0; #mismatch number for microRNA
+if (defined $opts{'mis'}) {$mis=$opts{'mis'};}
+
+my $mis_rfam=0;# mismatch number for rfam
+if (defined $opts{'v'}) {$mis_rfam=$opts{'v'};}
+
+my $hit=25; # maximum reads mapping hits in genome
+if (defined $opts{'r'}) {$hit=$opts{'r'};}
+
+my $upstream = 2; # microRNA 5' extension
+$upstream = $opts{'e'} if(defined $opts{'e'});
+
+my $downstream = 5;# microRNA 3' extension
+$downstream = $opts{'f'} if(defined $opts{'f'});
+
+my $maxd=defined $opts{'dis'} ? $opts{'dis'} : 200;
+my $flank=defined $opts{'flank'} ? $opts{'flank'} :10;
+my $mfe=defined $opts{'mfe'} ? $opts{'mfe'} : -20;
+
+$time=&Time();
+print "$time, Checking input file!\n";
+
+my (@filein,@mark,@clean);
+#&read_config();
+@filein=@inputfiles;
+@mark=@inputtags;
+
+&checkfa($opts{pre});
+&checkfa($opts{mat});
+&checkfa($opts{gfa});
+
+
+##### clip adpter --> clean data  start
+$time=&Time();
+print "$time, Preprocess:\n   trim adapter, reads collapse and filter reads by length.\n";
+
+$time=~s/:/-/g;
+$time=~s/ /-/g;
+my $preprocess=$dir."preProcess_${time}/";
+mkdir $preprocess;
+my $can_use_threads = eval 'use threads; 1';
+if ($can_use_threads) {
+# Do processing using threads
+	print "Do processing using threads\n";
+	my @filein1=@filein; my @mark1=@mark;
+	while (@filein1>0) {
+		my @thrs; my @res;
+		for (my $i=0;$i<$t ;$i++) {
+			last if(@filein1==0);
+			my $in=shift @filein1;
+			my $out=shift @mark1;
+			push @clean,$preprocess.$out."_clips_adapter.fq";
+			$thrs[$i]=threads->create(\&clips,$in,$out);
+		}
+		for (my $i=0;$i<@thrs;$i++) {
+			$res[$i]=$thrs[$i]->join();
+		}
+	}
+} else {
+# Do not processing using threads
+	print "Do not processing using threads\n";
+	for (my $i=0;$i<@filein ;$i++) {
+		my $in=$filein[$i];
+		my $out=$mark[$i];
+		push @clean,$preprocess.$out."_clips_adapter.fq";
+		&clips($in,$out);
+	}
+}
+
+##### clip adpter --> clean data  end
+
+my $collapsed=$preprocess."collapse_reads.fa";
+my $data=$preprocess."collapse_reads_${min_nt}_${max_nt}.fa"; ## raw clean data
+my $data2;   ### mirbase not mapped reads
+my $data3;   ### rfam not mapped reads
+&collapse(\@clean,$collapsed);   #collapse reads to tags
+
+&filterbylength();  # filter <$min_nt  && >$max_nt
+
+print "The final clean data file is $data, only contains reads which length is among $min_nt\~$max_nt\n\n";
+
+$time=Time();
+print "$time: known microRNA quantify!\n\n";
+
+chdir $dir;
+
+$time=~s/:/-/g;
+$time=~s/ /-/g;
+my $known_result=$dir."miRNA_Express_${time}/";
+&quantify(); ### known microRAN quantify
+
+
+#my $miR_exp_dir=&search($known_result,"miRNA_Express_");
+$data2=$known_result."/mirbase_not_mapped.fa";
+
+my $pathfile="$dir/path.txt";
+open PA,">$pathfile";
+print PA "$config\n";
+print PA "$preprocess\n";
+print PA "$known_result\n";
+
+if (defined $opts{'rfam'}) {              #rfam mapping and analysis
+	$time=Time();
+	print "$time: RNA annotate!\n\n";
+	$time=~s/:/-/g;
+	$time=~s/ /-/g;
+	my $rfam_exp_dir=$dir."rfam_match_${time}";
+	&rfam();
+	#my $rfam_exp_dir=&search($dir,"rfam_match_");
+	$data3=$rfam_exp_dir."/rfam_not_mapped.fa";
+print PA "$rfam_exp_dir\n";
+
+	my $tag=join "\\;" ,@mark;
+	system("perl $scipt_path/count_rfam_express.pl -i $rfam_exp_dir/rfam_mapped.bwt -tag $tag -o rfam_non-miRNA_annotation.txt");
+}
+
+my $data4=$data;
+if (defined $opts{'D'}) {                 #genome mapping 
+	$data4=$data3;
+}else{
+	$data4=$data2;
+}
+
+$time=Time();
+print "$time: Genome alignment!\n\n";
+$time=~s/:/-/g;
+$time=~s/ /-/g;
+my $genome_map=$dir."genome_match_${time}";
+&genome($data4);
+print PA "$genome_map\n";
+#my $genome_map=&search($dir,"genome_match_");
+my $mapfile=$genome_map."/genome_mapped.bwt";
+my $mapfa=$genome_map."/genome_mapped.fa";
+my $unmap=$genome_map."/genome_not_mapped.fa";
+
+#$time=Time();
+#print "$time: Novel microRNA prediction!\n\n";
+
+&predict($mapfa);
+
+close PA;
+system("perl $scipt_path/html.pl -i $pathfile -format $format -o $dir/result.html");
+
+$time=Time();
+print "$time: Program end!!\n";
+
+############################## sub programs ###################################
+sub predict{
+	my ($file)=@_;
+	$time=&Time();
+	print "$time: Novel microRNA prediction!\n\n";
+	$time=~s/:/-/g;
+	$time=~s/ /-/g;
+	my $predict=$dir."miRNA_predict_${time}";
+print PA "$predict\n";
+	mkdir $predict;
+	chdir $predict;
+	system("perl $scipt_path/precursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe");
+#	print "\nprecursors.pl -map $mapfile -g $opts{gfa} -d $maxd -f $flank -o $predict/excised_precursor.fa -s $predict/excised_precursor_struc.txt -e $mfe\n";
+	
+	system("bowtie-build -f excised_precursor.fa excised_precursor");
+#	print "\nbowtie-build -f excised_precursor.fa excised_precursor\n";
+	
+	system("bowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt 2> run.log");
+#	print "\nbowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt\n";
+	
+	system("perl $scipt_path/convert_bowtie_to_blast.pl  precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst");
+#	print "\nconvert_bowtie_to_blast.pl  precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst\n";
+
+	system("sort -k 4  precursor_mapped.bst  > signatures.bst");
+#	print "\nsort +3 -25  precursor_mapped.bst  > ../signatures.bst\n";
+
+	chdir $dir;
+	system("perl $scipt_path/miRDeep_plant.pl $predict/signatures.bst $predict/excised_precursor_struc.txt novel_tmp_dir -y > microRNA_prediction.mrd");
+#	print "\nmiRDeep_plant.pl $dir/signatures.bst $predict/excised_precursor_struc.txt tmp_dir -y > microRNA_prediction.txt\n";
+	#system("rm novel_tmp_dir -rf");
+	my $tag=join "," ,@mark;
+	system("perl $scipt_path/miRNA_Express_and_sequence.pl -i microRNA_prediction.mrd -list novel_microRNA_express.txt -fa novel_microRNA_mature.fa -pre novel_microRNA_precursor.fa -tag $tag");
+}
+
+sub genome{
+	my ($file)=@_;
+	if(defined $opts{'idx'}){
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -index $opts{idx} -time $time\n";
+	}else{
+		system("perl $scipt_path/matching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time") ;
+#		print "\nmatching.pl -i $file -g $opts{gfa} -v $mis -p $t -r $hit -o $dir -time $time\n";
+	}
+}
+sub rfam{
+	if (defined $opts{'idx2'}) {
+		system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time");
+#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -index $opts{idx2} -time $time\n";
+	}else{
+		system("perl $scipt_path/rfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time");
+#		print "\nrfam.pl -i $data2 -ref $opts{rfam} -v $mis_rfam -p $t -o $dir -time $time\n";
+	}
+}
+sub quantify{
+	my $tag=join "\\;" ,@mark;
+	system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag");
+	print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n";
+}
+sub filterbylength{
+	my $tmpmark=join ",", @mark;
+	system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark");
+	system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt  -o $preprocess/length.html");
+#	print "\nfilterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark\n";
+
+}
+sub collapse{
+	my ($ins,$data)=@_;
+		my $str="";
+	for (my $i=0;$i<@{$ins};$i++) {
+		$str .="-i $$ins[$i] ";
+	}
+	system ("perl $scipt_path/collapseReads2Tags.pl $str -mark seq -o $data -format $format");
+#	print "\ncollapseReads2Tags.pl $str -mark seq -o $data -format $format\n";
+}
+
+sub clips{
+	my ($in,$out)=@_;
+	my $adapter=$preprocess.$out."_clips_adapter.fq";
+	if($format eq "fq" || $format eq "fastq"){
+		system("fastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter") ;
+		print "\nfastx_clipper -a $a -M $m -Q $phred_qv -i $in -o $adapter\n";
+	}
+	if($format eq "fa" || $format eq "fasta"){
+		system("fastx_clipper -a $a -M $m -i $in -o $adapter") ;
+        #		print "\nfastx_clipper -a $a -M $m -i $in -o $adapter\n";
+	}
+	#my $clean=$preprocess.$out."_clean.fq";
+	#system("filterReadsByLength.pl -i $adapter -o $clean -min $min_nt -max $max_nt ");
+
+	return;
+}
+
+sub read_config{
+	open CON,"<$config";
+	while (my $aline=<CON>) {
+		chomp $aline;
+		my @tmp=split/\t/,$aline;
+		push @filein,$tmp[0];
+		push @mark,$tmp[1];
+		&check_rawdata($tmp[0]);
+	}
+	close CON;
+	if (@filein != @mark) {
+		#&printErr();
+		die "Maybe config file have some wrong!!!\n";
+	}
+}
+sub check_rawdata{
+	my ($fileforcheck)=@_;
+	if (!(-s $fileforcheck)) {
+		#&printErr();
+		die "Can not find $fileforcheck, or file is empty!!!\n";
+	}
+	if ($format eq "fasta" || $format eq "fa") {
+		&checkfa($fileforcheck);
+	}
+	if ($format eq "fastq" || $format eq "fq") {
+		&checkfq($fileforcheck);
+	}
+}
+sub checkfa{
+	my ($file_reads)=@_;
+	open N,"<$file_reads";
+	my $line=<N>;
+	chomp $line;
+    if($line !~ /^>\S+/){
+        #printErr();
+        die "The first line of file $file_reads does not start with '>identifier'
+Reads file $file_reads is not a valid fasta file\n\n";
+    }
+    if(<N> !~ /^[ACGTNacgtn]*$/){
+        #printErr();
+        die "File $file_reads contains not allowed characters in sequences
+Allowed characters are ACGTN
+Reads file $file_reads is not a fasta file\n\n";
+    }
+	close N;
+}
+sub checkfq{
+	my ($file_reads)=@_;
+
+	open N,"<$file_reads";
+	for (my $i=0;$i<10;$i++) {
+		my $a=<N>;
+		my $b=<N>;
+		my $c=<N>;
+		my $d=<N>;
+		chomp $a;
+		chomp $b;
+		chomp $c;
+		chomp $d;
+		if($a!~/^\@/){
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		if($b!~ /^[ACGTNacgtn]*$/){
+			#&printErr();
+			die "File $file_reads contains not allowed characters in sequences
+Allowed characters are ACGTN
+Reads file $file_reads is not a fasta file\n\n";
+		}
+		if ($c!~/^\@/ && $c!~/^\+/) {
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		if ((length $b) != (length $d)) {
+			#&printErr();
+			die "$file_reads is not a fastq file\n\n";
+		}
+		my @qv=split //,$d;
+		for (my $j=0;$j<@qv ;$j++) {
+			my $q=ord($qv[$j])-64;
+			if($q<0){$phred_qv=33;}
+		}
+	}
+	close N;
+}
+
+sub search{
+	my ($dir,$str)=@_;
+	opendir I,$dir;
+	my @ret;
+	while (my $file=readdir I) {
+		if ($file=~/$str/) {
+			push @ret, $file;
+		}
+	}
+	closedir I;
+	if (@ret != 1) {
+		#&printErr();
+
+		die "Can not find directory or file which name has string: $str !!!\n";
+	}
+	return $ret[0];
+}
+
+=cut
+
+sub printErr{
+    print STDERR color 'bold red';
+    print STDERR "Error: ";
+    print STDERR color 'reset';
+}
+sub Time{
+        my $time=time();
+        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+        $month++;
+        $year+=1900;
+        if (length($sec) == 1) {$sec = "0"."$sec";}
+        if (length($min) == 1) {$min = "0"."$min";}
+        if (length($hour) == 1) {$hour = "0"."$hour";}
+        if (length($day) == 1) {$day = "0"."$day";}
+        if (length($month) == 1) {$month = "0"."$month";}
+        #print "$year-$month-$day $hour:$min:$sec\n";
+        return("$year-$month-$day-$hour-$min-$sec");
+}
+=cut
+sub Time{
+        my $time=time();
+        my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
+        $month++;
+        $year+=1900;
+        if (length($sec) == 1) {$sec = "0"."$sec";}
+        if (length($min) == 1) {$min = "0"."$min";}
+        if (length($hour) == 1) {$hour = "0"."$hour";}
+        if (length($day) == 1) {$day = "0"."$day";}
+        if (length($month) == 1) {$month = "0"."$month";}
+        #print "$year-$month-$day $hour:$min:$sec\n";
+        return("$year-$month-$day $hour:$min:$sec");
+}
+
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+
+$0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t  -o  -path
+options:
+-i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ...
+-tag string # raw data file names, -tag xxx -tag xxx
+
+-format string,#specific input rawdata file format : fastq|fq|fasta|fa
+
+-path scirpt path
+
+-gfa string,  input file # genome fasta. sequence file
+-idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+
+-pre string,  input file #species specific microRNA precursor sequences
+-mat string,  input file #species specific microRNA mature sequences
+
+-rfam string,  input file# rfam database file, microRNAs must not be contained in this file## if not define, rfam small RNA will not be count.
+-idx2 string,  rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter
+                string must be the prefix of the bowtie index. For instance, if
+                the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then
+                the prefix is 'h_sapiens_37_asm'.##can be null
+
+-D      If [-D] is specified,will discard rfam mapped reads(nead -rfam).
+
+-a string,  ADAPTER string. default is ATCTCGTATG.
+-M int,    require minimum adapter alignment length of N. If less than N nucleotides aligned with the adapter - don't clip it. 
+-min int,  reads min length,default is 19.
+-max int,  reads max length,default is 28.
+
+-mis [int]     number of allowed mismatches when mapping reads to precursors, default 0
+-e [int]     number of nucleotides upstream of the mature sequence to consider, default 2
+-f [int]     number of nucleotides downstream of the mature sequence to consider, default 5
+-v <int>     report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment
+-r int       a read is allowed to map up to this number of positions in the genome,default is 25 
+
+-dis <int>   Maximal space between miRNA and miRNA* (200)
+-flank <int>   Flank sequence length of miRNA precursor (10)
+-mfe <folat> Maximal free energy allowed for a miRNA precursor (-20)
+
+-t int,    number of threads [1]
+
+-o output directory# absolute path
+-h help
+USAGE
+exit(1);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/miRPlant.xml	Wed Dec 03 02:03:27 2014 -0500
@@ -0,0 +1,89 @@
+<tool id="plant_microRNA_v1" name="miRPlant" veision="1.0.0">
+  <description>tool for plant microRNA analisis</description>
+
+  <requirements>
+	<requirement type="package" version="0.0.13">fastx_toolkit </requirement>
+    <requirement type="package" version="0.12.7">bowtie</requirement>
+    <requirement type="set_environment">SCRIPT_PATH</requirement>
+    <!--requirement type="package" version="3.0.1">R</requirement!-->
+	<requirement type="package" version="2.59">SVG</requirement>
+	<requirement type="package" version="2.1.8">ViennaRNA</requirement>
+  </requirements>
+
+  <!--command interpreter="perl">miPlant.pl -i $input -format $format -gfa $gfa -idx $index -pre $pre -mat $mat -rfam $rfam -idx2 $idx2 -D $D -a $a -M $M -min $min -max $max -mis $mis -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe -t $t -o $output</command-->
+
+  <command interpreter="perl">miRPlant.pl 
+   ## Change this to accommodate the number of threads you have available.
+        -t \${GALAXY_SLOTS:-4}
+   ## Do or not delet rfam mapped tags
+    #if $params.delet_rfam == "yes":
+	-D 
+	#end if
+	-path \$SCRIPT_PATH
+
+    #for $j, $s in enumerate( $series )
+    ##rank_of_series=$j
+    -i ${s.input}
+    -tag ${s.tag}
+    #end for
+
+    -format $format -gfa $gfa -pre $pre -mat $mat -rfam $rfam  -a $a -M $mapnt -min $min -max $max -mis $mismatch -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe > run.log
+  </command>
+
+  <inputs>
+
+   <repeat name="series" title="Series">
+     <param name="input" type="data" label="Raw data"/>
+     <param name="tag" type="text" data_ref="input" label="Sample name of raw data"/>
+   </repeat>
+
+	<conditional name="params">
+		<param name="delet_rfam" type="select" label="delet rfam mapped reads">
+		  <option value="yes" selected="true">yes</option>
+		  <option value="no">no</option>
+		 </param>
+    </conditional> <!-- params -->
+
+	<!--param name="input" format="tabular"  type="data" label="input config file" /-->
+	
+	<param name="format" type="select" lable="raw data format" multiple="false">
+	  <option value="fastq">Raw data is fastq. format</option>
+	  <option value="fasta">Raw data is fasta. format</option>
+	</param>
+	
+	<param name="gfa"  type="data" label="genome sequence fasta file"/>
+	<!--param type="data" name="index" label="genome sequence bowtie index"/-->
+	<param name="mat" type="data" label="mature microRNA sequence file" />
+	<param name="pre" type="data" label="precursor microRNA sequence fie" />
+	<param name="rfam" type="data" label="rfam sequence file" />
+	<!--param type="data" name="idx2" label="rfam sequence bowtie index " -->
+	<param name="a" type="text" value="ATCTCGTATG" label="3' adapter sequence" />
+	<param name="mapnt" type="integer" value="8" label="minimum adapter map nts" />
+	<param name="min" type="integer" value="19" label="minimum microRNA length" />
+	<param name="max" type="integer" value="28" label="maximum microRNA length" />
+	<param name="mismatch" type="integer" value="0" label="number of allowed mismatches when mapping reads to precursors" />
+	<param name="e" type="integer" value="2" label="number of nucleotides upstream of the mature sequence to consider" />
+	<param name="f" type="integer" value="5" label="number of nucleotides downstream of the mature sequence to consider" />
+	<param name="v" type="integer" value="0" label="report end-to-end hits less than v mismatches"/>
+	<param name="r" type="integer" value="25" label="a read is allowed to map up to this number of positions in the genome" />
+	<param name="dis" type="integer" value="200" label="Maximal space between miRNA and miRNA*" />
+	<param name="flank" type="integer" value="10" label="Flank sequence length of miRNA precursor" />
+	<param name="mfe" type="float" value="-30" label="Maximal free energy allowed for a miRNA precursor" />
+  </inputs>
+
+  <outputs>
+   <data format="txt" name="known microRNA express list" from_work_dir="miRPlant_out/known_microRNA_express.txt" label="${tool.name} on ${on_string}: known microRNA express list"/>
+   <data format="txt" name="known microRNA express alignment" from_work_dir="miRPlant_out/known_microRNA_express.aln" label="${tool.name} on ${on_string}: known microRNA express alignment"/>
+   <data format="txt" name="known microRNA moRs result" from_work_dir="miRPlant_out/known_microRNA_express.moRs" label="${tool.name} on ${on_string}: known microRNA moRs result"/>
+   <data format="txt" name="known microRNA precursor file" from_work_dir="miRPlant_out/known_microRNA_precursor.fa" label="${tool.name} on ${on_string}: known microRNA precursor file"/>
+   <data format="txt" name="known microRNA mature file" from_work_dir="miRPlant_out/known_microRNA_mature.fa" label="${tool.name} on ${on_string}: known microRNA mature file"/>
+   <data format="txt" name="novel microRNA express list" from_work_dir="miRPlant_out/novel_microRNA_express.txt" label="${tool.name} on ${on_string}: novel microRNA express list"/>
+   <data format="txt" name="novel microRNA precursor file" from_work_dir="miRPlant_out/novel_microRNA_precursor.fa" label="${tool.name} on ${on_string}: novel microRNA precursor file"/>
+   <data format="txt" name="novel microRNA mature sequence file" from_work_dir="miRPlant_out/novel_microRNA_mature.fa" label="${tool.name} on ${on_string}: novel microRNA mature sequence file"/>
+   <data format="html" name="analysis result" from_work_dir="miRPlant_out/result.html" label="${tool.name} on ${on_string}: analysis result"/>
+  </outputs>
+
+ <help>
+
+ </help>
+ </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bowtie_indices.loc.sample	Wed Dec 03 02:03:27 2014 -0500
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had hg18 indexed stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18	hg18	hg18	/depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon			hg18	hg18 Canonical	/depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full			hg18	hg18 Full		/depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19	hg19	hg19			/depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
--- a/tool_data_table_conf.xml.sample	Wed Dec 03 01:58:46 2014 -0500
+++ b/tool_data_table_conf.xml.sample	Wed Dec 03 02:03:27 2014 -0500
@@ -1,6 +1,6 @@
 <!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
 <tables>
-    <!-- Locations of indexes in the Bowtie2 mapper format -->
+    <!-- Locations of indexes in the Bowtie mapper format -->
     <table name="bowtie_indexes" comment_char="#">
         <columns>value, dbkey, name, path</columns>
         <file path="tool-data/bowtie_indices.loc" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Dec 03 02:03:27 2014 -0500
@@ -0,0 +1,48 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="fastx_toolkit" version="0.0.13">
+        <repository changeset_revision="ec66ae4c269b" name="package_fastx_toolkit_0_0_13" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="bowtie" version="0.12.7">
+        <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <set_environment version="1.0">
+        <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable>
+    </set_environment>
+	<!--package name="R" version="3.0.1">
+	   <repository name="package_r_3_0_1" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+	</package!-->
+
+	<package name="ViennaRNA" version="2.1.8">
+	   <install version="1.0">
+		   <actions>
+		     <action type="download_by_url">http://www.tbi.univie.ac.at/RNA/packages/source/ViennaRNA-2.1.8.tar.gz</action>
+			 <action type="shell_command">./configure --prefix=$INSTALL_DIR </action>
+			 <action type="shell_command">make</action>
+			 <action type="shell_command">make install</action>
+			 <action type="set_environment">
+			   <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+			 </action>
+		   </actions>
+	   </install>
+	</package>
+
+	<package name="SVG" version="2.59">
+	   <install version="1.0">
+		   <actions>
+		     <action type="download_by_url">http://www.cpan.org/authors/id/S/SZ/SZABGAB/SVG-2.59.tar.gz</action>
+			<action type="make_directory">$INSTALL_DIR/lib/perl5</action>
+			<action type="shell_command">
+				perl Makefile.PL INSTALL_BASE=$INSTALL_DIR &amp;&amp;
+				make &amp;&amp;
+				make install 
+			</action>
+			<action type="set_environment">
+				<environment_variable action="append_to" name="PERL5LIB">$INSTALL_DIR/lib/perl5/:$INSTALL_DIR/lib/perl5/x86_64-linux-gnu-thread-multi/</environment_variable>	
+			</action>
+		   </actions>
+	   </install>
+	</package>
+
+
+</tool_dependency>