changeset 46:ca05d68aca13 draft

Uploaded
author big-tiandm
date Thu, 13 Nov 2014 22:43:35 -0500
parents 2cb6add23dfe
children c75593f79aa9
files DEGseq.pl filterReadsByLength.pl miRDeep_plant.pl miRPlant.pl miRPlant.xml precursors.pl quantify.pl tool_dependencies.xml
diffstat 8 files changed, 94 insertions(+), 107 deletions(-) [+]
line wrap: on
line diff
--- a/DEGseq.pl	Thu Oct 30 21:29:35 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-#!/usr/bin/perl -w
-#Filename:
-#Author: Tian Dongmei
-#Email: tiandm@big.ac.cn
-#Date: 2009-05-06
-#Modified:
-#Description: 
-my $version=1.00;
-
-use strict;
-use Getopt::Long;
-use File::Basename;
-
-my %opts;
-GetOptions(\%opts,"i=s","outdir=s","column1:i","mark1=s","depth1:i","depth2:i","column2:i","mark2=s","h");
-if (!(defined $opts{i} and defined $opts{outdir} and defined $opts{mark1} and defined $opts{mark2}) || defined $opts{h}) { #necessary arguments
-&usage;
-}
-
-my $filein=$opts{'i'};
-my $outputdir=$opts{'outdir'};
-unless ($outputdir=~/\/$/) {$outputdir .="/";}
-my $column1=defined $opts{column1} ? $opts{column1} : 3;
-my $column2=defined $opts{column2} ? $opts{column2} : 4;
-my $mark1=$opts{mark1};
-my $mark2=$opts{mark2};
-my $fileout=$outputdir."degseq.R";
-
-open OUT,">$fileout"; #output file  
-
-print OUT "library(DEGseq)\n";
-print OUT "geneExpFile <- system.file(package=\"DEGseq\")\n";
-print OUT "geneExpFile<-file.path(\"$filein\")\n";
-print OUT "layout(matrix(c(1,2,3,4,5,6), 3, 2, byrow=TRUE))\npar(mar=c(2, 2, 2,2))\n";
-print OUT "outputdir<-file.path(\"$outputdir\")\n";
-print OUT "geneExpMatrix1 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column1))\n";
-print OUT "geneExpMatrix2 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column2))\n";
-if(defined $opts{'depth1'} && defined $opts{'depth2'}){
-print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",depth1=$opts{depth1},depth2=$opts{depth2},outputDir=outputdir,method=\"MARS\")\n";
-}
-else{
-print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",outputDir=outputdir,method=\"MARS\")\n";
-}
-close OUT;
-
-system("R CMD BATCH $fileout");
-
-sub usage{
-print <<"USAGE";
-Version $version
-Usage:
-$0 -i -outdir -column1 -mark1 -column2 -mark2 -depth1 -depth2
-options:
--i input file
--outdir output file dir
--column1 the first column for DEGseq
--mark1 the name of the column1
--depth1 depth for the first file,use for normalize
--column2 the second column for DEGseq
--mark2 the name of the column2
--depth2 depth for the second file,use for normalize
-
--h help
-USAGE
-exit(1);
-}
-
--- a/filterReadsByLength.pl	Thu Oct 30 21:29:35 2014 -0400
+++ b/filterReadsByLength.pl	Thu Nov 13 22:43:35 2014 -0500
@@ -97,7 +97,7 @@
 close OUT;
 close R;
 
-system ("R CMD BATCH $dir/length_distribution.R");
+#system ("R CMD BATCH $dir/length_distribution.R");
 
 #system ("rm $dir/length_distribution.R");
 #system ("rm $dir/length_distribution.Rout");
--- a/miRDeep_plant.pl	Thu Oct 30 21:29:35 2014 -0400
+++ b/miRDeep_plant.pl	Thu Nov 13 22:43:35 2014 -0500
@@ -3,7 +3,7 @@
 use warnings;
 use strict;
 use Getopt::Std;
-use RNA;
+#use RNA;
 
 
 ################################# MIRDEEP #################################################
@@ -385,14 +385,26 @@
 	my $cpt_ega = 1;
 	
 	my ($seq,$number_of_randomizations)=@_;
-	my $str =$seq;
-	my $mfe = RNA::fold($seq,$str);
+	#my $str =$seq;
+	#my $mfe = RNA::fold($seq,$str);
+		my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
+		my @rawfolds=split/\s+/,$rnafold;
+		my $str=$rawfolds[1];
+		my $mfe=$rawfolds[-1];
+		$mfe=~s/\(//;
+		$mfe=~s/\)//;
 
 	for (my $i=0;$i<$number_of_randomizations;$i++) {
 		$seq = shuffle_sequence_dinucleotide($seq);
-		$str = $seq;
+		#$str = $seq;
 	
-		my $rand_mfe = RNA::fold($str,$str);
+		#my $rand_mfe = RNA::fold($str,$str);
+		$rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
+		my @rawfolds=split/\s+/,$rnafold;
+		my $str=$rawfolds[1];
+		my $rand_mfe=$rawfolds[-1];
+		$rand_mfe=~s/\(//;
+		$rand_mfe=~s/\)//;
 	
 		if ($rand_mfe < $mfe) {
 			$cpt_inf++;
--- a/miRPlant.pl	Thu Oct 30 21:29:35 2014 -0400
+++ b/miRPlant.pl	Thu Nov 13 22:43:35 2014 -0500
@@ -10,11 +10,11 @@
 use strict;
 use Getopt::Long;
 use threads;
-use threads::shared;
+#use threads::shared;
 use File::Path;
 use File::Basename;
-use RNA;
-use Term::ANSIColor;
+#use RNA;
+#use Term::ANSIColor;
 
 my %opts;
 GetOptions(\%opts,"i:s@","tag:s@","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h");
@@ -28,7 +28,7 @@
 
 my $format=$opts{'format'};
 if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { 
-	&printErr();
+	#&printErr();
 	die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n";
 }
 
@@ -274,7 +274,7 @@
 sub quantify{
 	my $tag=join "\\;" ,@mark;
 	system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag");
-#	print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n";
+	print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n";
 }
 sub filterbylength{
 	my $tmpmark=join ",", @mark;
@@ -321,14 +321,14 @@
 	}
 	close CON;
 	if (@filein != @mark) {
-		&printErr();
+		#&printErr();
 		die "Maybe config file have some wrong!!!\n";
 	}
 }
 sub check_rawdata{
 	my ($fileforcheck)=@_;
 	if (!(-s $fileforcheck)) {
-		&printErr();
+		#&printErr();
 		die "Can not find $fileforcheck, or file is empty!!!\n";
 	}
 	if ($format eq "fasta" || $format eq "fa") {
@@ -344,12 +344,12 @@
 	my $line=<N>;
 	chomp $line;
     if($line !~ /^>\S+/){
-        printErr();
+        #printErr();
         die "The first line of file $file_reads does not start with '>identifier'
 Reads file $file_reads is not a valid fasta file\n\n";
     }
     if(<N> !~ /^[ACGTNacgtn]*$/){
-        printErr();
+        #printErr();
         die "File $file_reads contains not allowed characters in sequences
 Allowed characters are ACGTN
 Reads file $file_reads is not a fasta file\n\n";
@@ -370,21 +370,21 @@
 		chomp $c;
 		chomp $d;
 		if($a!~/^\@/){
-			&printErr();
+			#&printErr();
 			die "$file_reads is not a fastq file\n\n";
 		}
 		if($b!~ /^[ACGTNacgtn]*$/){
-			&printErr();
+			#&printErr();
 			die "File $file_reads contains not allowed characters in sequences
 Allowed characters are ACGTN
 Reads file $file_reads is not a fasta file\n\n";
 		}
 		if ($c!~/^\@/ && $c!~/^\+/) {
-			&printErr();
+			#&printErr();
 			die "$file_reads is not a fastq file\n\n";
 		}
 		if ((length $b) != (length $d)) {
-			&printErr();
+			#&printErr();
 			die "$file_reads is not a fastq file\n\n";
 		}
 		my @qv=split //,$d;
@@ -407,19 +407,20 @@
 	}
 	closedir I;
 	if (@ret != 1) {
-		&printErr();
+		#&printErr();
 
 		die "Can not find directory or file which name has string: $str !!!\n";
 	}
 	return $ret[0];
 }
 
+=cut
+
 sub printErr{
     print STDERR color 'bold red';
     print STDERR "Error: ";
     print STDERR color 'reset';
 }
-=cut
 sub Time{
         my $time=time();
         my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6];
@@ -453,15 +454,16 @@
 print <<"USAGE";
 Version $version
 Usage:
+
 $0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t  -o  -path
 options:
--i string,  input file#input files information file
-		/path/filename	mark
-		/path/filename	mark
-		...
+-i input files, # raw data file, can be multiple, e.g. -i xxx.fq -i xxx.fq ...
+-tag string # raw data file names, -tag xxx -tag xxx
 
 -format string,#specific input rawdata file format : fastq|fq|fasta|fa
 
+-path script path
+
 -gfa string,  input file # genome fasta. sequence file
 -idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter
                 string must be the prefix of the bowtie index. For instance, if
--- a/miRPlant.xml	Thu Oct 30 21:29:35 2014 -0400
+++ b/miRPlant.xml	Thu Nov 13 22:43:35 2014 -0500
@@ -2,11 +2,11 @@
   <description>tool for plant microRNA analisis</description>
 
   <requirements>
-    <requirement type="set_environment">SCRIPT_PATH</requirement>
+	<requirement type="package" version="0.0.13">fastx_toolkit </requirement>
     <requirement type="package" version="0.12.7">bowtie</requirement>
-    <requirement type="package" version="3.0.1">R</requirement>
-	<requirement type="package" version="0.0.13">fastx_toolkit </requirement>
-	<requirement type="package" version="1.5.0">libx11</requirement>
+    <requirement type="set_environment">SCRIPT_PATH</requirement>
+    <!--requirement type="package" version="3.0.1">R</requirement!-->
+	<requirement type="package" version="2.59">SVG</requirement>
 	<requirement type="package" version="2.1.8">ViennaRNA</requirement>
   </requirements>
 
--- a/precursors.pl	Thu Oct 30 21:29:35 2014 -0400
+++ b/precursors.pl	Thu Nov 13 22:43:35 2014 -0500
@@ -9,7 +9,7 @@
 
 use strict;
 use Getopt::Long;
-use RNA;
+#use RNA;
 
 my %opts;
 GetOptions(\%opts,"map=s","g=s","d:i","f:i","o=s","e:f","s=s","h");
@@ -17,6 +17,7 @@
 &usage;
 }
 
+my $checkno=1;
 my $filein=$opts{'map'};
 my $faout=$opts{'o'};
 my $strout=$opts{'s'};
@@ -237,8 +238,14 @@
 	$tag2_end=$tag2_beg+length($tag2)-1;
 
 	# fold
-	my ($struct,$mfe)=RNA::fold($seq);
-	$mfe=sprintf "%.2f", $mfe;
+	#my ($struct,$mfe)=RNA::fold($seq);
+		my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
+		my @rawfolds=split/\s+/,$rnafold;
+		my $struct=$rawfolds[1];
+		my $mfe=$rawfolds[-1];
+		$mfe=~s/\(//;
+		$mfe=~s/\)//;
+	#$mfe=sprintf "%.2f", $mfe;
 	if ($mfe > $MAX_ENERGY) {return 0;}
 
 	# tag1
@@ -331,8 +338,14 @@
 	
 	
 	# fold
-	my ($struct,$mfe)=RNA::fold($seq);
-	$mfe=sprintf "%.2f",$mfe;
+	#my ($struct,$mfe)=RNA::fold($seq);
+		my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`;
+		my @rawfolds=split/\s+/,$rnafold;
+		my $struct=$rawfolds[1];
+		my $mfe=$rawfolds[-1];
+		$mfe=~s/\(//;
+		$mfe=~s/\)//;
+	
 	if ($mfe > $MAX_ENERGY) {
 		$pass=0;
 		return $pass;
--- a/quantify.pl	Thu Oct 30 21:29:35 2014 -0400
+++ b/quantify.pl	Thu Nov 13 22:43:35 2014 -0500
@@ -12,7 +12,7 @@
 use File::Basename;
 #use Getopt::Std;
 use Getopt::Long;
-use RNA;
+#use RNA;
 
 my %opts;
 GetOptions(\%opts,"r=s","p=s","m=s","mis:i","t:i","e:i","f:i","tag:s","o=s","time:s","h");
@@ -297,9 +297,17 @@
 sub structure{
 	foreach my $key (keys %pre_mature) {
 		if (!(defined $pre{$key})){die "!!!!! No precursor sequence $key, please check it!\n";}
-		my ($str,$mfe)=RNA::fold($pre{$key});
+		#my ($str,$mfe)=RNA::fold($pre{$key});
+		my $rnafold=`perl -e 'print "$pre{$key}"' | RNAfold --noPS`;
+		my @rnafolds=split/\s+/,$rnafold;
+		my $str=$rnafolds[1];
+		my $mfe=$rnafolds[-1];
+		$mfe=~s/\(//;
+		$mfe=~s/\)//;
+
 		$struc{$key}{"struc"}=$str;
-		$struc{$key}{"mfe"}=sprintf ("%.2f",$mfe);
+		#$struc{$key}{"mfe"}=sprintf ("%.2f",$mfe);
+		$struc{$key}{"mfe"}=$mfe;
 
 		foreach my $id (keys %{$pre_mature{$key}}) {
 			($pre_mature{$key}{$id}{"star"}[0],$pre_mature{$key}{$id}{"star"}[1])=&other_pair($pre_mature{$key}{$id}{"mature"}[0],$pre_mature{$key}{$id}{"mature"}[1],$str);
--- a/tool_dependencies.xml	Thu Oct 30 21:29:35 2014 -0400
+++ b/tool_dependencies.xml	Thu Nov 13 22:43:35 2014 -0500
@@ -9,9 +9,9 @@
     <set_environment version="1.0">
         <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable>
     </set_environment>
-	<package name="R" version="3.0.1">
-	   <repository changeset_revision="c5ff6dd33c79" name="package_r_3_0_1" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
-	</package>
+	<!--package name="R" version="3.0.1">
+	   <repository name="package_r_3_0_1" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+	</package!-->
 
 	<package name="ViennaRNA" version="2.1.8">
 	   <install version="1.0">
@@ -26,4 +26,23 @@
 		   </actions>
 	   </install>
 	</package>
+
+	<package name="SVG" version="2.59">
+	   <install version="1.0">
+		   <actions>
+		     <action type="download_by_url">http://www.cpan.org/authors/id/S/SZ/SZABGAB/SVG-2.59.tar.gz</action>
+			<action type="make_directory">$INSTALL_DIR/lib/perl5</action>
+			<action type="shell_command">
+				perl Makefile.PL INSTALL_BASE=$INSTALL_DIR &amp;&amp;
+				make &amp;&amp;
+				make install 
+			</action>
+			<action type="set_environment">
+				<environment_variable action="append_to" name="PERL5LIB">$INSTALL_DIR/lib/perl5/:$INSTALL_DIR/lib/perl5/x86_64-linux-gnu-thread-multi/</environment_variable>	
+			</action>
+		   </actions>
+	   </install>
+	</package>
+
+
 </tool_dependency>