Mercurial > repos > big-tiandm > mirplant2
changeset 46:ca05d68aca13 draft
Uploaded
author | big-tiandm |
---|---|
date | Thu, 13 Nov 2014 22:43:35 -0500 |
parents | 2cb6add23dfe |
children | c75593f79aa9 |
files | DEGseq.pl filterReadsByLength.pl miRDeep_plant.pl miRPlant.pl miRPlant.xml precursors.pl quantify.pl tool_dependencies.xml |
diffstat | 8 files changed, 94 insertions(+), 107 deletions(-) [+] |
line wrap: on
line diff
--- a/DEGseq.pl Thu Oct 30 21:29:35 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2009-05-06 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; -use File::Basename; - -my %opts; -GetOptions(\%opts,"i=s","outdir=s","column1:i","mark1=s","depth1:i","depth2:i","column2:i","mark2=s","h"); -if (!(defined $opts{i} and defined $opts{outdir} and defined $opts{mark1} and defined $opts{mark2}) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'i'}; -my $outputdir=$opts{'outdir'}; -unless ($outputdir=~/\/$/) {$outputdir .="/";} -my $column1=defined $opts{column1} ? $opts{column1} : 3; -my $column2=defined $opts{column2} ? $opts{column2} : 4; -my $mark1=$opts{mark1}; -my $mark2=$opts{mark2}; -my $fileout=$outputdir."degseq.R"; - -open OUT,">$fileout"; #output file - -print OUT "library(DEGseq)\n"; -print OUT "geneExpFile <- system.file(package=\"DEGseq\")\n"; -print OUT "geneExpFile<-file.path(\"$filein\")\n"; -print OUT "layout(matrix(c(1,2,3,4,5,6), 3, 2, byrow=TRUE))\npar(mar=c(2, 2, 2,2))\n"; -print OUT "outputdir<-file.path(\"$outputdir\")\n"; -print OUT "geneExpMatrix1 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column1))\n"; -print OUT "geneExpMatrix2 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column2))\n"; -if(defined $opts{'depth1'} && defined $opts{'depth2'}){ -print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",depth1=$opts{depth1},depth2=$opts{depth2},outputDir=outputdir,method=\"MARS\")\n"; -} -else{ -print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",outputDir=outputdir,method=\"MARS\")\n"; -} -close OUT; - -system("R CMD BATCH $fileout"); - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -outdir -column1 -mark1 -column2 -mark2 -depth1 -depth2 -options: --i input file --outdir output file dir --column1 the first column for DEGseq --mark1 the name of the column1 --depth1 depth for the first file,use for normalize --column2 the second column for DEGseq --mark2 the name of the column2 --depth2 depth for the second file,use for normalize - --h help -USAGE -exit(1); -} -
--- a/filterReadsByLength.pl Thu Oct 30 21:29:35 2014 -0400 +++ b/filterReadsByLength.pl Thu Nov 13 22:43:35 2014 -0500 @@ -97,7 +97,7 @@ close OUT; close R; -system ("R CMD BATCH $dir/length_distribution.R"); +#system ("R CMD BATCH $dir/length_distribution.R"); #system ("rm $dir/length_distribution.R"); #system ("rm $dir/length_distribution.Rout");
--- a/miRDeep_plant.pl Thu Oct 30 21:29:35 2014 -0400 +++ b/miRDeep_plant.pl Thu Nov 13 22:43:35 2014 -0500 @@ -3,7 +3,7 @@ use warnings; use strict; use Getopt::Std; -use RNA; +#use RNA; ################################# MIRDEEP ################################################# @@ -385,14 +385,26 @@ my $cpt_ega = 1; my ($seq,$number_of_randomizations)=@_; - my $str =$seq; - my $mfe = RNA::fold($seq,$str); + #my $str =$seq; + #my $mfe = RNA::fold($seq,$str); + my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`; + my @rawfolds=split/\s+/,$rnafold; + my $str=$rawfolds[1]; + my $mfe=$rawfolds[-1]; + $mfe=~s/\(//; + $mfe=~s/\)//; for (my $i=0;$i<$number_of_randomizations;$i++) { $seq = shuffle_sequence_dinucleotide($seq); - $str = $seq; + #$str = $seq; - my $rand_mfe = RNA::fold($str,$str); + #my $rand_mfe = RNA::fold($str,$str); + $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`; + my @rawfolds=split/\s+/,$rnafold; + my $str=$rawfolds[1]; + my $rand_mfe=$rawfolds[-1]; + $rand_mfe=~s/\(//; + $rand_mfe=~s/\)//; if ($rand_mfe < $mfe) { $cpt_inf++;
--- a/miRPlant.pl Thu Oct 30 21:29:35 2014 -0400 +++ b/miRPlant.pl Thu Nov 13 22:43:35 2014 -0500 @@ -10,11 +10,11 @@ use strict; use Getopt::Long; use threads; -use threads::shared; +#use threads::shared; use File::Path; use File::Basename; -use RNA; -use Term::ANSIColor; +#use RNA; +#use Term::ANSIColor; my %opts; GetOptions(\%opts,"i:s@","tag:s@","format=s","gfa=s","pre=s","mat=s","rfam:s","dis:i","flank:i","mfe:f","idx:s","idx2:s","mis:i","r:i","v:i","e:i","f:i","a:s","M:i","t:i","min:i","max:i","o:s","path:s","D","h"); @@ -28,7 +28,7 @@ my $format=$opts{'format'}; if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { - &printErr(); + #&printErr(); die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n"; } @@ -274,7 +274,7 @@ sub quantify{ my $tag=join "\\;" ,@mark; system("perl $scipt_path/quantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag"); -# print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n"; + print "\nquantify.pl -p $opts{pre} -m $opts{mat} -r $data -o $dir -time $time -mis $mis -t $t -e $upstream -f $downstream -tag $tag\n"; } sub filterbylength{ my $tmpmark=join ",", @mark; @@ -321,14 +321,14 @@ } close CON; if (@filein != @mark) { - &printErr(); + #&printErr(); die "Maybe config file have some wrong!!!\n"; } } sub check_rawdata{ my ($fileforcheck)=@_; if (!(-s $fileforcheck)) { - &printErr(); + #&printErr(); die "Can not find $fileforcheck, or file is empty!!!\n"; } if ($format eq "fasta" || $format eq "fa") { @@ -344,12 +344,12 @@ my $line=<N>; chomp $line; if($line !~ /^>\S+/){ - printErr(); + #printErr(); die "The first line of file $file_reads does not start with '>identifier' Reads file $file_reads is not a valid fasta file\n\n"; } if(<N> !~ /^[ACGTNacgtn]*$/){ - printErr(); + #printErr(); die "File $file_reads contains not allowed characters in sequences Allowed characters are ACGTN Reads file $file_reads is not a fasta file\n\n"; @@ -370,21 +370,21 @@ chomp $c; chomp $d; if($a!~/^\@/){ - &printErr(); + #&printErr(); die "$file_reads is not a fastq file\n\n"; } if($b!~ /^[ACGTNacgtn]*$/){ - &printErr(); + #&printErr(); die "File $file_reads contains not allowed characters in sequences Allowed characters are ACGTN Reads file $file_reads is not a fasta file\n\n"; } if ($c!~/^\@/ && $c!~/^\+/) { - &printErr(); + #&printErr(); die "$file_reads is not a fastq file\n\n"; } if ((length $b) != (length $d)) { - &printErr(); + #&printErr(); die "$file_reads is not a fastq file\n\n"; } my @qv=split //,$d; @@ -407,19 +407,20 @@ } closedir I; if (@ret != 1) { - &printErr(); + #&printErr(); die "Can not find directory or file which name has string: $str !!!\n"; } return $ret[0]; } +=cut + sub printErr{ print STDERR color 'bold red'; print STDERR "Error: "; print STDERR color 'reset'; } -=cut sub Time{ my $time=time(); my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; @@ -453,15 +454,16 @@ print <<"USAGE"; Version $version Usage: + $0 -i -format -gfa -index -pre -mat -rfam -D -a -M -min -max -mis -e -f -v -t -o -path options: --i string, input file#input files information file - /path/filename mark - /path/filename mark - ... +-i input files, # raw data file, can be multipe eg. -i xxx.fq -i xxx .fq ... +-tag string # raw data file names, -tag xxx -tag xxx -format string,#specific input rawdata file format : fastq|fq|fasta|fa +-path scirpt path + -gfa string, input file # genome fasta. sequence file -idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter string must be the prefix of the bowtie index. For instance, if
--- a/miRPlant.xml Thu Oct 30 21:29:35 2014 -0400 +++ b/miRPlant.xml Thu Nov 13 22:43:35 2014 -0500 @@ -2,11 +2,11 @@ <description>tool for plant microRNA analisis</description> <requirements> - <requirement type="set_environment">SCRIPT_PATH</requirement> + <requirement type="package" version="0.0.13">fastx_toolkit </requirement> <requirement type="package" version="0.12.7">bowtie</requirement> - <requirement type="package" version="3.0.1">R</requirement> - <requirement type="package" version="0.0.13">fastx_toolkit </requirement> - <requirement type="package" version="1.5.0">libx11</requirement> + <requirement type="set_environment">SCRIPT_PATH</requirement> + <!--requirement type="package" version="3.0.1">R</requirement!--> + <requirement type="package" version="2.59">SVG</requirement> <requirement type="package" version="2.1.8">ViennaRNA</requirement> </requirements>
--- a/precursors.pl Thu Oct 30 21:29:35 2014 -0400 +++ b/precursors.pl Thu Nov 13 22:43:35 2014 -0500 @@ -9,7 +9,7 @@ use strict; use Getopt::Long; -use RNA; +#use RNA; my %opts; GetOptions(\%opts,"map=s","g=s","d:i","f:i","o=s","e:f","s=s","h"); @@ -17,6 +17,7 @@ &usage; } +my $checkno=1; my $filein=$opts{'map'}; my $faout=$opts{'o'}; my $strout=$opts{'s'}; @@ -237,8 +238,14 @@ $tag2_end=$tag2_beg+length($tag2)-1; # fold - my ($struct,$mfe)=RNA::fold($seq); - $mfe=sprintf "%.2f", $mfe; + #my ($struct,$mfe)=RNA::fold($seq); + my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`; + my @rawfolds=split/\s+/,$rnafold; + my $struct=$rawfolds[1]; + my $mfe=$rawfolds[-1]; + $mfe=~s/\(//; + $mfe=~s/\)//; + #$mfe=sprintf "%.2f", $mfe; if ($mfe > $MAX_ENERGY) {return 0;} # tag1 @@ -331,8 +338,14 @@ # fold - my ($struct,$mfe)=RNA::fold($seq); - $mfe=sprintf "%.2f",$mfe; + #my ($struct,$mfe)=RNA::fold($seq); + my $rnafold=`perl -e 'print "$seq"' | RNAfold --noPS`; + my @rawfolds=split/\s+/,$rnafold; + my $struct=$rawfolds[1]; + my $mfe=$rawfolds[-1]; + $mfe=~s/\(//; + $mfe=~s/\)//; + if ($mfe > $MAX_ENERGY) { $pass=0; return $pass;
--- a/quantify.pl Thu Oct 30 21:29:35 2014 -0400 +++ b/quantify.pl Thu Nov 13 22:43:35 2014 -0500 @@ -12,7 +12,7 @@ use File::Basename; #use Getopt::Std; use Getopt::Long; -use RNA; +#use RNA; my %opts; GetOptions(\%opts,"r=s","p=s","m=s","mis:i","t:i","e:i","f:i","tag:s","o=s","time:s","h"); @@ -297,9 +297,17 @@ sub structure{ foreach my $key (keys %pre_mature) { if (!(defined $pre{$key})){die "!!!!! No precursor sequence $key, please check it!\n";} - my ($str,$mfe)=RNA::fold($pre{$key}); + #my ($str,$mfe)=RNA::fold($pre{$key}); + my $rnafold=`perl -e 'print "$pre{$key}"' | RNAfold --noPS`; + my @rnafolds=split/\s+/,$rnafold; + my $str=$rnafolds[1]; + my $mfe=$rnafolds[-1]; + $mfe=~s/\(//; + $mfe=~s/\)//; + $struc{$key}{"struc"}=$str; - $struc{$key}{"mfe"}=sprintf ("%.2f",$mfe); + #$struc{$key}{"mfe"}=sprintf ("%.2f",$mfe); + $struc{$key}{"mfe"}=$mfe; foreach my $id (keys %{$pre_mature{$key}}) { ($pre_mature{$key}{$id}{"star"}[0],$pre_mature{$key}{$id}{"star"}[1])=&other_pair($pre_mature{$key}{$id}{"mature"}[0],$pre_mature{$key}{$id}{"mature"}[1],$str);
--- a/tool_dependencies.xml Thu Oct 30 21:29:35 2014 -0400 +++ b/tool_dependencies.xml Thu Nov 13 22:43:35 2014 -0500 @@ -9,9 +9,9 @@ <set_environment version="1.0"> <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable> </set_environment> - <package name="R" version="3.0.1"> - <repository changeset_revision="c5ff6dd33c79" name="package_r_3_0_1" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> + <!--package name="R" version="3.0.1"> + <repository name="package_r_3_0_1" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package!--> <package name="ViennaRNA" version="2.1.8"> <install version="1.0"> @@ -26,4 +26,23 @@ </actions> </install> </package> + + <package name="SVG" version="2.59"> + <install version="1.0"> + <actions> + <action type="download_by_url">http://www.cpan.org/authors/id/S/SZ/SZABGAB/SVG-2.59.tar.gz</action> + <action type="make_directory">$INSTALL_DIR/lib/perl5</action> + <action type="shell_command"> + perl Makefile.PL INSTALL_BASE=$INSTALL_DIR && + make && + make install + </action> + <action type="set_environment"> + <environment_variable action="append_to" name="PERL5LIB">$INSTALL_DIR/lib/perl5/:$INSTALL_DIR/lib/perl5/x86_64-linux-gnu-thread-multi/</environment_variable> + </action> + </actions> + </install> + </package> + + </tool_dependency>