# HG changeset patch
# User big-tiandm
# Date 1414718959 14400
# Node ID 0c4e110189348a08c1c7bd7c21e5a137b48a30e0
# Parent 4c0b1a94b88224abf871f80cfd11bac4b8bec889
Uploaded
diff -r 4c0b1a94b882 -r 0c4e11018934 DEGseq.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DEGseq.pl Thu Oct 30 21:29:19 2014 -0400
@@ -0,0 +1,67 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2009-05-06
+#Modified:
+#Description:
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+use File::Basename;
+
+my %opts;
+GetOptions(\%opts,"i=s","outdir=s","column1:i","mark1=s","depth1:i","depth2:i","column2:i","mark2=s","h");
+if (!(defined $opts{i} and defined $opts{outdir} and defined $opts{mark1} and defined $opts{mark2}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $outputdir=$opts{'outdir'};
+unless ($outputdir=~/\/$/) {$outputdir .="/";}
+my $column1=defined $opts{column1} ? $opts{column1} : 3;
+my $column2=defined $opts{column2} ? $opts{column2} : 4;
+my $mark1=$opts{mark1};
+my $mark2=$opts{mark2};
+my $fileout=$outputdir."degseq.R";
+
+open OUT,">$fileout"; #output file
+
+print OUT "library(DEGseq)\n";
+print OUT "geneExpFile <- system.file(package=\"DEGseq\")\n";
+print OUT "geneExpFile<-file.path(\"$filein\")\n";
+print OUT "layout(matrix(c(1,2,3,4,5,6), 3, 2, byrow=TRUE))\npar(mar=c(2, 2, 2,2))\n";
+print OUT "outputdir<-file.path(\"$outputdir\")\n";
+print OUT "geneExpMatrix1 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column1))\n";
+print OUT "geneExpMatrix2 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column2))\n";
+if(defined $opts{'depth1'} && defined $opts{'depth2'}){
+print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",depth1=$opts{depth1},depth2=$opts{depth2},outputDir=outputdir,method=\"MARS\")\n";
+}
+else{
+print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",outputDir=outputdir,method=\"MARS\")\n";
+}
+close OUT;
+
+system("R CMD BATCH $fileout");
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -outdir -column1 -mark1 -column2 -mark2 -depth1 -depth2
+options:
+-i input file
+-outdir output file dir
+-column1 the first column for DEGseq
+-mark1 the name of the column1
+-depth1 depth for the first file,use for normalize
+-column2 the second column for DEGseq
+-mark2 the name of the column2
+-depth2 depth for the second file,use for normalize
+
+-h help
+USAGE
+exit(1);
+}
+
diff -r 4c0b1a94b882 -r 0c4e11018934 Length_Distibution.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Length_Distibution.pl Thu Oct 30 21:29:19 2014 -0400
@@ -0,0 +1,219 @@
+#!/usr/bin/perl -w
+#==========================================================================================
+# Date:
+# Title:
+# Comment: Program to plot gene structure
+# Input: 1. input file of Gene region annotation which format like GenePred
+# 2. input file of Transcripts region annotation which format like GenePred
+# 3. input file of gene snp detail info
+# Output: output file of gene structure graph by html or svg formt
+# Test Usage:
+#========================================================================================
+#use strict;
+my $version=1.00;
+use SVG;
+use Getopt::Long;
+my %opt;
+GetOptions(\%opt,"i=s","o=s",,"h");
+if (!(defined $opt{i} and defined $opt{o}) || defined $opt{h}) {
+&usage;
+}
+#===============================Define Attribute==========================================
+my %attribute=(
+ canvas=>{
+ 'width'=>1500,
+ 'height'=>1800
+ },
+ text=>{
+ 'stroke'=>"#000000",
+ 'fill'=>"none",
+ 'stroke-width'=>0.5
+ #'stroke-width2'=>1
+ },
+ line=>{
+ 'stroke'=>"black",
+ 'stroke-width'=>1
+ },
+ font=>{
+ 'fill'=>"#000000",
+ 'font-size'=>12,
+ 'font-size2'=>10,
+ 'font-weight'=>'bold',
+ 'font-family'=>"Arial"
+ #'font-family2'=>"ArialNarrow-bold"
+ },
+ rect=>{
+ 'fill'=>"lightgreen",
+ 'stroke'=>"black",
+ 'stroke-width'=>0.5
+ },
+ readwidth=>0.5
+);
+#my $Xscale=600/$length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算
+#========================================data============================
+open(IN,"$opt{i}")||die"cannot open the file $opt{i}";
+my @R_length;
+my @T_length;
+my $R_number=0;
+my $T_number=0;
+my $R_max=0;
+my $T_max=0;
+
+my $title=;
+chomp $title;
+my @title=split/\t/,$title;
+my @mark=split/\s+/,$title[1];
+my $sample_number=@mark;
+while (my $aline=) {
+ if ($aline=~/^\s/) {
+ my $T_title=;
+ chomp $T_title;
+ while (my $a_aline=) {
+ chomp $a_aline;
+ my @temp=split/\t/,$a_aline;
+ my @number=split/\s+/,$temp[1];
+ for (my $i=0;$i<@number ;$i++) {
+ if ($R_max<$number[$i]) {
+ $R_max=$number[$i];
+ }
+ }
+ push @R_length,[$temp[0],@number];
+ $R_number++;
+ }
+ }
+ else {
+ chomp $aline;
+ my @temp=split/\t/,$aline;
+ my @number=split/\s+/,$temp[1];
+ for (my $i=0;$i<@number ;$i++) {
+ if ($T_max<$number[$i]) {
+ $T_max=$number[$i];
+ }
+ }
+ push @T_length,[$temp[0],@number];
+ $T_number++;
+ }
+}
+close IN;
+print "Tag max: $T_max\nRead max: $R_max\n";
+my $kd_number=5;
+##=======================Reads 纵坐标刻度==========================
+my $r=1;
+my $rr=1;
+my $R=$R_max;
+while ($R>10) {
+ $R=$R/10;
+ $r=$r*10;
+ $rr++;
+}
+$R=int($R+0.5);
+my $R_xg=$R/$kd_number*$r;#纵坐标一小格大小(一共10格)
+my $R_kedu_scale_x=6*$rr;#纵坐标刻度文字
+##=======================Tags 纵坐标刻度==========================
+my $t=1;
+my $tt=1;
+my $T=$T_max;
+while ($T>10) {
+ $T=$T/10;
+ $t=$t*10;
+ $tt++;
+}
+$T=int($T+0.5);
+my $T_xg=$T/$kd_number*$t;#纵坐标一小格大小(一共10格)
+my $T_kedu_scale_x=6*$tt;#纵坐标刻度文字
+
+#############################s#define start coordinate and scale
+my $XOFFSET=50;
+my $YOFFSET=60;
+my $width=800;
+my $heigth=800;
+my $X_width=600;
+#my $height=1600;
+#### Starting ####
+#新建画布
+my $svg=SVG->new(width=>$width,height=>$heigth);
+####坐标轴
+my $axisL=300;#read 纵坐标长度
+my $x_margin = 50;
+#=========Reads number setting==========================================
+my $Y_R_title=30;#标题的纵向宽度
+my $Y_R_0=$YOFFSET+$axisL+$Y_R_title;
+my $X_R_0=$XOFFSET+$x_margin;
+my $R_Yscale=$axisL/$R_xg/$kd_number;
+my $R_Xscale=$X_width/$R_number/($sample_number+1);
+#=====================================Reads Y axis======================
+$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0,'y2',$Y_R_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+for (my $i=1;$i<$kd_number ;$i++) {
+ $svg->line('x1',$X_R_0-5,'y1',$Y_R_0-$i*$R_xg*$R_Yscale,'x2',$X_R_0,'y2',$Y_R_0-$i*$R_xg*$R_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+ $svg->text('x',$X_R_0-$R_kedu_scale_x,'y',$Y_R_0-$i*$R_xg*$R_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$R_xg);
+}
+#=====================================Reads X axis======================
+$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0+$X_width,'y2',$Y_R_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+
+#print "$R_number\t$sample_number\n";
+for ($i=0;$i<$R_number ;$i++) {
+ for (my $j=1;$j<$sample_number+1 ;$j++) {
+ my $red=$j/$sample_number*255;
+ $svg->rect('x',$X_R_0+($j+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0-$R_length[$i][$j]*$R_Yscale,'width',$R_Xscale,'height',$R_length[$i][$j]*$R_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+ }
+ $svg->text('x',$X_R_0+(1+$sample_number/2+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$R_length[$i][0]);
+}
+#===Reads number title
+$svg->text('x',$XOFFSET+400,'y',$YOFFSET+$Y_R_title,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Reads Length Distribution");
+#===Reads
+for (my $i=0;$i<$sample_number ;$i++) {
+ my $red=($i+1)/$sample_number*255;
+ $svg->rect('x',$X_R_0+550,'y',$YOFFSET+$Y_R_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+ $svg->text('x',$X_R_0+550+30,'y',$YOFFSET+$Y_R_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]);
+}
+####==================================================================================
+#=========================================Tag s
+my $Y_T_title=30;#标题的纵向宽度
+my $Y_T_0=$Y_R_0+$axisL+$Y_R_title+50;#length size
+my $X_T_0=$XOFFSET+$x_margin;
+my $T_Yscale=$axisL/$T_xg/$kd_number;
+my $T_Xscale=$X_width/$T_number/($sample_number+1);
+#=====================================Tags Y axis======================
+$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0,'y2',$Y_T_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+for (my $i=1;$i<$kd_number ;$i++) {
+ $svg->line('x1',$X_T_0-5,'y1',$Y_T_0-$i*$T_xg*$T_Yscale,'x2',$X_T_0,'y2',$Y_T_0-$i*$T_xg*$T_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+ $svg->text('x',$X_T_0-$T_kedu_scale_x,'y',$Y_T_0-$i*$T_xg*$T_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$T_xg);
+}
+#=====================================Tags X axis======================
+$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0+$X_width,'y2',$Y_T_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'});
+
+#print "$R_number\t$sample_number\n";
+for ($i=0;$i<$T_number ;$i++) {
+ for (my $j=1;$j<$sample_number+1 ;$j++) {
+ my $red=$j/$sample_number*255;
+ $svg->rect('x',$X_T_0+($j+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0-$T_length[$i][$j]*$T_Yscale,'width',$T_Xscale,'height',$T_length[$i][$j]*$T_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+ }
+ $svg->text('x',$X_T_0+(1+$sample_number/2+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$T_length[$i][0]);
+}
+#===Reads number title
+$svg->text('x',$XOFFSET+400,'y',$Y_R_0+30+$Y_T_title,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Tags Length Distribution");
+#===Reads
+for (my $i=0;$i<$sample_number ;$i++) {
+ my $red=($i+1)/$sample_number*255;
+ $svg->rect('x',$X_T_0+550,'y',$Y_R_0+30+$Y_T_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)");
+ $svg->text('x',$X_T_0+550+30,'y',$Y_R_0+30+$Y_T_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]);
+}
+
+
+
+
+open (OUT,">$opt{o}");
+print OUT $svg->xmlify();
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0
+options:
+-i
+-o svg output
+-h help
+USAGE
+exit(1);
+}
\ No newline at end of file
diff -r 4c0b1a94b882 -r 0c4e11018934 collapseReads2Tags.pl
diff -r 4c0b1a94b882 -r 0c4e11018934 convert_bowtie_to_blast.pl
diff -r 4c0b1a94b882 -r 0c4e11018934 count_rfam_express.pl
diff -r 4c0b1a94b882 -r 0c4e11018934 filterReadsByLength.pl
diff -r 4c0b1a94b882 -r 0c4e11018934 html.pl
--- a/html.pl Tue Oct 28 01:35:32 2014 -0400
+++ b/html.pl Thu Oct 30 21:29:19 2014 -0400
@@ -161,13 +161,17 @@
The clean data file path is: $clean
1. Sequence length count
- 1.1 Reads length
";
+print OUT "\n";
-print OUT "
- 1.2 Tags length count
-
- Note:
The sequence length data: length file
+my $length=$prepath."length.html";
+open IN,"<$length";
+while (my $aline=) {
+ chomp $aline;
+ print OUT "$aline\n";
+}
+
+print OUT " Note:
The sequence length data: length file
";
diff -r 4c0b1a94b882 -r 0c4e11018934 matching.pl
--- a/matching.pl Tue Oct 28 01:35:32 2014 -0400
+++ b/matching.pl Thu Oct 30 21:29:19 2014 -0400
@@ -44,7 +44,7 @@
}
### genome mapping
-`bowtie -v $mis -f -p $threads -m $hits -a --best --strata $index $filein --al genome_mapped.fa --un genome_not_mapped.fa --max genome_mapped_Mlimit.fa > genome_mapped.bwt`;
+`bowtie -v $mis -f -p $threads -m $hits -a --best --strata $index $filein --al genome_mapped.fa --un genome_not_mapped.fa --max genome_mapped_Mlimit.fa > genome_mapped.bwt 2> run.log`;
#`convert_bowtie_to_blast.pl genome_mapped.bwt genome_mapped.fa $genome > genome_mapped.bst`;
diff -r 4c0b1a94b882 -r 0c4e11018934 miRDeep_plant.pl
--- a/miRDeep_plant.pl Tue Oct 28 01:35:32 2014 -0400
+++ b/miRDeep_plant.pl Thu Oct 30 21:29:19 2014 -0400
@@ -3,7 +3,7 @@
use warnings;
use strict;
use Getopt::Std;
-
+use RNA;
################################# MIRDEEP #################################################
@@ -125,7 +125,7 @@
#parse signature file in blast_parsed format and resolve each potential precursor
parse_file_blast_parsed($file_blast_parsed);
-`rm -rf $tmpdir`;
+#`rm -rf $tmpdir`;
exit;
@@ -360,14 +360,16 @@
#print sequence to temporary file, test randfold value, return 1 or 0
# print_file("pri_seq.fa",">pri_seq\n".$hash_comp{"pri_seq"});
- my $tmpfile=$tmpdir.$hash_comp{"pri_id"};
- open(FILE, ">$tmpfile");
- print FILE ">pri_seq\n",$hash_comp{"pri_seq"};
- close FILE;
+ #my $tmpfile=$tmpdir.$hash_comp{"pri_id"};
+ #open(FILE, ">$tmpfile");
+ #print FILE ">pri_seq\n",$hash_comp{"pri_seq"};
+ #close FILE;
# my $p_value=`randfold -s $tmpfile 999 | cut -f 3`;
- my $p1=`randfold -s $tmpfile 999 | cut -f 3`;
- my $p2=`randfold -s $tmpfile 999 | cut -f 3`;
+ #my $p1=`randfold -s $tmpfile 999 | cut -f 3`;
+ #my $p2=`randfold -s $tmpfile 999 | cut -f 3`;
+ my $p1=&randfold_pvalue($hash_comp{"pri_seq"},999);
+ my $p2=&randfold_pvalue($hash_comp{"pri_seq"},999);
my $p_value=($p1+$p2)/2;
wait;
# system "rm $tmpfile";
@@ -377,18 +379,82 @@
return 0;
}
+sub randfold_pvalue{
+ my $cpt_sup = 0;
+ my $cpt_inf = 0;
+ my $cpt_ega = 1;
+
+ my ($seq,$number_of_randomizations)=@_;
+ my $str =$seq;
+ my $mfe = RNA::fold($seq,$str);
-#sub print_file{
+ for (my $i=0;$i<$number_of_randomizations;$i++) {
+ $seq = shuffle_sequence_dinucleotide($seq);
+ $str = $seq;
+
+ my $rand_mfe = RNA::fold($str,$str);
+
+ if ($rand_mfe < $mfe) {
+ $cpt_inf++;
+ }
+ if ($rand_mfe == $mfe) {
+ $cpt_ega++;
+ }
+ if ($rand_mfe > $mfe) {
+ $cpt_sup++;
+ }
+ }
+
+ my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1);
- #print string to file
+ #print "$name\t$mfe\t$proba\n";
+ return $proba;
+}
+
+sub shuffle_sequence_dinucleotide {
+
+ my ($str) = @_;
-# my($file,$string)=@_;
+ # upper case and convert to ATGC
+ $str = uc($str);
+ $str =~ s/U/T/g;
+
+ my @nuc = ('A','T','G','C');
+ my $count_swap = 0;
+ # set maximum number of permutations
+ my $stop = length($str) * 10;
+
+ while($count_swap < $stop) {
+
+ my @pos;
+
+ # look start and end letters
+ my $firstnuc = $nuc[int(rand 4)];
+ my $thirdnuc = $nuc[int(rand 4)];
-# open(FILE, ">$file");
-# print FILE "$string";
-# close FILE;
-#}
-
+ # get positions for matching nucleotides
+ for (my $i=0;$i<(length($str)-2);$i++) {
+ if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) {
+ push (@pos,($i+1));
+ $i++;
+ }
+ }
+
+ # swap at random trinucleotides
+ my $max = scalar(@pos);
+ for (my $i=0;$i<$max;$i++) {
+ my $swap = int(rand($max));
+ if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) {
+ $count_swap++;
+ my $w1 = substr($str,$pos[$i],1);
+ my $w2 = substr($str,$pos[$swap],1);
+ substr($str,$pos[$i],1,$w2);
+ substr($str,$pos[$swap],1,$w1);
+ }
+ }
+ }
+ return($str);
+}
sub test_nucleus_conservation{
@@ -1279,7 +1345,7 @@
my $freq=$1;
return $freq;
}else{
- print STDERR "Problem with read format\n";
+ #print STDERR "Problem with read format\n";
return 0;
}
}
@@ -1497,7 +1563,7 @@
#parameters of known precursors and background hairpins, scale and location
my $a=1.339e-12;my $b=2.778e-13;my $c=45.834;
my $ev=$e**($mfe_adj1*$c);
- print STDERR "\n***",$ev,"**\t",$ev+$b,"\t";
+ #print STDERR "\n***",$ev,"**\t",$ev+$b,"\t";
my $log_odds=($a/($b+$ev));
@@ -1506,7 +1572,7 @@
my $odds=$prob_test/$prob_background;
my $log_odds_2=log($odds);
- print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n";
+ #print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n";
return $log_odds;
}
diff -r 4c0b1a94b882 -r 0c4e11018934 miRNA_Express_and_sequence.pl
diff -r 4c0b1a94b882 -r 0c4e11018934 miRPlant.pl
--- a/miRPlant.pl Tue Oct 28 01:35:32 2014 -0400
+++ b/miRPlant.pl Thu Oct 30 21:29:19 2014 -0400
@@ -13,7 +13,7 @@
use threads::shared;
use File::Path;
use File::Basename;
-#use RNA;
+use RNA;
use Term::ANSIColor;
my %opts;
@@ -235,19 +235,19 @@
system("bowtie-build -f excised_precursor.fa excised_precursor");
# print "\nbowtie-build -f excised_precursor.fa excised_precursor\n";
- system("bowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt");
+ system("bowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt 2> run.log");
# print "\nbowtie -v $mis -f -p $t -m $hit -a --best --strata excised_precursor $file > precursor_mapped.bwt\n";
system("perl $scipt_path/convert_bowtie_to_blast.pl precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst");
# print "\nconvert_bowtie_to_blast.pl precursor_mapped.bwt $file excised_precursor.fa > precursor_mapped.bst\n";
- system("sort +3 -25 precursor_mapped.bst > signatures.bst");
+ system("sort -k 4 precursor_mapped.bst > signatures.bst");
# print "\nsort +3 -25 precursor_mapped.bst > ../signatures.bst\n";
chdir $dir;
system("perl $scipt_path/miRDeep_plant.pl $predict/signatures.bst $predict/excised_precursor_struc.txt novel_tmp_dir -y > microRNA_prediction.mrd");
# print "\nmiRDeep_plant.pl $dir/signatures.bst $predict/excised_precursor_struc.txt tmp_dir -y > microRNA_prediction.txt\n";
- system("rm novel_tmp_dir -rf");
+ #system("rm novel_tmp_dir -rf");
my $tag=join "," ,@mark;
system("perl $scipt_path/miRNA_Express_and_sequence.pl -i microRNA_prediction.mrd -list novel_microRNA_express.txt -fa novel_microRNA_mature.fa -pre novel_microRNA_precursor.fa -tag $tag");
}
@@ -279,6 +279,7 @@
sub filterbylength{
my $tmpmark=join ",", @mark;
system("perl $scipt_path/filterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark");
+ system("perl $scipt_path/Length_Distibution.pl -i $preprocess/reads_length_distribution.txt -o $preprocess/length.html");
# print "\nfilterReadsByLength.pl -i $collapsed -o $data -min $min_nt -max $max_nt -mark $tmpmark\n";
}
diff -r 4c0b1a94b882 -r 0c4e11018934 miRPlant.xml
--- a/miRPlant.xml Tue Oct 28 01:35:32 2014 -0400
+++ b/miRPlant.xml Thu Oct 30 21:29:19 2014 -0400
@@ -4,9 +4,9 @@
SCRIPT_PATH
bowtie
- R
+ R
fastx_toolkit
- X11
+ libx11
ViennaRNA
@@ -27,7 +27,7 @@
-tag ${s.tag}
#end for
- -format $format -gfa $gfa -pre $pre -mat $mat -rfam $rfam -a $a -M $mapnt -min $min -max $max -mis $mismatch -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe
+ -format $format -gfa $gfa -pre $pre -mat $mat -rfam $rfam -a $a -M $mapnt -min $min -max $max -mis $mismatch -e $e -f $f -v $v -r $r -dis $dis -flank $flank -mfe $mfe > run.log
@@ -72,16 +72,15 @@
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
diff -r 4c0b1a94b882 -r 0c4e11018934 precursors.pl
diff -r 4c0b1a94b882 -r 0c4e11018934 quantify.pl
--- a/quantify.pl Tue Oct 28 01:35:32 2014 -0400
+++ b/quantify.pl Thu Oct 30 21:29:19 2014 -0400
@@ -422,16 +422,16 @@
sub mapping{
my $err;
## build bowtie index
- print STDERR "building bowtie index\n";
+ #print STDERR "building bowtie index\n";
$err = `bowtie-build $pre_file_name miRNA_precursor`;
## map mature sequences against precursors
- print STDERR "mapping mature sequences against index\n";
- $err = `bowtie -p $threads -f -v 0 -a --best --strata --norc miRNA_precursor $mature mature_mapped.bwt`;
+ #print STDERR "mapping mature sequences against index\n";
+ $err = `bowtie -p $threads -f -v 0 -a --best --strata --norc miRNA_precursor $mature > mature_mapped.bwt 2> run.log`;
## map reads against precursors
- print STDERR "mapping read sequences against index\n";
- $err=`bowtie -p $threads -f -v $mismatch -a --best --strata --norc miRNA_precursor $read --al mirbase_mapped.fa --un mirbase_not_mapped.fa read_mapped.bwt `;
+ #print STDERR "mapping read sequences against index\n";
+ $err=`bowtie -p $threads -f -v $mismatch -a --best --strata --norc miRNA_precursor $read --al mirbase_mapped.fa --un mirbase_not_mapped.fa > read_mapped.bwt 2> run.log`;
}
diff -r 4c0b1a94b882 -r 0c4e11018934 randfold
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/randfold Thu Oct 30 21:29:19 2014 -0400
@@ -0,0 +1,139 @@
+# randfold (c) Eric Bonnet & Jan Wuyts 2003
+# randomization test for rna secondary structure
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# This soft needs BioPerl and ViennaRNA packages
+# See http://www.bioperl.org and http://www.tbi.univie.ac.at/~ivo/RNA/
+# See also README
+
+use strict;
+use Bio::SeqIO;
+use RNA;
+
+if (scalar(@ARGV) != 3) {
+ die "Usage $0 fasta_file number_of_randomizations type[m|d]\n";
+}
+
+my $in = Bio::SeqIO->new(-file => "$ARGV[0]", -format => "fasta");
+my $number_of_randomizations = $ARGV[1];
+my $type = $ARGV[2];
+if ($type !~ /[m|d]/) {
+ die "error: wrong type of randomization.";
+}
+
+while (my $o = $in->next_seq) {
+ my $seq = uc($o->seq());
+ my $name = $o->display_id();
+
+ my $cpt_sup = 0;
+ my $cpt_inf = 0;
+ my $cpt_ega = 1;
+
+ my $str = $seq;
+ my $mfe = RNA::fold($seq,$str);
+
+ for (my $i=0;$i<$number_of_randomizations;$i++) {
+ if ($type eq "d") {
+ $seq = shuffle_sequence_dinucleotide($seq);
+ $str = $seq;
+ }
+ elsif ($type eq "m") {
+ my @tmp = split //,$seq;
+ fisher_yates_shuffle(\@tmp);
+ $seq = join '',@tmp;
+ $str = $seq;
+ }
+
+ my $rand_mfe = RNA::fold($str,$str);
+
+ if ($rand_mfe < $mfe) {
+ $cpt_inf++;
+ }
+ if ($rand_mfe == $mfe) {
+ $cpt_ega++;
+ }
+ if ($rand_mfe > $mfe) {
+ $cpt_sup++;
+ }
+ }
+
+ my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1);
+
+ print "$name\t$mfe\t$proba\n";
+}
+
+# fisher_yates_shuffle( \@array ) :
+# generate a random permutation of @array in place
+# input array ref
+# return nothing
+sub fisher_yates_shuffle {
+ my $array = shift;
+ my $i;
+ for ($i = @$array; --$i; ) {
+ my $j = int rand ($i+1);
+ next if $i == $j;
+ @$array[$i,$j] = @$array[$j,$i];
+ }
+}
+
+# shuffle a sequence while preserving dinucleotide distribution
+# input sequence string
+# return sequence string shuffled
+sub shuffle_sequence_dinucleotide {
+
+ my ($str) = @_;
+
+ # upper case and convert to ATGC
+ $str = uc($str);
+ $str =~ s/U/T/g;
+
+ my @nuc = ('A','T','G','C');
+ my $count_swap = 0;
+ # set maximum number of permutations
+ my $stop = length($str) * 10;
+
+ while($count_swap < $stop) {
+
+ my @pos;
+
+ # look start and end letters
+ my $firstnuc = $nuc[int(rand 4)];
+ my $thirdnuc = $nuc[int(rand 4)];
+
+ # get positions for matching nucleotides
+ for (my $i=0;$i<(length($str)-2);$i++) {
+ if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) {
+ push (@pos,($i+1));
+ $i++;
+ }
+ }
+
+ # swap at random trinucleotides
+ my $max = scalar(@pos);
+ for (my $i=0;$i<$max;$i++) {
+ my $swap = int(rand($max));
+ if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) {
+ $count_swap++;
+ my $w1 = substr($str,$pos[$i],1);
+ my $w2 = substr($str,$pos[$swap],1);
+ substr($str,$pos[$i],1,$w2);
+ substr($str,$pos[$swap],1,$w1);
+ }
+ }
+ }
+ return($str);
+}
+
diff -r 4c0b1a94b882 -r 0c4e11018934 rfam.pl
--- a/rfam.pl Tue Oct 28 01:35:32 2014 -0400
+++ b/rfam.pl Thu Oct 30 21:29:19 2014 -0400
@@ -47,7 +47,7 @@
$index="$rfam";
}
### genome mapping
-`bowtie -v $mis -f -p $threads -k 1 $index $filein --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt`;
+`bowtie -v $mis -f -p $threads -k 1 $index $filein --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt 2> run.log`;
sub checkACGT{
my $string;
diff -r 4c0b1a94b882 -r 0c4e11018934 tool_dependencies.xml
--- a/tool_dependencies.xml Tue Oct 28 01:35:32 2014 -0400
+++ b/tool_dependencies.xml Thu Oct 30 21:29:19 2014 -0400
@@ -9,12 +9,8 @@
$REPOSITORY_INSTALL_DIR
-
-
-
-
-
-
+
+