diff miRDeep_plant.pl @ 44:0c4e11018934 draft

Uploaded
author big-tiandm
date Thu, 30 Oct 2014 21:29:19 -0400
parents dc5a29826c7d
children ca05d68aca13
line wrap: on
line diff
--- a/miRDeep_plant.pl	Tue Oct 28 01:35:32 2014 -0400
+++ b/miRDeep_plant.pl	Thu Oct 30 21:29:19 2014 -0400
@@ -3,7 +3,7 @@
 use warnings;
 use strict;
 use Getopt::Std;
-
+use RNA;
 
 
 ################################# MIRDEEP #################################################
@@ -125,7 +125,7 @@
 
 #parse signature file in blast_parsed format and resolve each potential precursor
 parse_file_blast_parsed($file_blast_parsed);
-`rm -rf $tmpdir`;
+#`rm -rf $tmpdir`;
 exit;
 
 
@@ -360,14 +360,16 @@
     #print sequence to temporary file, test randfold value, return 1 or 0
 
 #    print_file("pri_seq.fa",">pri_seq\n".$hash_comp{"pri_seq"});
-	my $tmpfile=$tmpdir.$hash_comp{"pri_id"};
-	open(FILE, ">$tmpfile");
-	print FILE ">pri_seq\n",$hash_comp{"pri_seq"};
-	close FILE;
+	#my $tmpfile=$tmpdir.$hash_comp{"pri_id"};
+	#open(FILE, ">$tmpfile");
+	#print FILE ">pri_seq\n",$hash_comp{"pri_seq"};
+	#close FILE;
 
 #	my $p_value=`randfold -s $tmpfile 999 | cut -f 3`;
-	my $p1=`randfold -s $tmpfile 999 | cut -f 3`;
-	my $p2=`randfold -s $tmpfile 999 | cut -f 3`;
+	#my $p1=`randfold -s $tmpfile 999 | cut -f 3`;
+	#my $p2=`randfold -s $tmpfile 999 | cut -f 3`;
+	my $p1=&randfold_pvalue($hash_comp{"pri_seq"},999);
+	my $p2=&randfold_pvalue($hash_comp{"pri_seq"},999);
 	my $p_value=($p1+$p2)/2;
 	wait;
 #    system "rm $tmpfile";
@@ -377,18 +379,82 @@
     return 0;
 }
 
+sub randfold_pvalue{
+	my $cpt_sup = 0;
+	my $cpt_inf = 0;
+	my $cpt_ega = 1;
+	
+	my ($seq,$number_of_randomizations)=@_;
+	my $str =$seq;
+	my $mfe = RNA::fold($seq,$str);
 
-#sub print_file{
+	for (my $i=0;$i<$number_of_randomizations;$i++) {
+		$seq = shuffle_sequence_dinucleotide($seq);
+		$str = $seq;
+	
+		my $rand_mfe = RNA::fold($str,$str);
+	
+		if ($rand_mfe < $mfe) {
+			$cpt_inf++;
+		}
+		if ($rand_mfe == $mfe) {
+			$cpt_ega++;
+		}
+		if ($rand_mfe > $mfe) {		
+			$cpt_sup++;
+		}
+	}
+	
+	my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1);
 
-    #print string to file
+	#print "$name\t$mfe\t$proba\n";
+	return $proba;
+}
+
+sub shuffle_sequence_dinucleotide {
+
+	my ($str) = @_;
 
-#    my($file,$string)=@_;
+	# upper case and convert to ATGC
+	$str = uc($str);
+	$str =~ s/U/T/g;
+	
+	my @nuc = ('A','T','G','C');
+	my $count_swap = 0;
+	# set maximum number of permutations
+	my $stop = length($str) * 10;
+	
+	while($count_swap < $stop) {
+
+		my @pos;
+
+		# look start and end letters
+		my $firstnuc = $nuc[int(rand 4)];
+		my $thirdnuc = $nuc[int(rand 4)];	
 
-#    open(FILE, ">$file");
-#    print FILE "$string";
-#    close FILE;
-#}
-
+		# get positions for matching nucleotides
+		for (my $i=0;$i<(length($str)-2);$i++) {
+			if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) {
+				push (@pos,($i+1));
+				$i++;
+			}
+		}
+	
+		# swap at random trinucleotides
+		my $max = scalar(@pos);
+		for (my $i=0;$i<$max;$i++) {
+			my $swap = int(rand($max));
+			if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) {			
+				$count_swap++;
+				my $w1 = substr($str,$pos[$i],1);
+				my $w2 = substr($str,$pos[$swap],1);			
+				substr($str,$pos[$i],1,$w2);
+				substr($str,$pos[$swap],1,$w1);		
+			}
+		}
+	}
+	return($str);	
+}
 
 sub test_nucleus_conservation{
 
@@ -1279,7 +1345,7 @@
 	my $freq=$1;
 	return $freq;
     }else{
-	print STDERR "Problem with read format\n";
+	#print STDERR "Problem with read format\n";
 	return 0;
     }
 }
@@ -1497,7 +1563,7 @@
     #parameters of known precursors and background hairpins, scale and location
 	my $a=1.339e-12;my $b=2.778e-13;my $c=45.834;
 	my $ev=$e**($mfe_adj1*$c);
-	print STDERR "\n***",$ev,"**\t",$ev+$b,"\t";
+	#print STDERR "\n***",$ev,"**\t",$ev+$b,"\t";
 	my $log_odds=($a/($b+$ev));
 
 
@@ -1506,7 +1572,7 @@
 
     my $odds=$prob_test/$prob_background;
     my $log_odds_2=log($odds);
-	print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n";
+	#print STDERR "log_odds :",$log_odds,"\t",$log_odds_2,"\n";
    return $log_odds;
 }