# HG changeset patch # User big-tiandm # Date 1414718975 14400 # Node ID 2cb6add23dfe91118e7597d3736b81c9dfacb92f # Parent 0c4e110189348a08c1c7bd7c21e5a137b48a30e0 Deleted selected files diff -r 0c4e11018934 -r 2cb6add23dfe randfold --- a/randfold Thu Oct 30 21:29:19 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,139 +0,0 @@ -# randfold (c) Eric Bonnet & Jan Wuyts 2003 -# randomization test for rna secondary structure - -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -# This soft needs BioPerl and ViennaRNA packages -# See http://www.bioperl.org and http://www.tbi.univie.ac.at/~ivo/RNA/ -# See also README - -use strict; -use Bio::SeqIO; -use RNA; - -if (scalar(@ARGV) != 3) { - die "Usage $0 fasta_file number_of_randomizations type[m|d]\n"; -} - -my $in = Bio::SeqIO->new(-file => "$ARGV[0]", -format => "fasta"); -my $number_of_randomizations = $ARGV[1]; -my $type = $ARGV[2]; -if ($type !~ /[m|d]/) { - die "error: wrong type of randomization."; -} - -while (my $o = $in->next_seq) { - my $seq = uc($o->seq()); - my $name = $o->display_id(); - - my $cpt_sup = 0; - my $cpt_inf = 0; - my $cpt_ega = 1; - - my $str = $seq; - my $mfe = RNA::fold($seq,$str); - - for (my $i=0;$i<$number_of_randomizations;$i++) { - if ($type eq "d") { - $seq = shuffle_sequence_dinucleotide($seq); - $str = $seq; - } - elsif ($type eq "m") { - my @tmp = split //,$seq; - fisher_yates_shuffle(\@tmp); - $seq = join '',@tmp; - $str = $seq; - } - - my $rand_mfe = RNA::fold($str,$str); - - if ($rand_mfe < $mfe) { - $cpt_inf++; - } - if ($rand_mfe == $mfe) { - $cpt_ega++; - } - if ($rand_mfe > $mfe) { - $cpt_sup++; - } - } - - my $proba = ($cpt_ega + $cpt_inf) / ($number_of_randomizations + 1); - - print "$name\t$mfe\t$proba\n"; -} - -# fisher_yates_shuffle( \@array ) : -# generate a random permutation of @array in place -# input array ref -# return nothing -sub fisher_yates_shuffle { - my $array = shift; - my $i; - for ($i = @$array; --$i; ) { - my $j = int rand ($i+1); - next if $i == $j; - @$array[$i,$j] = @$array[$j,$i]; - } -} - -# shuffle a sequence while preserving dinucleotide distribution -# input sequence string -# return sequence string shuffled -sub shuffle_sequence_dinucleotide { - - my ($str) = @_; - - # upper case and convert to ATGC - $str = uc($str); - $str =~ s/U/T/g; - - my @nuc = ('A','T','G','C'); - my $count_swap = 0; - # set maximum number of permutations - my $stop = length($str) * 10; - - while($count_swap < $stop) { - - my @pos; - - # look start and end letters - my $firstnuc = $nuc[int(rand 4)]; - my $thirdnuc = $nuc[int(rand 4)]; - - # get positions for matching nucleotides - for (my $i=0;$i<(length($str)-2);$i++) { - if ((substr($str,$i,1) eq $firstnuc) && (substr($str,$i+2,1) eq $thirdnuc)) { - push (@pos,($i+1)); - $i++; - } - } - - # swap at random trinucleotides - my $max = scalar(@pos); - for (my $i=0;$i<$max;$i++) { - my $swap = int(rand($max)); - if ((abs($pos[$swap] - $pos[$i]) >= 3) && (substr($str,$pos[$i],1) ne substr($str,$pos[$swap],1))) { - $count_swap++; - my $w1 = substr($str,$pos[$i],1); - my $w2 = substr($str,$pos[$swap],1); - substr($str,$pos[$i],1,$w2); - substr($str,$pos[$swap],1,$w1); - } - } - } - return($str); -} -