Previous changeset 6:34e7e6ea1c17 (2017-03-28) Next changeset 8:e23da2ebbc5b (2017-03-28) |
Commit message:
Deleted selected files |
removed:
regionFitness.pl |
b |
diff -r 34e7e6ea1c17 -r 1c94d9b7eda7 regionFitness.pl --- a/regionFitness.pl Tue Mar 28 14:03:34 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,802 +0,0 @@\n-#!/usr/bin/perl -w\n-\n-#Margaret Antonio 17.01.06\n-\n-# Sliding Window specifically for Galaxy implementation.\n-\n-use strict;\n-use Getopt::Long;\n-use warnings;\n-use Bio::SeqIO;\n-use Data::Random qw(:all);\n-use List::BinarySearch qw( :all );\n-use autodie;\n-no warnings;\n-\n-#AVAILABLE OPTIONS. WILL PRINT UPON ERROR\n-sub print_usage() {\n-\n- print "\\n####################################################################\\n";\n- print "USAGE:\\n";\n- print "slidingWindow.pl -f <fasta file> -r <genbank file> <inputs>\\n";\n- \n- print "\\nREQUIRED:\\n";\n- print " -d\\tDirectory containing all input files (output files from\\n";\n- print " \\tcalcFitness tool)\\n";\n- print " \\tOR\\n";\n- print " \\tIn the command line (without a flag), input filename(s)\\n";\n- print " -f\\tFilename for genome sequence, in fasta format\\n";\n- print " -r\\tFilename for genome annotation, in GenBank format\\n";\n- \n- print "\\nOPTIONAL:\\n";\n- print " -h\\tPrint usage\\n";\n- print " -size\\tThe size of the sliding window. Default=500\\n";\n- print " -step\\tThe window spacing. Default=10\\n";\n- print " -c\\tExclude values with avg. counts less than x where (c1+c2)/2<x\\n";\n- print " \\tDefault=15\\n";\n- print " -log\\tSend all output to a log file instead of the terminal\\n";\n- print " -max\\tExpected max number of TA sites in a window.\\n";\n- print " \\tUsed for creating null distribution library. Default=100\\n";\n- print " -w\\tDo weighted average for fitness per insertion\\n";\n- print " -wc\\tInteger value for weight ceiling. Default=50\\n";\n- \n- print " \\n~~~~Always check that file paths are correctly specified~~~~\\n";\n- print "\\n##################################################################\\n";\n- \n-\n-}\n-\n-#ASSIGN INPUTS TO VARIABLES\n-our ($cutoff,$step, $size, $help,$fasta, $log, $ref,$weight_ceiling,$weight,$custom,$run,$max,$f1,$f2,$f3,$f4,$f5,$f6);\n-\n-GetOptions(\n-\'c:i\'=>\\$cutoff,\n-\'step:i\' => \\$step,\n-\'size:i\' => \\$size,\n-\'f:s\' => \\$fasta,\n-\'r:s\' => \\$ref,\n-\'log\' => \\$log,\n-\'h\' => \\$help,\n-\'m:i\'=>\\$max,\n-\'wc:i\'=>\\$weight_ceiling,\n-\'w\'=>\\$weight,\n-\'u:s\'=>\\$custom,\n-\'n:s\'=>\\$run,\n-\'f1:s\'=>\\$f1,\n-\'f2:s\'=>\\$f2,\n-\'f3:s\'=>\\$f3,\n-\'f4:s\'=>\\$f4,\n-\'f5:s\'=>\\$f5,\n-\'f6:s\'=>\\$f6,\n-\n-);\n-\n-sub get_time() {\n- my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(time);\n- return "$hour:$min:$sec";\n-}\n-\n-sub uniq {\n- my %seen;\n- return grep { ! $seen{$_}++ } @_;\n-}\n-\n-# Just to test out the script opening\n-if ($help){\n-\tprint_usage();\n-\tprint "\\n";\n- exit;\n-}\n-my $round=\'%.4f\';\n-\n-# every output file will have this prefix\n-#if (!$run){$run = "run1";}\n-$run = $run . "_";\n-\n-if (!$weight_ceiling){$weight_ceiling=50;}\n-if (!$cutoff){$cutoff=15;}\n-\n-\n-#open (STDOUT, ">>", $run . "log.txt");\n-\n-#CHECKING PARAMETERS: Check to make sure required option inputs are there and if not then assign default\n-if (!$size) { $size = 500 }; #set the default sliding window size to 500\n-if (!$step) { $step = 10 }; #set the default step size to 10\n-if (!$cutoff) { $cutoff = 10 };\n-if (!$max) { $max = 100 }; # most insertions expected in a given window region (for making null dist lib)\n-\n-print "Window size: $size\\n";\n-print "Step value: $step\\n";\n-print "Cutoff: $cutoff\\n";\n-\n-\n-# Returns mean, variance, sd, se\n-sub average {\n- my $scoreref = shift @_;\n- my @scores = @{$scoreref};\n- \n- my $sum=0;\n- my $num=0;\n- \n- # Get the average.\n- foreach my $w (@scores) {\n- $sum += $w;\n- $num++;\n- }\n- my $average= $sum/$num;\n- my $xminusxbars = 0;\n- \n- # Get the variance.\n- foreach my $w (@scores) {\n- $xminusxbars += ($w-$average)**2;\n- }\n- my $variance;\n- if ($num<=1){\n- $variance=0.10;\n- }\n- else{\n- $variance = sprintf($round,(1/($num-1)) * $xminusxbars);\n- }\n- my $sd = sprintf($round,sqrt($variance));\n- my $se = sprintf($round,$sd / sqrt($num));\n- \n- return ($average, $variance, $s'..b' $meanFit=mean(@allFits);\n-print "Average fitness for all windows: ",$meanFit,"\\n";\n-my @expWindows=();\n-foreach (@newWindows){\n- my @entry=@{$_};\n- my $mean=$entry[7];\n- my $absdev=sprintf("$round",$mean-$meanFit);\n- push (@entry,$absdev);\n- push @expWindows,\\@entry;\n-}\n-\n-@newWindows=@expWindows;\n-\n-#MAKE OUTPUT CSV FILE WITH ESSENTIAL WINDOW CALCULATIONS\n-\n-print "Start csv ouput file creation: ",get_time(),"\\n";\n-\n-open CSV, \'>\', $f3; # $run . "slidingWindows.csv" or die "Cannot open sliding window output file";\n-\t\n-my @csvHeader= ("start", "end","mutants","insertions","TA_sites","ratio","p-value","average", "variance","stdev","stderr","genes","fit-mean");\n-my $header=join(",",@csvHeader);\n-print CSV $header,"\\n";\n-foreach my $winLine(@newWindows){\n- my $string=join(",",@{$winLine});\n- print CSV $string, "\\n";\n-}\n-close CSV;\n-print "End csv ouput file creation: ",get_time(),"\\n\\n";\n-\n-my $in = Bio::SeqIO->new(-file=>$ref, -format => \'genbank\');\n-my $refseq = $in->next_seq;\n-my $refname = $refseq->id;\n-\n-#MAKE essentials WIG FILE---->later make BW--->IGV\n-sub printwig{\n- print "Start wig file creation: ",get_time(),"\\n";\n- my $in = Bio::SeqIO->new(-file=>$ref, -format => \'genbank\');\n- my $refseq = $in->next_seq;\n- my $refname = $refseq->id;\n- my $ewig="essentialWindows.wig";\n- my $FILE5 = $run . $ewig;\n- open eWIG, ">", $f5; #$FILE5";\n- printf eWIG "track type=wiggle_0 name=$ewig\\n";\n- printf eWIG "variableStep chrom=$refname\\n";\n- foreach my $wigLine(@newWindows){\n- my @wigFields=@$wigLine;\n- my $position=$wigFields[0];\n- while ($position<=$wigFields[1]){\n- printf eWIG $position, " ",$wigFields[7],"\\n"; #7 for pvalue, but 2 for fitness!!!!!!\n- $position=$position+1;\n- }\n- }\n- close eWIG;\n- print "End wig file creation: ",get_time(),"\\n\\n";\n- print "If this wig file needs to be converted to a Big Wig, then use USCS program wigToBigWig in terminal: \\n \\t./wigToBigWig gview/12G.wig organism.txt BigWig/output.bw \\n\\n";\n-}\n-\n-\n-#OUTPUT REGULAR SLIDING INFORMATION WITHOUT ESSENTIALS CALCULATIONS (P-VALUES)\n-\n-\n-#MAKE OUTPUT CSV FILE WITH FITNESS WINDOW CALCULATIONS\n- print "Start csv ouput file creation: ",get_time(),"\\n";\n- open FITFILE, ">", $f4; #$run . "fitWindows.csv" or die "Failed to open file";\n- my @head = ("start", "end","fitness","mutant_count", "\\n");\n- print FITFILE join(",",@head); #header\n- foreach my $winLine(@allWindows){\n- print FITFILE join(",", @$winLine),"\\n";\n- }\n- close FITFILE;\n- print "End csv ouput file creation: ",get_time(),"\\n\\n";\n-\n-#MAKE WIG FILE---->later make BW--->IGV\n- print "Start wig file creation: ",get_time(),"\\n";\n- my $fin = Bio::SeqIO->new(-file=>$ref, -format => \'genbank\');\n- $refseq = $fin->next_seq;\n- $refname = $refseq->id;\n- my $fwig="fitWindows.wig";\n- open fWIG, ">", $f5; #$run . $fwig;\n- print fWIG "track type=wiggle_0 name=$fwig\\n";\n- print fWIG "variableStep chrom=$refname\\n";\n- foreach my $wigLine(@allWindows){\n- my @wigFields=@$wigLine;\n- my $pos=$wigFields[0];\n- my $fit=$wigFields[7];\n- print fWIG $pos," ",$fit,"\\n";\n- }\n- close fWIG;\n- print "End wig file creation: ",get_time(),"\\n\\n";\n- print "If this wig file needs to be converted to a Big Wig,\\n";\n- print " then use USCS program wigToBigWig in terminal: \\n";\n- print "\\t./wigToBigWig gview/12G.wig organism.txt BigWig/output.bw \\n\\n";\n-\n-\n-#GOING TO MAKE A TEXT FILE FOR BED CONVERSION TO BIGBED, NEED CHROM # IN COLUMN 0\n-\n-my @cummulative;\n-\n-#MAKING A REGULAR TEXT FILE fields: [chrom,start,end,fitness,count]\n-\n-print "Start text file creation time: ",get_time(),"\\n";\n-open my $TXT, \'>\', $f6; #$run . "fitWindows.txt" or die $!;\n-print $TXT ("start","end","W","mutant_count","insertion_count\\n");\n-print $TXT (join("\\t",@$_),"\\n") for @allWindows;\n-close $TXT;\n-print "End text file creation: ",get_time(),"\\n\\n";\n-\n-\n' |