Mercurial > repos > antmarge > regionfitness
view regionFitness.pl @ 2:e8749de582fa draft
Uploaded
author | antmarge |
---|---|
date | Tue, 28 Mar 2017 10:51:53 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl -w #Margaret Antonio 17.01.06 # Sliding Window specifically for Galaxy implementation. use strict; use Getopt::Long; use warnings; use Bio::SeqIO; use Data::Random qw(:all); use List::BinarySearch qw( :all ); use autodie; no warnings; #AVAILABLE OPTIONS. WILL PRINT UPON ERROR sub print_usage() { print "\n####################################################################\n"; print "USAGE:\n"; print "slidingWindow.pl -f <fasta file> -r <genbank file> <inputs>\n"; print "\nREQUIRED:\n"; print " -d\tDirectory containing all input files (output files from\n"; print " \tcalcFitness tool)\n"; print " \tOR\n"; print " \tIn the command line (without a flag), input filename(s)\n"; print " -f\tFilename for genome sequence, in fasta format\n"; print " -r\tFilename for genome annotation, in GenBank format\n"; print "\nOPTIONAL:\n"; print " -h\tPrint usage\n"; print " -size\tThe size of the sliding window. Default=500\n"; print " -step\tThe window spacing. Default=10\n"; print " -c\tExclude values with avg. counts less than x where (c1+c2)/2<x\n"; print " \tDefault=15\n"; print " -log\tSend all output to a log file instead of the terminal\n"; print " -max\tExpected max number of TA sites in a window.\n"; print " \tUsed for creating null distribution library. Default=100\n"; print " -w\tDo weighted average for fitness per insertion\n"; print " -wc\tInteger value for weight ceiling. Default=50\n"; print " \n~~~~Always check that file paths are correctly specified~~~~\n"; print "\n##################################################################\n"; } #ASSIGN INPUTS TO VARIABLES our ($cutoff,$step, $size, $help,$fasta, $log, $ref,$weight_ceiling,$weight,$custom,$run,$max,$f1,$f2,$f3,$f4,$f5,$f6); GetOptions( 'c:i'=>\$cutoff, 'step:i' => \$step, 'size:i' => \$size, 'f:s' => \$fasta, 'r:s' => \$ref, 'log' => \$log, 'h' => \$help, 'm:i'=>\$max, 'wc:i'=>\$weight_ceiling, 'w'=>\$weight, 'u:s'=>\$custom, 'n:s'=>\$run, 'f1:s'=>\$f1, 'f2:s'=>\$f2, 'f3:s'=>\$f3, 'f4:s'=>\$f4, 'f5:s'=>\$f5, 'f6:s'=>\$f6, ); sub get_time() { my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(time); return "$hour:$min:$sec"; } sub uniq { my %seen; return grep { ! $seen{$_}++ } @_; } # Just to test out the script opening if ($help){ print_usage(); print "\n"; exit; } my $round='%.4f'; # every output file will have this prefix #if (!$run){$run = "run1";} $run = $run . "_"; if (!$weight_ceiling){$weight_ceiling=50;} if (!$cutoff){$cutoff=15;} #open (STDOUT, ">>", $run . "log.txt"); #CHECKING PARAMETERS: Check to make sure required option inputs are there and if not then assign default if (!$size) { $size = 500 }; #set the default sliding window size to 500 if (!$step) { $step = 10 }; #set the default step size to 10 if (!$cutoff) { $cutoff = 10 }; if (!$max) { $max = 100 }; # most insertions expected in a given window region (for making null dist lib) print "Window size: $size\n"; print "Step value: $step\n"; print "Cutoff: $cutoff\n"; # Returns mean, variance, sd, se sub average { my $scoreref = shift @_; my @scores = @{$scoreref}; my $sum=0; my $num=0; # Get the average. foreach my $w (@scores) { $sum += $w; $num++; } my $average= $sum/$num; my $xminusxbars = 0; # Get the variance. foreach my $w (@scores) { $xminusxbars += ($w-$average)**2; } my $variance; if ($num<=1){ $variance=0.10; } else{ $variance = sprintf($round,(1/($num-1)) * $xminusxbars); } my $sd = sprintf($round,sqrt($variance)); my $se = sprintf($round,$sd / sqrt($num)); return ($average, $variance, $sd, $se); } # Function to clean unwanted characters from lines sub cleaner{ my $line=$_[0]; chomp($line); $line =~ s/\x0d{0,1}\x0a{0,1}\Z//s; return $line; } # Takes two parameters, both hashrefs to lists. # 1) hashref to list of scores # 2) hashref to list of weights, equal in length to the scores. sub weighted_average { my $scoreref = shift @_; my $weightref = shift @_; my @scores = @{$scoreref}; my @weights = @{$weightref}; my $sum=0; my ($weighted_average, $weighted_variance)=(0,0); my $v2; # Go through once to get total, calculate V2. for (my $i=0; $i<@weights; $i++) { $sum += $weights[$i]; $v2 += $weights[$i]**2; } if ($sum == 0) { return 0; } # No scores given? my $scor = join (' ', @scores); my $wght = join (' ', @weights); # Now calculated weighted average. my ($top, $bottom) = (0,0); for (my $i=0; $i<@weights; $i++) { $top += $weights[$i] * $scores[$i]; $bottom += $weights[$i]; } $weighted_average = sprintf($round,$top/$bottom); ($top, $bottom) = (0,0); # Now calculate weighted sample variance. for (my $i=0; $i<@weights; $i++) { $top += ( $weights[$i] * ($scores[$i] - $weighted_average)**2); $bottom += $weights[$i]; } $weighted_variance = sprintf($round,$top/$bottom); my $weighted_stdev = sprintf($round,sqrt($weighted_variance)); my $weighted_stder = sprintf($round,$weighted_stdev / sqrt(@scores)); # / length scores. return ($weighted_average, $weighted_variance, $weighted_stdev, $weighted_stder); } #CREATE AN ARRAY OF DATA FROM INPUT CSV FILE(S) my $rowCount=-1; my $last=0; my @unsorted; my @insertPos; #array to hold all positions of insertions. Going to use this later to match up with TA sites my %wind_summary; my @files = @ARGV; my $num=(scalar @files); my $datestring = localtime(); print "Local date and time $datestring\n"; print "\n---------Importing files--------\n"; print "\tStart input array ",get_time(),"\n"; print "\tNumber of csv files: ", $num,"\n"; #Go through each file from the commandline (ARGV array) and read each line as an array #into select array if values satisfy the cutoff for (my $i=0; $i<$num; $i++){ print "File #",$i+1,"\t"; my $file=$files[$i]; print $file,"\n"; open(DATA, '<', $file) or die "Could not open '$file' Make sure input .csv files are entered in the command line\n"; my $dummy=<DATA>; #read and store column names in dummy variable while (my $entry = <DATA>) { chomp $entry; my @line=split(",",$entry); my $locus = $line[9]; #gene id (SP_0000) my $w = $line[12]; #nW if (!$w){ $w=0 } # For blanks my $c1 = $line[2]; my $c2 = $line[3]; my $avg = ($c1+$c2)/2; #Average counts must be greater than cutoff (minimum allowed) if ($avg > $cutoff) { my @select=($line[0],$w,$avg,$line[9]); my $select=\@select; push(@unsorted,$select); push(@insertPos,$line[0]); #keep track of actual insertion site position $last=$select[0]; $rowCount++; } if ($avg >= $weight_ceiling) { $avg = $weight_ceiling; } # Maximum weight } close DATA; } my @sorted = sort { $a->[0] <=> $b->[0] } @unsorted; @insertPos = sort { $a <=> $b } @insertPos; # Get unique @insertPos= uniq(@insertPos); print "\n\tFinished input array ",get_time(),"\n"; ############################################################################################### print "\n---------Creating sliding windows across the genome--------\n\n"; my $index=-1; my $marker=0; my $totalInsert=0; my $totalWindows=0; #SUBROUTINE FOR EACH WINDOW CALCULATION sub OneWindow{ my $Wstart=shift @_; my $Wend=shift@_; my $Wcount=0; my $insertion=0; my $Wsum=0; my $lastPos=0; my $i; my @allgenes=(); for ($i=$marker;$i<$rowCount;$i++){ my @fields=@{$sorted[$i]}; my $pos=$fields[0]; my $w=$fields[1]; #fitness value for single insertion my $avgCount=$fields[2]; my $gene=$fields[3]; if ($pos<$Wstart){ #if deleted, error shows up next; } if ($pos<=$Wend){ if ($pos<($Wstart+$step)){ $marker++; } my @empty; if (!$wind_summary{$Wstart}) { $wind_summary{$Wstart}{w} = [@empty]; $wind_summary{$Wstart}{s} = [@empty]; $wind_summary{$Wstart}{g} = [@empty]; } # Hash of Fitness scores. $wind_summary{$Wstart}{w} = [@{$wind_summary{$Wstart}{w}}, $w]; # Hash of counts used to generate those fitness scores. $wind_summary{$Wstart}{s} = [@{$wind_summary{$Wstart}{s}}, $avgCount]; if (!($gene =~ /^ *$/)){ $gene=~ s/"//g; $gene=~ s/ //g; $gene=~ s/'//g; push @allgenes,$gene; } $Wsum+=$w; $Wcount++; if ($pos!=$lastPos){ $insertion+=1; $lastPos=$pos; } } #if ($fields[0]>$Wend) #finished with that window, then: else{ if ($Wcount>0){ $totalWindows++; $totalInsert+=$insertion; my ($average, $variance, $stdev, $stderr); if ($num <=1 ) { ($average, $variance, $stdev, $stderr)=(0.10,0.10,"NA","NA"); } else{ if (!$weight) { ($average, $variance, $stdev, $stderr) = &average($wind_summary{$Wstart}{w}); } else { ($average, $variance, $stdev, $stderr)= &weighted_average($wind_summary{$Wstart}{w},$wind_summary{$Wstart}{s}); } } @allgenes= uniq(@allgenes); my @sortedGenes = sort { lc($a) cmp lc($b) } @allgenes; my $wind_genes=join(" ",@sortedGenes); my @window=($Wstart,$Wend,$Wcount,$insertion,$wind_genes,$average,$variance,$stdev,$stderr); return (\@window); } else{ #Even if there were no insertions, still want window in file for consistent start/end my @window=($Wstart,$Wend,$Wcount,$insertion," ","NA","NA","NA","NA"); return (\@window); } #Because count=0 (i.e. there were no insertion mutants in that window) } } } sub customWindow{ my $Wstart=shift @_; my $Wend=shift@_; my $Wavg=0; my $Wcount=0; my $insertion=0; my $Wsum=0; my $lastPos=0; my @allgenes=(); my $i; for ($i=$marker;$i<$rowCount;$i++){ my @fields=@{$sorted[$i]}; my $pos=$fields[0]; my $w=$fields[1]; #fitness value for single insertion my $avgCount=$fields[2]; my $gene=$fields[3]; if ($pos<$Wstart){ #if deleted, error shows up next; } if ($pos<=$Wend){ my @empty; if (!$wind_summary{$Wstart}) { $wind_summary{$Wstart}{w} = [@empty]; $wind_summary{$Wstart}{s} = [@empty]; $wind_summary{$Wstart}{g} = [@empty]; } # Hash of fitness scores $wind_summary{$Wstart}{w} = [@{$wind_summary{$Wstart}{w}}, $w]; # Hash of counts used to generate those fitness scores. $wind_summary{$Wstart}{s} = [@{$wind_summary{$Wstart}{s}}, $avgCount]; if (!($gene =~ /^ *$/)){ $gene=~ s/"//g; $gene=~ s/ //g; $gene=~ s/'//g; push @allgenes,$gene; } $Wsum+=$w; $Wcount++; if ($pos!=$lastPos){ $insertion+=1; $lastPos=$pos; } } #if ($fields[0]>$Wend){ #if finished with that window, then: else{ if ($Wcount>0){ $totalWindows++; $totalInsert+=$insertion; my ($average, $variance, $stdev, $stderr); if ($num <=1 ) { ($average, $variance, $stdev, $stderr)=(0.10,0.10,"NA","NA"); } else{ if (!$weight) { ($average, $variance, $stdev, $stderr) = &average($wind_summary{$Wstart}{w}); } else { ($average, $variance, $stdev, $stderr)= &weighted_average($wind_summary{$Wstart}{w},$wind_summary{$Wstart}{s}); } } @allgenes= uniq(@allgenes); my @sortedGenes = sort { lc($a) cmp lc($b) } @allgenes; my $wind_genes=join(" ",@sortedGenes); print TEST $wind_genes,"\n"; my @window=($Wstart,$Wend,$Wcount,$insertion,$wind_genes,$average,$variance,$stdev,$stderr); return (\@window); } else{ #Even if there were no insertions, still want window in file for consistent start/end my @window=($Wstart,$Wend,$Wcount,$insertion," ","NA","NA","NA","NA"); return (\@window); } #Because count=0 (i.e. there were no insertion mutants in that window) } } } print "Start calculation: ",get_time(),"\n"; my $start=1; my $end=$size+1; my $windowNum=0; my @allWindows; #will be a 2D array containing all window info to be written into output file #IF A CUSTOM LIST OF START/END COORDINATES IS GIVEN THEN CREATE WINDOWS USING IT if ($custom){ open (CUST, '<', $custom); while(my $line=<CUST>){ $line=cleaner($line); my @cwind=split(",",$line); my $start=$cwind[0]; my $end=$cwind[1]; my $window=customWindow($start,$end); push @allWindows,$window; } close CUST; } else{ #if not custom list #WHILE LOOP TO CALL THE ONE WINDOW SUBROUTINE FOR CALCULATIONS===INCREMENTS START AND END VALUES OF THE WINDOW while ($end<=$last-$size){ #100,000bp-->9,950 windows--> only 8500 windows in csv because 0 my($window)=OneWindow($start,$end); push (@allWindows,$window); $start=$start+$step; $end=$end+$step; } print "End calculation: ",get_time(),"\n"; } #my $avgInsert=$totalInsert/$totalWindows; #print "Average number of insertions for $size base pair windows: $avgInsert\n"; #ESSENTIALS: Counting the number of TA sites in the genome and whether an insertion occurred there or not print "\n---------Assessing essentiality of genome region in each window--------\n\n"; my @sites; #First read fasta file into a string my $seqio = Bio::SeqIO->new(-file => $fasta, '-format' => 'Fasta'); my $prev; my $total=0; while(my $seq = $seqio->next_seq) { $fasta = $seq->seq; } #Get number of "TA" sites, regardless of insertion---this is just the fasta file my $x="TA"; my @c = $fasta =~ /$x/g; my $countTA = @c; #At what positions in the genome do TA sites occur? my $pos=0; my $countInsert=0; my @selector; #use this array to hold all 1 and 0 of whether insertion happened or not. #Go through all TA sites identified above and see if an insertion occurs there. #Push results onto two arrays a 2d array with the position and a 1D array with just the 0 or 1 my @unmatched; #hold all unmatched ta sites my @allTAsites; #2d array to hold all occurences of TA sites in genome my $unmatchedCount=0; my $offset=0; my $genPos = index($fasta,'TA',$offset); while (($genPos != -1) and ($pos<scalar @insertPos)) { #as long as the TA site is foun my $res=0; if ($genPos>$insertPos[$pos]){ push @unmatched,$insertPos[$pos]; $unmatchedCount++; $pos++; } if ($genPos==$insertPos[$pos]){ $res=1; $countInsert++; $pos++; } my @sites=($genPos,$res); push @selector,$res; #push the 0 or 1 onto the array @selector---going to use this to draw random sets for the null distribution push (@allTAsites,\@sites); $offset = $genPos + 1; $genPos = index($fasta, 'TA', $offset); $countTA++; } #my $FILE1 = "$run" . "allTAsites.txt"; my $FILE1 = $f1; open (ALL_TA, ">", $FILE1); foreach my $sit(@allTAsites){ foreach (@$sit){ print ALL_TA $_, "\t"; } printf ALL_TA "\n"; } close ALL_TA; my $FILE2 = $f2; #$run . "unmatched.txt"; unless(open UNM, ">", $FILE2){ die "\nUnable to create $FILE2:\n$!"; } foreach (@unmatched){ print UNM $_, "\n"; } close UNM; print "\n\nTotal of unmatched insertions: $unmatchedCount\n"; print "See unmatched.txt for genome indices of unmatched sites\n"; print "See allTAsites.txt for details on all TA sites and insertions\n\n"; #print "\nTotal: $countInsert insertions in $countTA TA sites.\n"; #-------------------------------------------------------------------------------------------------- #Now, have an array for each TA site and if an insertion occurred there. #So per site @sites=(position, 0 or 1 for insertion). #Next step, create null distribution of 10,000 random sets with same number of #TA sites as the window and then calculate p-value #SUBROUTINE FOR MAKING THE NULL DISTRIBUTION SPECIFIC TO THE WINDOW #DISTRIBUTION'S STANDARD DEVIATION my $N=10000; sub mean { if (scalar @_ ==0){ return 0; } my $sum; grep { $sum += $_ } @_; return $sum/@_; } sub stdev{ my @data = @{shift @_}; my $average=shift @_; my $sqtotal = 0; foreach(@data) { $sqtotal += ($average-$_) ** 2; } my $std = ($sqtotal / ($N-1)) ** 0.5; return $std; } #MAKE LIBRARY OF NULL DISTRIBUTIONS: print "Making a library of null distributions.\n For information on distribution library, see nullDist.txt\n"; my @distLib; my $FILE3 = $f3; #$run . "nullDist.txt"; unless(open DIST, ">", $FILE3){ die "\nUnable to create $FILE3:\n$!"; } printf DIST "Sites\tSize(n)\tMin\tMax\tMean\tstdev\tvar\tMinPval\n"; #Loop once for each distribution in the library #(i.e. distribution of 10,000 sites each with 35 TA sites, then a distribution of 10,000 sites each with 36 TA sites, etc) sub pvalue{ #takes in window count average (countAvg) and number of TAsites and makes a null distribution to calculate the pvalue, which it returns my $mean=shift@_; my $TAsites=shift@_; my $N=10000; my @specDist=@{$distLib[$TAsites-1]}; my $rank= binsearch_pos { $a cmp $b } $mean,@specDist; my $i=$rank; while ($i<scalar(@specDist)-1 and $specDist[$i+1]==$specDist[$rank]){ $i++; } $rank=$i; my $pval=$rank/$N; #calculate pval as rank/N return $pval; } for (my $sitez=1; $sitez<=$max;$sitez++){ #print "In the first for loop to make a null distribution\n"; my @unsorted; my $count=0; my $sum=0; for (my $i=1; $i<=$N; $i++){ my @random_set = rand_set( set => \@selector, size => $sitez); my $setMean=mean(@random_set); push (@unsorted, $setMean); #print "$i:\t", "$setMean\n"; $count++; $sum+=$setMean; } my @nullDist= sort { $a <=> $b } @unsorted; my $nullMean=sprintf("$round",($sum/$count)); my $standev =sprintf("$round",stdev(\@nullDist, $nullMean)); my $variance=sprintf("$round",($standev*$standev)); my $min=sprintf("$round",$nullDist[0]); my $max=sprintf("$round",$nullDist[scalar @nullDist-1]); my $setScalar=scalar @nullDist; push (@distLib,\@nullDist); my $minp=pvalue(0,$sitez); printf DIST "$sitez\t$N\t$min\t$max\t$nullMean\t$standev\t$variance\t$minp\n"; } close DIST; #SUBROUTINE TO CALCULATE THE P-VALUE OF THE WINDOW INSERTIONS AGAINST THE NULL DISTRIBUTION #--------------------------------------------------------------------------------------------- #Now we have an array called @allTAsites which contains every TAsite position #with a 0 next to it for "no insertion". #Now just need to replace 0 with 1 if there IS and insertion at that site my @newWindows=(); my $printNum=0; #my $allWindows=\@allWindows; for (my $i=0;$i<scalar @allWindows;$i++){ my @win=@{$allWindows[$i]}; my $starter=$win[0]; my $ender=$win[1]; my $insertions=$win[3]; my $mutcount=$win[2]; #print "num $printNum -->\tStart pos: $starter\tEnd pos: $ender\n"; #How many TA sites are there from $genome[$start] to $genome[$end]? my $seq = substr($fasta,$starter-1,$size); #start-1 becase $start and $end are positions in genome starting at 1,2,3.... substr(string,start, length) needs indexes my $ta="TA"; my @c = $seq =~ /$ta/g; my $TAsites = scalar @c; my $avgInsert=sprintf("$round",$insertions/$TAsites); my $pval=pvalue($avgInsert,$TAsites); #reorder so things make more sense: all fitness related calcs together and all sig together my @new=($starter,$ender,$mutcount,$insertions,$TAsites,$avgInsert,$pval,$win[5],$win[6],$win[7],$win[8],$win[4]); push (@newWindows,\@new); $printNum++; } print "End p-value calculation: ",get_time(),"\n\n"; #print "This is the TA count: $count\nTotal genome size is: $total\n\n"; #CALCULATE THE ABSOLUTE DIFFERENCE BETWEEN REGION'S MEAN FITNESS AND AVERAGE MEAN FITNESS ### For all windows, add a field that has the difference between that window's mean fitness and the #average mean fitness for all of the windows my @allFits = map $_->[7], @newWindows; my $meanFit=mean(@allFits); print "Average fitness for all windows: ",$meanFit,"\n"; my @expWindows=(); foreach (@newWindows){ my @entry=@{$_}; my $mean=$entry[7]; my $absdev=sprintf("$round",$mean-$meanFit); push (@entry,$absdev); push @expWindows,\@entry; } @newWindows=@expWindows; #MAKE OUTPUT CSV FILE WITH ESSENTIAL WINDOW CALCULATIONS print "Start csv ouput file creation: ",get_time(),"\n"; open CSV, '>', $f3; # $run . "slidingWindows.csv" or die "Cannot open sliding window output file"; my @csvHeader= ("start", "end","mutants","insertions","TA_sites","ratio","p-value","average", "variance","stdev","stderr","genes","fit-mean"); my $header=join(",",@csvHeader); print CSV $header,"\n"; foreach my $winLine(@newWindows){ my $string=join(",",@{$winLine}); print CSV $string, "\n"; } close CSV; print "End csv ouput file creation: ",get_time(),"\n\n"; my $in = Bio::SeqIO->new(-file=>$ref, -format => 'genbank'); my $refseq = $in->next_seq; my $refname = $refseq->id; #MAKE essentials WIG FILE---->later make BW--->IGV sub printwig{ print "Start wig file creation: ",get_time(),"\n"; my $in = Bio::SeqIO->new(-file=>$ref, -format => 'genbank'); my $refseq = $in->next_seq; my $refname = $refseq->id; my $ewig="essentialWindows.wig"; my $FILE5 = $run . $ewig; open eWIG, ">", $f5; #$FILE5"; printf eWIG "track type=wiggle_0 name=$ewig\n"; printf eWIG "variableStep chrom=$refname\n"; foreach my $wigLine(@newWindows){ my @wigFields=@$wigLine; my $position=$wigFields[0]; while ($position<=$wigFields[1]){ printf eWIG $position, " ",$wigFields[7],"\n"; #7 for pvalue, but 2 for fitness!!!!!! $position=$position+1; } } close eWIG; print "End wig file creation: ",get_time(),"\n\n"; print "If this wig file needs to be converted to a Big Wig, then use USCS program wigToBigWig in terminal: \n \t./wigToBigWig gview/12G.wig organism.txt BigWig/output.bw \n\n"; } #OUTPUT REGULAR SLIDING INFORMATION WITHOUT ESSENTIALS CALCULATIONS (P-VALUES) #MAKE OUTPUT CSV FILE WITH FITNESS WINDOW CALCULATIONS print "Start csv ouput file creation: ",get_time(),"\n"; open FITFILE, ">", $f4; #$run . "fitWindows.csv" or die "Failed to open file"; my @head = ("start", "end","fitness","mutant_count", "\n"); print FITFILE join(",",@head); #header foreach my $winLine(@allWindows){ print FITFILE join(",", @$winLine),"\n"; } close FITFILE; print "End csv ouput file creation: ",get_time(),"\n\n"; #MAKE WIG FILE---->later make BW--->IGV print "Start wig file creation: ",get_time(),"\n"; my $fin = Bio::SeqIO->new(-file=>$ref, -format => 'genbank'); $refseq = $fin->next_seq; $refname = $refseq->id; my $fwig="fitWindows.wig"; open fWIG, ">", $f5; #$run . $fwig; print fWIG "track type=wiggle_0 name=$fwig\n"; print fWIG "variableStep chrom=$refname\n"; foreach my $wigLine(@allWindows){ my @wigFields=@$wigLine; my $pos=$wigFields[0]; my $fit=$wigFields[7]; print fWIG $pos," ",$fit,"\n"; } close fWIG; print "End wig file creation: ",get_time(),"\n\n"; print "If this wig file needs to be converted to a Big Wig,\n"; print " then use USCS program wigToBigWig in terminal: \n"; print "\t./wigToBigWig gview/12G.wig organism.txt BigWig/output.bw \n\n"; #GOING TO MAKE A TEXT FILE FOR BED CONVERSION TO BIGBED, NEED CHROM # IN COLUMN 0 my @cummulative; #MAKING A REGULAR TEXT FILE fields: [chrom,start,end,fitness,count] print "Start text file creation time: ",get_time(),"\n"; open my $TXT, '>', $f6; #$run . "fitWindows.txt" or die $!; print $TXT ("start","end","W","mutant_count","insertion_count\n"); print $TXT (join("\t",@$_),"\n") for @allWindows; close $TXT; print "End text file creation: ",get_time(),"\n\n";