# HG changeset patch # User antmarge # Date 1490724214 14400 # Node ID 34e7e6ea1c17eedf4edd950bceaf07359115e660 # Parent 48c8d3ed441aade7f3d139de33339995a5973f0a Deleted selected files diff -r 48c8d3ed441a -r 34e7e6ea1c17 regionFitness.xml --- a/regionFitness.xml Tue Mar 28 14:03:22 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,123 +0,0 @@ - - - - - - - perl - perl_list_binarysearch - perl_getopt_long - perl_data_random - bioperl - - - assess fitness effect of mutations in a region (sliding window or custom) - - - regionFitness.pl - -f $fasta - -r $ref_genome - -c $cutoff - -n $run - -m $max - #if $region.define == "c": - -u $region.custom - #end if - #if $region.define == "s": - -size $region.size - -step $region.step - #end if - #if $weight.algorithm == "yes": - -w - -wc $weight.ceiling - #end if - - -f1 $allTAsites - -f2 $nullDist - -f3 $slidingWindows - -f4 $fitWindowscsv - -f5 $fitWindowswig - -f6 $fitWindowstxt - - $input - #for $a in $additionalcsv - ${a.input2} - #end for - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - **Tool Description** - - This tool takes a non-gene-centric approach to assessing importance of regions (user defined or sliding windows) to organismal fitness. - - **Options** - - *The csv fitness file(s)*: These are the csv (comma separated values) files containing the fitness values that will be used in downstream analyses. Since they should have been produced by the "Calculate Fitness" tool, each line besides the header should represent the following information for an insertion location: position,strand,count_1,count_2,ratio,mt_freq_t1,mt_freq_t2,pop_freq_t1,pop_freq_t2,gene,D,W,nW - - *Fasta file*: the fasta file for the genome of the organism - - *GenBank reference genome*: the reference genome of whatever model you're working with, which needs to be in standard genbank format. For more on that format see the genbank website. 
- - *Region definition*: Define the regions by a custom file or by sliding windows at a set size and step. If using the custom region option, provide a tab-delimited file with start and end coordinates of each region, one region per line. If choosing the sliding window option, specify the size of the window over which assessments will be made and the step (how much the sliding window increments at each assessment). - - *Max*: The maximum number of insertions expected in a window. This is used for creating a null distribution upon which the significance of regional essentiality is assessed. An error will be produced if the maximum number of insertions is lower than the actual maximum. Run the Data Overview tool to find the real max number of insertions in a window. This option will be removed in a later version, once the maximum is determined from the data. - - *Weight ceiling*: This value lets you set a weight ceiling for the weights of fitness values. It's only relevant if you're using weighted algorithms. - - *Cutoff*: This value lets you ignore the fitness scores of any insertion locations with an average count (the number of counts from t1 and t2 divided by 2) less than it. - - *Run name*: The name of the run, to be appended to the end of every output file. - - - - -