comparison regionFitness.xml @ 9:0ba0852ed5ad draft

Uploaded
author antmarge
date Tue, 28 Mar 2017 14:03:51 -0400
parents
children
comparison
equal deleted inserted replaced
8:e23da2ebbc5b 9:0ba0852ed5ad
1 <tool id="regionFitness" name="Region Fitness" version="0.1.0">
2
3 <!-- Margaret Antonio 17.01.08 -->
4 <!-- Dependencies declared for this tool: each entry is a package requirement pinned to an exact version. -->
5 <requirements>
6 <!-- Disabled, kept for reference (nothing in the command below reads LINKYX_PATH): <requirement type="set_environment">LINKYX_PATH</requirement> -->
7 <requirement type="package" version="5.18.1">perl</requirement>
8 <requirement type="package" version="0.25">perl_list_binarysearch</requirement>
9 <requirement type="package" version="2.45">perl_getopt_long</requirement>
10 <requirement type="package" version="0.12">perl_data_random</requirement>
11 <requirement type="package" version="1.6.922">bioperl</requirement>
12 </requirements>
13 <!-- Short one-line summary of what the tool does. -->
14 <description>assess fitness effect of mutations in a region (sliding window or custom)</description>
15 <!-- Command line for regionFitness.pl. Dataset paths and the free-text run name are single-quoted so a value containing spaces or shell metacharacters reaches the script as exactly one argument. NOTE(review): the "unmatched" output declared in the outputs section is not passed to the script here; confirm whether it should be. -->
16 <command interpreter="perl">
17   regionFitness.pl
18   -f '$fasta'
19   -r '$ref_genome'
20   -c $cutoff
21   -n '$run'
22   -m $max
23   #if $region.define == "c":
24     -u '$region.custom'
25   #end if
26   #if $region.define == "s":
27     -size $region.size
28     -step $region.step
29   #end if
30   #if $weight.algorithm == "yes":
31     -w
32     -wc $weight.ceiling
33   #end if
34   ## Output dataset paths handed to the script, one option per declared output.
35   -f1 '$allTAsites'
36   -f2 '$nullDist'
37   -f3 '$slidingWindows'
38   -f4 '$fitWindowscsv'
39   -f5 '$fitWindowswig'
40   -f6 '$fitWindowstxt'
41   ## Positional arguments: the primary fitness file followed by every additional one.
42   '$input'
43   #for $a in $additionalcsv:
44     '${a.input2}'
45   #end for
46
47 </command>
48 <!-- User-facing parameters. Each conditional defines a "when" block for every option of its select, so every choice maps to an explicit (possibly empty) parameter set. -->
49 <inputs>
50   <param name="input" type="data" label="CSV Fitness File(s)"/>
51   <repeat name="additionalcsv" title="Additional csv fitness file(s)">
52     <param name="input2" type="data" label="Select"/>
53   </repeat>
54   <param format="fasta" name="fasta" type="data" label="Fasta file"/>
55   <param name="ref_genome" type="data" label="GenBank reference genome"/>
56   <!-- Region definition: sliding windows (size and step) or a file of custom regions. -->
57   <conditional name="region">
58     <param name="define" type="select" label="Define regions: custom or sliding?">
59       <option value="s">Sliding Windows</option>
60       <option value="c">Custom</option>
61     </param>
62     <when value="s">
63       <param name="size" type="integer" value="500" label="Sliding window size"/>
64       <param name="step" type="integer" value="10" label="Sliding window intervals"/>
65     </when>
66     <when value="c">
67       <param name="custom" type="data" label="File with custom regions"/>
68     </when>
69   </conditional>
70   <conditional name="weight">
71     <param name="algorithm" type="select" label="Use weighted algorithms?">
72       <option value="yes">Yes</option>
73       <option value="no">No</option>
74     </param>
75     <when value="yes">
76       <param name="ceiling" type="integer" value="50" label="Weight ceiling"/>
77     </when>
78     <when value="no"/>
79   </conditional>
80   <!-- Scalar options forwarded to the script as -c, -m and -n respectively. -->
81   <param name="cutoff" type="integer" value="10" label="Cutoff"/>
82   <param name="max" type="integer" value="100" label="Highest # insertions in region"/>
83   <param name="run" type="text" value="run1" label="Name of run (will be appended to output files)"/>
84
85 </inputs>
86 <!-- Output datasets; every label starts with the run name. NOTE(review): "unmatched" is declared here but is never referenced in the command block of this file, so nothing visible passes its path to the script. Confirm it is still needed. -->
87 <outputs>
88 <data format="txt" name="allTAsites" label="${run}_allTAsites" />
89 <data format="txt" name="unmatched" label="${run}_unmatched"/>
90 <data format="txt" name="nullDist" label="${run}_nullDist"/>
91 <data format="csv" name="slidingWindows" label="${run}_slidingWindows"/>
92 <data format="csv" name="fitWindowscsv" label="${run}_fitWindows_csv"/>
93 <data format="wig" name="fitWindowswig" label="${run}_fitWindows_wig"/>
94 <data format="txt" name="fitWindowstxt" label="${run}_fitWindows_txt"/>
95 </outputs>
96
97 <help>
98 **Tool Description**
99
100 This tool takes a non-gene-centric approach to assessing importance of regions (user defined or sliding windows) to organismal fitness.
101
102 **Options**
103
104 *The csv fitness file(s)*: These are the csv (comma separated values) files containing the fitness values that will be used in downstream analyses. Since they should have been produced by the "Calculate Fitness" tool, each line besides the header should represent the following information for an insertion location: position,strand,count_1,count_2,ratio,mt_freq_t1,mt_freq_t2,pop_freq_t1,pop_freq_t2,gene,D,W,nW
105
106 *Fasta file*: the fasta file for the genome of the organism
107
108 *GenBank reference genome*: the reference genome of whatever model you're working with, which needs to be in standard genbank format. For more on that format see the genbank website.
109
110 *Region definition*: Define the regions by a custom file or by sliding windows at a set size and step. If using the custom region option, provide a tab-delimited file with start and end coordinates of each region, one region per line. If choosing the sliding window option, specify the size of the window over which assessments will be made and the step, i.e. how far the sliding window advances at each assessment.
111
112 *Max*: The maximum number of insertions expected in a window. This is used for creating a null distribution against which the significance of regional essentiality is assessed. An error will be produced if this maximum is lower than the actual number of insertions found in a window. Run the Data Overview tool to find the real maximum number of insertions in a window. This option will be removed in a later version, once the value is determined from the data.
113
114 *Weight ceiling*: This value lets you set a weight ceiling for the weights of fitness values. It's only relevant if you're using weighted algorithms.
115
116 *Cutoff*: This value lets you ignore the fitness scores of any insertion locations with an average count (the number of counts from t1 and t2 divided by 2) less than it.
117
118 *Run name*: The name of the run, to be appended to the end of every output file.
119
120
121 </help>
122
123 </tool>