annotate calc_fitness.xml @ 2:4a08992142ee draft

Uploaded
author kaymccoy
date Thu, 11 Aug 2016 17:41:20 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
1 <tool id="calc_fitness" name="Calculate Fitnesses">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
2 <description>of transposon insertion locations</description>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
3 <requirements>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
4 <requirement type="package" version="1.64">biopython</requirement>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
5 </requirements>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
6 <command interpreter="python">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
7 calc_fitness.py
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
8 -ef $ef
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
9 -el $el
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
10 -wig $output3
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
11 -t1 $t1
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
12 -t2 $t2
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
13 -ref $ref
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
14 -out $output
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
15 -out2 $output2
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
16 -expansion $expansion
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
17 -maxweight $maxweight
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
18 -cutoff $cutoff
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
19 -cutoff2 $cutoff2
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
20 -strand $strand
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
21 #if $normalization.calculations == "yes":
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
22 -normalize $normalization.genes
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
23 #end if
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
24 #if $multiply.choice == "yes":
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
25 -multiply $multiply.factor
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
26 #end if
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
27 #if $reads.uncol == "yes":
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
28 -uncol 1
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
29 #end if
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
30 #if $reads1.choice == "yes":
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
31 -reads1 $reads1.number
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
32 #end if
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
33 #if $reads2.choice == "yes":
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
34 -reads2 $reads2.number
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
35 #end if
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
36 </command>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
37 <inputs>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
38 <param name="t1" type="data" label="Map files from t1"/>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
39 <param name="t2" type="data" label="Map files from t2"/>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
40 <param name="ref" type="data" label="GenBank reference genome"/>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
41 <conditional name="normalization">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
42 <param name="calculations" type="select" label="Normalize fitness calculations?">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
43 <option value="no">No</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
44 <option value="yes">Yes</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
45 </param>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
46 <when value="no">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
47 <!-- do nothing -->
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
48 </when>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
49 <when value="yes">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
50 <param name="genes" type="data" label="Genes to normalize by" />
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
51 </when>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
52 </conditional>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
53 <param name="strand" type="select" label="Use reads from which strands?">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
54 <option value="both">both</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
55 <option value="+">Watson (+)</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
56 <option value="-">Crick (-)</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
57 </param>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
58 <param name="expansion" type="float" value="250" label="Expansion factor"/>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
59 <param name="cutoff" type="float" value="0.0" label="Cutoff"/>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
60 <param name="cutoff2" type="float" value="0.0" label="Cutoff2"/>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
61 <param name="ef" type="float" value="0.0" label="Exclude first %"/>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
62 <param name="el" type="float" value="0.0" label="Exclude last %"/>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
63 <param name="maxweight" type="float" value="75" label="Maximum weight of a transposon gene in normalization calculations"/>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
64 <conditional name="multiply">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
65 <param name="choice" type="select" label="Multiply fitness scores by a certain value?">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
66 <option value="no">No</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
67 <option value="yes">Yes</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
68 </param>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
69 <when value="no">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
70 <!-- do nothing -->
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
71 </when>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
72 <when value="yes">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
73 <param name="factor" type="float" value="0.0" label="Multiply by" />
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
74 </when>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
75 </conditional>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
76 <conditional name="reads">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
77 <param name="uncol" type="select" label="Were reads uncollapsed when mapped?">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
78 <option value="no">No</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
79 <option value="yes">Yes</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
80 </param>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
81 <when value="no">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
82 <!-- do nothing -->
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
83 </when>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
84 <when value="yes">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
85 <!-- do nothing -->
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
86 </when>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
87 </conditional>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
88 <conditional name="reads1">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
89 <param name="choice" type="select" label="Set reads1 manually?">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
90 <option value="no">No</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
91 <option value="yes">Yes</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
92 </param>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
93 <when value="no">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
94 <!-- do nothing -->
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
95 </when>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
96 <when value="yes">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
97 <param name="number" type="float" value="0.0" label="Reads1" />
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
98 </when>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
99 </conditional>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
100 <conditional name="reads2">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
101 <param name="choice" type="select" label="Set reads2 manually?">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
102 <option value="no">No</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
103 <option value="yes">Yes</option>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
104 </param>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
105 <when value="no">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
106 <!-- do nothing -->
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
107 </when>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
108 <when value="yes">
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
109 <param name="number" type="float" value="0.0" label="Reads2" />
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
110 </when>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
111 </conditional>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
112 </inputs>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
113 <outputs>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
114 <data format="csv" name="output" />
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
115 <data format="txt" name="output2" />
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
116 <data format="wig" name="output3" />
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
117 </outputs>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
118 <help>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
119
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
120 **What it does**
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
121
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
122 This tool calculates the fitness values of transposon insertion mutations generated by Tn-Seq, by analyzing Illumina sequencing reads from t1 and t2.
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
123
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
124 **The options explained**
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
125
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
126 Map files from t1: a bowtie mapfile containing the mapped flanking reads from t1
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
127
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
128 Map files from t2: a bowtie mapfile containing the mapped flanking reads from t2
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
129
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
130 GenBank reference genome: the reference genome of whatever model you're working with, which needs to be in standard genbank format. For more on that format see the genbank website.
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
131
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
132 Normalizing fitness calculations: our normalization relies on the fitness scores of insertions within transposon genes, which ought to have a neutral fitness of 1. The file of normalization genes should be formatted so that each line is a single gene locus, like "SP_0017"
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
133
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
134 Using reads from certain strands: typically users will use reads from both strands, but this lets you do things like comparing reads between strands.
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
135
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
136 Expansion factor: the expansion factor of the bacteria culture you got your reads from - this is something you should measure when you're growing up the bacteria from t1 to t2. Using the default expansion factor of 250 will give you very rough fitness calculations and so it's not recommended.
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
137
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
138 Cutoff: the cutoff for all genes; insertion locations with an average count less than this number will be disregarded, as insertion locations with a low number of reads can have inaccurate fitnesses calculated, for the same reason studies with low sample sizes can be inaccurate.
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
139
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
140 Cutoff2: the cutoff for the normalization genes; only has an effect if larger than cutoff
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
141
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
142 Exclude first %: insertions in the very beginning of genes sometimes don't actually interfere with their function, and so you can exclude insertions from the first % of a gene from being counted as within those genes. This mostly affects the aggregate calculations downstream.
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
143
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
144 Exclude last %: similarly insertions in the very end of genes sometimes don't actually interfere with their function, and so you can exclude insertions from the last % of a gene. Also mostly affects the aggregate calculations downstream.
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
145
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
146 Maximum weight of a transposon gene in normalization calculations: in the normalization calculations, fitnesses within transposon genes are weighted according to their number of reads, as fitnesses calculated from more reads tend to be more accurate. However, to keep those fitnesses with huge numbers of reads from vastly outweighing the others, you can limit the max weight.
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
147
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
148 Multiplying fitness scores by a certain value: what it says on the lid; you can multiply the normalized fitness scores by a certain value. This can be helpful for genetic interaction screens, where Tn-Seq is performed as usual except there's one background knockout all the mutants share. This is because a combination of independent mutations should have a fitness value that's equal to their individual fitness values multiplied, but related mutations will deviate from that; to find those deviations you'd multiply all the fitness values from mutants from a normal library by the fitness of the background knockout and compare that to the fitness values found from the knockout library!
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
149
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
150 Setting reads1 / reads2 manually: these are related to the correction factor calculations; it's not recommended that you set them manually. If either number is set too low, it will cause a mathematical error and Calculate Fitnesses will not work.
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
151
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
152 Output: the output is a csv (comma separated values) file containing the fitness values calculated. Each line besides the header will represent the following information for an insertion location: position, strand, count_1, count_2, ratio, mt_freq_t1, mt_freq_t2, pop_freq_t1, pop_freq_t2, gene, D, W, nW
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
153
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
154 Output2: a txt file containing the percent blanks to be used in the Aggregate tool for normalization
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
155
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
156 Output3: a wig file that can be used for visualization of the fitness values; each line besides the header will be an insertion location and its (possibly normalized) fitness.
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
157
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
158 </help>
4a08992142ee Uploaded
kaymccoy
parents:
diff changeset
159 </tool>