annotate calc_fitness.xml @ 10:00a4fcfdef53 draft

Uploaded
author kaymccoy
date Sun, 11 Dec 2016 17:02:36 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
1 <tool id="calc_fitness" name="Calculate Fitness">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
2 <description>of transposon insertion locations</description>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
3 <requirements>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
4 <requirement type="package" version="1.64">biopython</requirement>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
5 </requirements>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
6 <command interpreter="python">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
7 calc_fitness.py
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
8 -ef $ef
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
9 -el $el
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
10 -wig $output3
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
11 -t1 $t1
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
12 -t2 $t2
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
13 -ref $ref
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
14 -out $output
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
15 -out2 $output2
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
16 -expansion $expansion
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
17 -maxweight $maxweight
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
18 -cutoff $cutoff
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
19 -cutoff2 $cutoff2
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
20 -strand $strand
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
21 #if $normalization.calculations == "yes":
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
22 -normalize $normalization.genes
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
23 #end if
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
24 #if $multiply.choice == "yes":
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
25 -multiply $multiply.factor
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
26 #end if
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
27 #if $reads.uncol == "yes":
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
28 -uncol 1
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
29 #end if
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
30 #if $bottle.all == "yes":
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
31 -b 1
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
32 #end if
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
33 </command>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
34 <inputs>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
35 <param name="t1" type="data" label="Map files from t1"/>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
36 <param name="t2" type="data" label="Map files from t2"/>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
37 <param name="ref" type="data" label="GenBank reference genome"/>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
38 <conditional name="normalization">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
39 <param name="calculations" type="select" label="Normalize fitness calculations?">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
40 <option value="no">No</option>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
41 <option value="yes">Yes</option>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
42 </param>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
43 <when value="no">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
44 <!-- do nothing -->
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
45 </when>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
46 <when value="yes">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
47 <param name="genes" type="data" label="Genes to normalize by" />
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
48 </when>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
49 </conditional>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
50 <param name="strand" type="select" label="Use reads from which strands?">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
51 <option value="both">both</option>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
52 <option value="+">Watson (+)</option>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
53 <option value="-">Crick (-)</option>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
54 </param>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
55 <param name="expansion" type="float" value="250" label="Expansion factor"/>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
56 <param name="cutoff" type="float" value="0.0" label="Cutoff1"/>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
57 <param name="cutoff2" type="float" value="0.0" label="Cutoff2"/>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
58 <param name="ef" type="float" value="0.0" label="Exclude first %"/>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
59 <param name="el" type="float" value="0.0" label="Exclude last %"/>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
60 <param name="maxweight" type="float" value="75" label="Maximum weight of a transposon gene in normalization calculations"/>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
61 <conditional name="multiply">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
62 <param name="choice" type="select" label="Multiply fitness scores by a certain value?">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
63 <option value="no">No</option>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
64 <option value="yes">Yes</option>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
65 </param>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
66 <when value="no">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
67 <!-- do nothing -->
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
68 </when>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
69 <when value="yes">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
70 <param name="factor" type="float" value="0.0" label="Multiply by" />
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
71 </when>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
72 </conditional>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
73 <conditional name="bottle">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
74 <param name="all" type="select" label="Calculate bottleneck value from all genes (rather than only normalization genes)?">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
75 <option value="no">No</option>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
76 <option value="yes">Yes</option>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
77 </param>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
78 <when value="no">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
79 <!-- do nothing -->
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
80 </when>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
81 <when value="yes">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
82 <!-- do nothing -->
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
83 </when>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
84 </conditional>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
85 <conditional name="reads">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
86 <param name="uncol" type="select" label="Were reads uncollapsed when mapped?">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
87 <option value="no">No</option>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
88 <option value="yes">Yes</option>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
89 </param>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
90 <when value="no">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
91 <!-- do nothing -->
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
92 </when>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
93 <when value="yes">
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
94 <!-- do nothing -->
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
95 </when>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
96 </conditional>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
97 </inputs>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
98 <outputs>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
99 <data format="csv" name="output" />
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
100 <data format="txt" name="output2" />
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
101 <data format="wig" name="output3" />
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
102 </outputs>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
103 <help>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
104
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
105 **What it does**
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
106
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
107 This tool calculates the fitness values of transposon insertion mutations generated by Tn-Seq, by analyzing Illumina sequencing reads from t1 and t2.
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
108
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
109 **The options explained**
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
110
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
111 Map files from t1: a bowtie mapfile containing the mapped flanking reads from t1
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
112
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
113 Map files from t2: a bowtie mapfile containing the mapped flanking reads from t2
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
114
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
115 GenBank reference genome: the reference genome of whatever model you're working with, which needs to be in standard GenBank format. For more on that format see the GenBank website.
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
116
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
117 Normalizing fitness calculations: our normalization relies on the fitness scores of insertions within transposon genes, which ought to have a neutral fitness of 1. The file of normalization genes should be formatted so that each line is a single gene locus, like "SP_0017".
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
118
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
119 Expansion factor: the expansion factor of the bacterial culture you got your reads from - this is something you should measure when you're growing up the bacteria from t1 to t2. Using the default expansion factor of 250 will give you very rough fitness calculations and so it's not recommended.
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
120
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
121 Cutoff1: the cutoff for all genes; insertion locations with an average count less than this number will be disregarded, as insertion locations with a low number of reads can have inaccurate fitnesses calculated, for the same reason studies with low sample sizes can be inaccurate.
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
122
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
123 Cutoff2: the cutoff for the normalization genes; it only has an effect if it is larger than Cutoff1.
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
124
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
125 Exclude first %: insertions at the very beginning of genes sometimes don't actually interfere with their function, and so you can exclude insertions from the first % of a gene from being counted as within those genes. This mostly affects the aggregate calculations downstream.
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
126
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
127 Exclude last %: similarly, insertions at the very end of genes sometimes don't actually interfere with their function, and so you can exclude insertions from the last % of a gene. This also mostly affects the aggregate calculations downstream.
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
128
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
129 Maximum weight of a transposon gene in normalization calculations: in the normalization calculations, fitnesses within transposon genes are weighted according to their number of reads, as fitnesses calculated from more reads tend to be more accurate. However, to keep those fitnesses with huge numbers of reads from vastly outweighing the others, you can limit the max weight.
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
130
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
131 Multiplying fitness scores by a certain value: what it says on the lid; you can multiply the normalized fitness scores by a certain value. This can be helpful for genetic interaction screens, where Tn-seq is performed as usual except there's one background knockout all the mutants share. This is because a combination of independent mutations should have a fitness value that's equal to their individual fitness values multiplied, but related mutations will deviate from that; to find those deviations you'd multiply all the fitness values from mutants from a normal library by the fitness of the background knockout and compare that to the fitness values found from the knockout library!
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
132
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
133 Calculate bottleneck value from all genes (rather than only normalization genes): the bottleneck value is an approximation of what percentage of insertions are randomly lost, estimated either from the normalization genes or from all genes.
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
134
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
135 Were reads uncollapsed when mapped: only select "yes" if reads were never collapsed upstream.
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
136
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
137 Output: the output is a csv (comma separated values) file containing the fitness values calculated. Each line besides the header will represent the following information for an insertion location: position, strand, count_1, count_2, ratio, mt_freq_t1, mt_freq_t2, pop_freq_t1, pop_freq_t2, gene, D, W, nW
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
138
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
139 Output2: a txt file containing the percent blanks and other info to be used in the Aggregate tool for normalization
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
140
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
141 Output3: a wig file that can be used for visualization of the fitness values; each line besides the header will be an insertion location and its (possibly normalized) fitness.
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
142
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
143 </help>
00a4fcfdef53 Uploaded
kaymccoy
parents:
diff changeset
144 </tool>