annotate calc_fitness.xml @ 11:1bc740b6462d draft default tip

Uploaded
author kaymccoy
date Sun, 06 Nov 2016 20:31:32 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
1 <tool id="calc_fitness" name="Calculate Fitness">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
2 <description>of transposon insertion locations</description>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
3 <requirements>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
4 <requirement type="package" version="1.64">biopython</requirement>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
5 </requirements>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
6 <command interpreter="python">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
7 calc_fitness.py
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
8 -ef $ef
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
9 -el $el
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
10 -wig $output3
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
11 -t1 $t1
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
12 -t2 $t2
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
13 -ref $ref
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
14 -out $output
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
15 -out2 $output2
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
16 -expansion $expansion
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
17 -maxweight $maxweight
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
18 -cutoff $cutoff
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
19 -cutoff2 $cutoff2
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
20 -strand $strand
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
21 #if $normalization.calculations == "yes":
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
22 -normalize $normalization.genes
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
23 #end if
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
24 #if $multiply.choice == "yes":
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
25 -multiply $multiply.factor
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
26 #end if
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
27 #if $reads.uncol == "yes":
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
28 -uncol 1
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
29 #end if
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
30 #if $bottle.all == "yes":
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
31 -b 1
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
32 #end if
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
33 </command>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
34 <inputs>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
35 <param name="t1" type="data" label="Map files from t1"/>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
36 <param name="t2" type="data" label="Map files from t2"/>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
37 <param name="ref" type="data" label="GenBank reference genome"/>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
38 <conditional name="normalization">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
39 <param name="calculations" type="select" label="Normalize fitness calculations?">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
40 <option value="no">No</option>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
41 <option value="yes">Yes</option>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
42 </param>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
43 <when value="no">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
44 <!-- do nothing -->
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
45 </when>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
46 <when value="yes">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
47 <param name="genes" type="data" label="Genes to normalize by" />
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
48 </when>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
49 </conditional>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
50 <param name="strand" type="select" label="Use reads from which strands?">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
51 <option value="both">both</option>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
52 <option value="+">Watson (+)</option>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
53 <option value="-">Crick (-)</option>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
54 </param>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
55 <param name="expansion" type="float" value="250" label="Expansion factor"/>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
56 <param name="cutoff" type="float" value="0.0" label="Cutoff1"/>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
57 <param name="cutoff2" type="float" value="0.0" label="Cutoff2"/>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
58 <param name="ef" type="float" value="0.0" label="Exclude first %"/>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
59 <param name="el" type="float" value="0.0" label="Exclude last %"/>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
60 <param name="maxweight" type="float" value="75" label="Maximum weight of a transposon gene in normalization calculations"/>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
61 <conditional name="multiply">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
62 <param name="choice" type="select" label="Multiply fitness scores by a certain value?">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
63 <option value="no">No</option>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
64 <option value="yes">Yes</option>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
65 </param>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
66 <when value="no">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
67 <!-- do nothing -->
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
68 </when>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
69 <when value="yes">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
70 <param name="factor" type="float" value="0.0" label="Multiply by" />
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
71 </when>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
72 </conditional>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
73 <conditional name="bottle">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
74 <param name="all" type="select" label="Calculate bottleneck value from all genes (rather than only normalization genes)?">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
75 <option value="no">No</option>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
76 <option value="yes">Yes</option>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
77 </param>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
78 <when value="no">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
79 <!-- do nothing -->
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
80 </when>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
81 <when value="yes">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
82 <!-- do nothing -->
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
83 </when>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
84 </conditional>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
85 <conditional name="reads">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
86 <param name="uncol" type="select" label="Were reads uncollapsed when mapped?">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
87 <option value="no">No</option>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
88 <option value="yes">Yes</option>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
89 </param>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
90 <when value="no">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
91 <!-- do nothing -->
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
92 </when>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
93 <when value="yes">
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
94 <!-- do nothing -->
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
95 </when>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
96 </conditional>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
97 </inputs>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
98 <outputs>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
99 <data format="csv" name="output" />
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
100 <data format="txt" name="output2" />
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
101 <data format="wig" name="output3" />
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
102 </outputs>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
103 <help>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
104
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
105 **What it does**
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
106
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
107 This tool calculates the fitness values of transposon insertion mutations generated by Tn-Seq, by analyzing Illumina sequencing reads from t1 and t2.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
108
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
109 **The options explained**
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
110
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
111 Map files from t1: a bowtie mapfile containing the mapped flanking reads from t1
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
112
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
113 Map files from t2: a bowtie mapfile containing the mapped flanking reads from t2
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
114
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
115 GenBank reference genome: the reference genome of whatever model you're working with, which needs to be in standard GenBank format. For more on that format, see the GenBank website.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
116
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
117 Normalizing fitness calculations: our normalization relies on the fitness scores of insertions within transposon genes, which ought to have a neutral fitness of 1. The file of normalization genes should be formatted so that each line is a single gene locus, like "SP_0017".
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
118
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
119 Using reads from certain strands: typically users will use reads from both strands, but this lets you do things like comparing reads between strands.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
120
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
121 Expansion factor: the expansion factor of the bacterial culture you got your reads from - this is something you should measure when you're growing up the bacteria from t1 to t2. Using the default expansion factor of 250 will give you very rough fitness calculations, and so it's not recommended.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
122
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
123 Cutoff1: the cutoff for all genes; insertion locations with an average count less than this number will be disregarded, as insertion locations with a low number of reads can have inaccurate fitnesses calculated, for the same reason studies with low sample sizes can be inaccurate.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
124
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
125 Cutoff2: the cutoff for the normalization genes; it only has an effect if it is larger than Cutoff1.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
126
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
127 Exclude first %: insertions in the very beginning of genes sometimes don't actually interfere with their function, and so you can exclude insertions from the first % of a gene from being counted as within those genes. This mostly affects the aggregate calculations downstream.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
128
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
129 Exclude last %: similarly, insertions in the very end of genes sometimes don't actually interfere with their function, and so you can exclude insertions from the last % of a gene. This also mostly affects the aggregate calculations downstream.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
130
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
131 Maximum weight of a transposon gene in normalization calculations: in the normalization calculations, fitnesses within transposon genes are weighted according to their number of reads, as fitnesses calculated from more reads tend to be more accurate. However, to keep those fitnesses with huge numbers of reads from vastly outweighing the others, you can limit the max weight.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
132
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
133 Multiplying fitness scores by a certain value: exactly what it says; you can multiply the normalized fitness scores by a certain value. This can be helpful for genetic interaction screens, where Tn-seq is performed as usual except there's one background knockout all the mutants share. This is because a combination of independent mutations should have a fitness value that's equal to their individual fitness values multiplied, but related mutations will deviate from that. To find those deviations, you'd multiply all the fitness values of the mutants from a normal library by the fitness of the background knockout, and compare the result to the fitness values found from the knockout library.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
134
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
135 Setting reads1 / reads2 manually: these are related to the correction factor calculations; it's not recommended that you set them manually. If either number is too low, it will cause a mathematical error and Calculate Fitness will not work.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
136
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
137 Output: the output is a csv (comma separated values) file containing the fitness values calculated. Each line besides the header will represent the following information for an insertion location: position, strand, count_1, count_2, ratio, mt_freq_t1, mt_freq_t2, pop_freq_t1, pop_freq_t2, gene, D, W, nW
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
138
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
139 Output2: a txt file containing the percent blanks to be used in the Aggregate tool for normalization
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
140
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
141 Output3: a wig file that can be used for visualization of the fitness values; each line besides the header will be an insertion location and its (possibly normalized) fitness.
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
142
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
143 </help>
1bc740b6462d Uploaded
kaymccoy
parents:
diff changeset
144 </tool>