annotate aggregate.xml @ 10:4ea9c69aa74d draft default tip

Uploaded
author kaymccoy
date Mon, 01 May 2017 22:56:46 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
1 <tool id="aggregate" name="Aggregate">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
2 <description>fitness calculations by gene</description>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
3 <requirements>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
4 <requirement type="package" version="1.64">biopython</requirement>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
5 </requirements>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
6 <command interpreter="python">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
7 aggregate.py
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
8 #if $mark.certain == "yes":
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
9 -m $mark.genes
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
10 #end if
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
11 #if $weighted.algorithms == "yes":
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
12 -w 1
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
13 #end if
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
14 -x $cutoff
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
15 -l $weightceiling
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
16 #if $blank.count == "yes":
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
17 -b $blank.custom_blanks
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
18 #end if
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
19 #if $blank.count == "no":
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
20 -f $blank.txt_blanks
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
21 #end if
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
22 -c $ref
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
23 -o $output
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
24 $input
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
25 #for $a in $additionalcsv
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
26 ${a.input2}
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
27 #end for
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
28 </command>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
29 <inputs>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
30 <param name="input" type="data" label="csv fitness file"/>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
31 <repeat name="additionalcsv" title="Additional csv fitness file(s)">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
32 <param name="input2" type="data" label="Select" />
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
33 </repeat>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
34 <param name="ref" type="data" label="GenBank reference genome"/>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
35 <conditional name="mark">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
36 <param name="certain" type="select" label="Mark certain genes?">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
37 <option value="no">No</option>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
38 <option value="yes">Yes</option>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
39 </param>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
40 <when value="no">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
41 <!-- do nothing -->
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
42 </when>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
43 <when value="yes">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
44 <param name="genes" type="data" label="Genes to mark" />
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
45 </when>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
46 </conditional>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
47 <conditional name="weighted">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
48 <param name="algorithms" type="select" label="Use weighted algorithms?">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
49 <option value="no">No</option>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
50 <option value="yes">Yes</option>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
51 </param>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
52 <when value="no"/> <!-- matches option "no": no extra inputs, and the command template adds no flag -->
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
53 <when value="yes"/> <!-- matches option "yes": no extra inputs; the command template emits "-w 1" for this value -->
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
54 </conditional>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
55 <param name="weightceiling" type="float" value="50.0" label="Weight ceiling"/>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
56 <param name="cutoff" type="float" value="10.0" label="Cutoff3"/>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
57 <conditional name="blank">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
58 <param name="count" type="select" label="Enter custom bottleneck correction value?">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
59 <option value="no">No</option>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
60 <option value="yes">Yes</option>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
61 </param>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
62 <when value="no">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
63 <param name="txt_blanks" type="data" label="txt output from Calc_fit"/>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
64 </when>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
65 <when value="yes">
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
66 <param name="custom_blanks" type="float" value="0.0" min="0.0" max="1.0" label="bottleneck value (a number from 0.0 to 1.0)"/> <!-- min/max enforce the range stated in the label before the value is passed to -b -->
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
67 </when>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
68 </conditional>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
69 </inputs>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
70 <outputs>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
71 <data name="output" format="csv"/>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
72 </outputs>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
73 <help>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
74
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
75 **What it does**
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
76
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
77 This tool calculates the aggregate fitness values of mutations by gene.
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
78
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
79 **The options explained**
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
80
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
81 The csv fitness file(s): These are the csv (comma separated values) files containing the fitness values you want to aggregate by gene. Since they should have been produced by the "Calculate Fitness" tool, each line besides the header should represent the following information for an insertion location: position,strand,count_1,count_2,ratio,mt_freq_t1,mt_freq_t2,pop_freq_t1,pop_freq_t2,gene,D,W,nW
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
82
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
83 GenBank reference genome: The reference genome of whatever model organism you're working with, which needs to be in standard GenBank format. For more on that format, see the GenBank website.
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
84
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
85 Marking certain genes: If you choose to mark certain genes, those genes will have an "M" under the M column of the resulting aggregate file.
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
86
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
87 Using weighted algorithms: Recommended. If you choose to use weighted algorithms, scores will be weighted by the number of reads their insertion location has, as insertions with more reads tend to be more accurate.
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
88
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
89 Weight ceiling: This value lets you set a weight ceiling for the weights of fitness values. It's only relevant if you're using weighted algorithms.
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
90
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
91 Cutoff3: This value lets you ignore the fitness scores of any insertion locations whose average count (the sum of the counts from t1 and t2, divided by 2) is less than it.
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
92
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
93 Bottleneck value: The percentage of insertions randomly lost, which will be discounted for all genes. Enter it as a fraction (for example, 20% would be entered as 0.20; the default is 0 if entered by hand). Alternatively, you can use the blank % calculated from the normalization genes by calc_fit by supplying its txt output file.
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
94
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
95 The name of your output file: self-explanatory. Remember to have it end in ".csv".
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
96
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
97 **Additional notes**
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
98
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
99 The output file should have each line (besides the header) represent the following information for a particular gene: locus,mean,var,sd,se,gene,Total,Blank,Not Blank,Blank Removed,M
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
100
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
101 </help>
4ea9c69aa74d Uploaded
kaymccoy
parents:
diff changeset
102 </tool>