comparison phylogenetic_tree.xml @ 14:8ae67e9fb6ff

Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
author miller-lab
date Fri, 28 Sep 2012 11:35:56 -0400
parents
children f04f40a36cc8
comparison
equal deleted inserted replaced
13:fdb4240fb565 14:8ae67e9fb6ff
1 <tool id="gd_phylogenetic_tree" name="Phylogenetic Tree" version="1.0.0">
2 <description>: Show genetic relationships among individuals</description>
3 <!-- Builds the phylogenetic_tree.py command line. Positional arguments, in order: the input SNP dataset; "all_individuals" or the population dataset (p1_input); the output file and its files_path directory; minimum coverage; minimum quality; "none" (when the scaffold/ref and pos/rPos metadata match, or include_reference is '0') or otherwise the input's dbkey; the data source; the draw_tree option string ("" or "-" followed by the selected style letters); then one "column:name" argument per individual listed in the input's metadata. -->
4 <command interpreter="python">
5 phylogenetic_tree.py "$input"
6 #if $individuals.choice == '0'
7 "all_individuals"
8 #else if $individuals.choice == '1'
9 "$p1_input"
10 #end if
11 "$output" "$output.files_path" "$minimum_coverage" "$minimum_quality"
12 #if ((str($input.metadata.scaffold) == str($input.metadata.ref)) and (str($input.metadata.pos) == str($input.metadata.rPos))) or (str($include_reference) == '0')
13 "none"
14 #else
15 "$input.metadata.dbkey"
16 #end if
17 "$data_source"
18 #set $draw_tree_options = ''.join(str(x) for x in [$branch_style, $scale_style, $length_style, $layout_style])
19 #if $draw_tree_options == ''
20 ""
21 #else
22 "-$draw_tree_options"
23 #end if
24 #for $individual_name, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
25 #set $arg = '%s:%s' % ($individual_col, $individual_name)
26 "$arg"
27 #end for
28 </command>
29 <!-- User-facing parameters; the command template refers to each of them by its name attribute. -->
30 <inputs>
31 <param name="input" type="data" format="gd_snp" label="SNP dataset" />
32 <!-- Chooses between all individuals (choice 0) and the individuals of a population dataset (choice 1, which adds p1_input). -->
33 <conditional name="individuals">
34 <param name="choice" type="select" label="Individuals">
35 <option value="0" selected="true">All individuals</option>
36 <option value="1">Individuals in a population</option>
37 </param>
38 <when value="0" />
39 <when value="1">
40 <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
41 </when>
42 </conditional>
43 <!-- Thresholds passed unchanged to phylogenetic_tree.py. -->
44 <param name="minimum_coverage" type="integer" min="0" value="0" label="Minimum coverage" />
45 <!-- NOTE(review): both thresholds default to 0 although the help below says they cannot both be 0; confirm whether a non-zero default is wanted. -->
46 <param name="minimum_quality" type="integer" min="0" value="0" label="Minimum quality" help="Note: minimum coverage and minimum quality cannot both be 0" />
47 <!-- The command template compares this value as a string: '0' forces the reference argument to "none". The 'format' attribute was dropped because it applies to data parameters, not selects. -->
48 <param name="include_reference" type="select" label="Include reference sequence">
49 <option value="1" selected="true">Yes</option>
50 <option value="0">No</option>
51 </param>
52 <!-- Passed unchanged to phylogenetic_tree.py as the data source argument (0 = sequence coverage, 1 = estimated genotype). -->
53 <param name="data_source" type="select" label="Data source">
54 <option value="0" selected="true">sequence coverage</option>
55 <option value="1">estimated genotype</option>
56 </param>
57 <!-- Each of the four radio selects below contributes either an empty string or one option letter; the command template concatenates them into the draw_tree option string. -->
58 <param name="branch_style" type="select" display="radio">
59 <label>Branch type</label>
60 <option value="" selected="true">square</option>
61 <option value="d">diagonal</option>
62 </param>
63
64 <param name="scale_style" type="select" display="radio">
65 <label>Draw branches to scale</label>
66 <option value="" selected="true">yes</option>
67 <option value="s">no</option>
68 </param>
69
70 <param name="length_style" type="select" display="radio">
71 <label>Show branch lengths</label>
72 <option value="" selected="true">yes</option>
73 <option value="b">no</option>
74 </param>
75
76 <param name="layout_style" type="select" display="radio">
77 <label>Tree layout</label>
78 <option value="" selected="true">horizontal</option>
79 <option value="v">vertical</option>
80 </param>
81 </inputs>
82 <!-- A single HTML dataset; the command template also passes its files_path directory to the script. -->
83 <outputs>
84 <data format="html" name="output"/>
85 </outputs>
86 <!-- Functional test: all individuals, minimum coverage 3, minimum quality 30, sequence-coverage data source and the default drawing options. The HTML output is diffed with two differing lines allowed; tree.pdf is compared by size only (delta 1000). -->
87 <tests>
88 <test>
89 <param name="input" ftype="gd_snp" value="test_in/sample.gd_snp"/>
90 <param name="choice" value="0"/>
91 <param name="minimum_coverage" value="3"/>
92 <param name="minimum_quality" value="30"/>
93 <param name="data_source" value="0"/>
94 <param name="branch_style" value=""/>
95 <param name="scale_style" value=""/>
96 <param name="length_style" value=""/>
97 <param name="layout_style" value=""/>
98 <output name="output" ftype="html" file="test_out/phylogenetic_tree/phylogenetic_tree.html" compare="diff" lines_diff="2">
99 <extra_files name="distance_matrix.phylip" type="file" value="test_out/phylogenetic_tree/distance_matrix.phylip"/>
100 <extra_files name="informative_snps.txt" type="file" value="test_out/phylogenetic_tree/informative_snps.txt"/>
101 <extra_files name="mega_distance_matrix.txt" type="file" value="test_out/phylogenetic_tree/mega_distance_matrix.txt"/>
102 <extra_files name="phylogenetic_tree.newick" type="file" value="test_out/phylogenetic_tree/phylogenetic_tree.newick"/>
103 <extra_files name="tree.pdf" type="file" value="test_out/phylogenetic_tree/tree.pdf" compare="sim_size" delta="1000"/>
104 </output>
105 </test>
106 </tests>
107
108 <help>
109
110 **Dataset formats**
111
112 The input dataset is in gd_snp_ format.
113 The output is a composite dataset, containing the tree in both text (Newick_)
114 and PDF formats, as well as supplemental text information.
115 (`Dataset missing?`_)
116
117 .. _gd_snp: ./static/formatHelp.html#gd_snp
118 .. _Newick: http://evolution.genetics.washington.edu/phylip/newicktree.html
119 .. _Dataset missing?: ./static/formatHelp.html
120
121 -----
122
123 **What it does**
124
125 This tool uses a gd_snp dataset to determine a kind of "genetic distance"
126 between each pair of individuals. That information is used to
127 produce a tree-shaped figure that depicts how the individuals are related,
128 both as text files and as a diagram.
129 The text files include a common tree format, Newick, as well as distance
130 matrices and counts of informative SNPs for each pairwise comparison.
131 The informative SNPs can be used as a guide to how reliable the tree is.
132
133 The input parameters are:
134
135 SNP dataset
136 A table of SNPs for various individuals, in gd_snp format.
137
138 Individuals
139 By default all individuals are included in the analysis, but this can
140 optionally be restricted to a subset that has been defined using the
141 Specify Individuals tool.
142
143 Minimum coverage
144 For each pair of individuals, the tool looks for informative SNPs, i.e.,
145 where the sequence data for both individuals is adequate according to
146 some criterion. Specifying, say, 7 for this option instructs the tool
147 to consider only SNPs with coverage at least 7 in both individuals
148 when estimating their "genetic distance".
149
150 Minimum quality
151 Specifying, say, 37 for this option instructs the tool to consider
152 only SNPs with SAMtools quality value at least 37 in both individuals
153 when estimating their "genetic distance".
154
155 Include reference sequence
156 For gd_snp datasets containing columns for a reference sequence, the
157 user can ask that the reference be indicated in the tree, to help with
158 rooting it. If the dataset has no reference columns, this option has
159 no effect.
160
161 Data source
162 The genetic distance between two individuals at a given SNP can
163 be estimated two ways. One method is to use the absolute value of the
164 difference in the frequency of the first allele (or equivalently, the
165 second allele). For instance, if the first individual has 5 reads of
166 each allele and the second individual has respectively 3 and 6 reads,
167 then the frequencies are 1/2 and 1/3, giving a distance 1/6 at that
168 SNP. The other approach is to use the SAMtools genotypes to estimate
169 the difference in the number of occurrences of the first allele.
170 For instance, if the two genotypes are 2 and 1, i.e., the individuals
171 are estimated to have respectively 2 and 1 occurrences of the first
172 allele at this location, then the distance is 1 (the absolute value
173 of the difference of the two numbers).
174
175 Output options
176 The final four options apply mostly to the graphical drawing of the
177 tree, except that the branch lengths are also added to the Newick text
178 file.
179
180 -----
181
182 **Acknowledgments**
183
184 To convert the distance matrix to a Newick-formatted tree, we use the
185 QuickTree program from
186 http://www.sanger.ac.uk/resources/software/quicktree/ .
187
188 To make the diagram we use draw_tree, available at
189 http://compgen.bscb.cornell.edu/phast/ .
190
191 </help>
192 </tool>