annotate vsnp_build_tables.xml @ 2:85384a9bfba2 draft

Uploaded
author greg
date Thu, 30 Apr 2020 15:55:22 -0400
parents b60858c3eb91
children abfb861df879
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
1 <tool id="vsnp_build_tables" name="vSNP: build tables" version="1.0.0">
38a38babcb31 Uploaded
greg
parents:
diff changeset
2 <description></description>
38a38babcb31 Uploaded
greg
parents:
diff changeset
3 <requirements>
38a38babcb31 Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="1.76">biopython</requirement>
38a38babcb31 Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="0.25.3">pandas</requirement>
38a38babcb31 Uploaded
greg
parents:
diff changeset
6 <requirement type="package" version="1.2.8">xlsxwriter</requirement>
38a38babcb31 Uploaded
greg
parents:
diff changeset
7 </requirements>
38a38babcb31 Uploaded
greg
parents:
diff changeset
8 <command detect_errors="exit_code"><![CDATA[
38a38babcb31 Uploaded
greg
parents:
diff changeset
9 #import re
38a38babcb31 Uploaded
greg
parents:
diff changeset
10 #set output_excel_dir = 'output_excel_dir'
38a38babcb31 Uploaded
greg
parents:
diff changeset
11 #set input_type = $input_type_cond.input_type
38a38babcb31 Uploaded
greg
parents:
diff changeset
12 mkdir $output_excel_dir &&
38a38babcb31 Uploaded
greg
parents:
diff changeset
13 #if $input_type == "collection":
38a38babcb31 Uploaded
greg
parents:
diff changeset
14 #set input_newick_dir = 'input_newick_dir'
38a38babcb31 Uploaded
greg
parents:
diff changeset
15 mkdir $input_newick_dir &&
38a38babcb31 Uploaded
greg
parents:
diff changeset
16 #set input_json_avg_mq_dir = 'input_json_avg_mq_dir'
38a38babcb31 Uploaded
greg
parents:
diff changeset
17 mkdir $input_json_avg_mq_dir &&
38a38babcb31 Uploaded
greg
parents:
diff changeset
18 #set input_json_dir = 'input_json_dir'
38a38babcb31 Uploaded
greg
parents:
diff changeset
19 mkdir $input_json_dir &&
38a38babcb31 Uploaded
greg
parents:
diff changeset
20 #for $i in $input_type_cond.input_avg_mq_json_collection:
38a38babcb31 Uploaded
greg
parents:
diff changeset
21 #set file_name = $i.file_name
38a38babcb31 Uploaded
greg
parents:
diff changeset
22 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
1
b60858c3eb91 Uploaded
greg
parents: 0
diff changeset
23 ln -s '$file_name' '$input_json_avg_mq_dir/$identifier' &&
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
24 #end for
38a38babcb31 Uploaded
greg
parents:
diff changeset
25 #for $i in $input_type_cond.input_snps_json_collection:
38a38babcb31 Uploaded
greg
parents:
diff changeset
26 #set file_name = $i.file_name
38a38babcb31 Uploaded
greg
parents:
diff changeset
27 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
1
b60858c3eb91 Uploaded
greg
parents: 0
diff changeset
28 ln -s '$file_name' '$input_json_dir/$identifier' &&
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
29 #end for
38a38babcb31 Uploaded
greg
parents:
diff changeset
30 #for $i in $input_type_cond.input_newick_collection:
38a38babcb31 Uploaded
greg
parents:
diff changeset
31 #set file_name = $i.file_name
38a38babcb31 Uploaded
greg
parents:
diff changeset
32 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
1
b60858c3eb91 Uploaded
greg
parents: 0
diff changeset
33 ln -s '$file_name' '$input_newick_dir/$identifier' &&
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
34 #end for
38a38babcb31 Uploaded
greg
parents:
diff changeset
35 #end if
38a38babcb31 Uploaded
greg
parents:
diff changeset
36 python '$__tool_directory__/vsnp_build_tables.py'
1
b60858c3eb91 Uploaded
greg
parents: 0
diff changeset
37 --processes \${GALAXY_SLOTS:-4}
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
38 #if $input_type == "single":
38a38babcb31 Uploaded
greg
parents:
diff changeset
39 --input_avg_mq_json '$input_avg_mq_json'
38a38babcb31 Uploaded
greg
parents:
diff changeset
40 --input_snps_json '$input_snps_json'
38a38babcb31 Uploaded
greg
parents:
diff changeset
41 --input_newick '$input_newick'
38a38babcb31 Uploaded
greg
parents:
diff changeset
42 #end if:
38a38babcb31 Uploaded
greg
parents:
diff changeset
43 #if str($gbk_cond.gbk_param) == "yes":
38a38babcb31 Uploaded
greg
parents:
diff changeset
44 #set gbk_source_cond = $gbk_cond.gbk_source_cond
38a38babcb31 Uploaded
greg
parents:
diff changeset
45 #set gbk_source = $gbk_source_cond.gbk_source
38a38babcb31 Uploaded
greg
parents:
diff changeset
46 #if str($gbk_source) == "cached":
38a38babcb31 Uploaded
greg
parents:
diff changeset
47 --gbk_file '$gbk_source_cond.gbk_file.fields.path'
38a38babcb31 Uploaded
greg
parents:
diff changeset
48 #else:
38a38babcb31 Uploaded
greg
parents:
diff changeset
49 --gbk_file '$gbk_source_cond.gbk_file'
38a38babcb31 Uploaded
greg
parents:
diff changeset
50 #end if
38a38babcb31 Uploaded
greg
parents:
diff changeset
51 #end if
38a38babcb31 Uploaded
greg
parents:
diff changeset
52 ]]></command>
38a38babcb31 Uploaded
greg
parents:
diff changeset
53 <inputs>
38a38babcb31 Uploaded
greg
parents:
diff changeset
54 <conditional name="input_type_cond">
38a38babcb31 Uploaded
greg
parents:
diff changeset
55 <param name="input_type" type="select" label="Choose the category for the files to be analyzed">
38a38babcb31 Uploaded
greg
parents:
diff changeset
56 <option value="single" selected="true">Single files</option>
38a38babcb31 Uploaded
greg
parents:
diff changeset
57 <option value="collection">Collection of files</option>
38a38babcb31 Uploaded
greg
parents:
diff changeset
58 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
59 <when value="single">
38a38babcb31 Uploaded
greg
parents:
diff changeset
60 <param name="input_snps_json" type="data" format="json" label="SNPs json file">
38a38babcb31 Uploaded
greg
parents:
diff changeset
61 <validator type="unspecified_build"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
62 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
63 <param name="input_avg_mq_json" type="data" format="json" label="Average MQ json file">
38a38babcb31 Uploaded
greg
parents:
diff changeset
64 <validator type="unspecified_build"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
65 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
66 <param name="input_newick" type="data" format="newick" label="Best-scoring ML tree file">
38a38babcb31 Uploaded
greg
parents:
diff changeset
67 <validator type="unspecified_build"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
68 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
69 </when>
38a38babcb31 Uploaded
greg
parents:
diff changeset
70 <when value="collection">
38a38babcb31 Uploaded
greg
parents:
diff changeset
71 <param name="input_snps_json_collection" format="json" type="data_collection" collection_type="list" label="Collection of SNPs json files">
38a38babcb31 Uploaded
greg
parents:
diff changeset
72 <validator type="unspecified_build"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
73 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
74 <param name="input_avg_mq_json_collection" format="json" type="data_collection" collection_type="list" label="Collection of average MQ json files">
38a38babcb31 Uploaded
greg
parents:
diff changeset
75 <validator type="unspecified_build"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
76 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
77 <param name="input_newick_collection" format="newick" type="data_collection" collection_type="list" label="Collection of best-scoring ML tree files">
38a38babcb31 Uploaded
greg
parents:
diff changeset
78 <validator type="unspecified_build"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
79 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
80 </when>
38a38babcb31 Uploaded
greg
parents:
diff changeset
81 </conditional>
38a38babcb31 Uploaded
greg
parents:
diff changeset
82 <conditional name="gbk_cond">
38a38babcb31 Uploaded
greg
parents:
diff changeset
83 <param name="gbk_param" type="select" label="Use Genbank file?">
38a38babcb31 Uploaded
greg
parents:
diff changeset
84 <option value="yes" selected="true">yes</option>
38a38babcb31 Uploaded
greg
parents:
diff changeset
85 <option value="no">No</option>
38a38babcb31 Uploaded
greg
parents:
diff changeset
86 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
87 <when value="yes">
38a38babcb31 Uploaded
greg
parents:
diff changeset
88 <conditional name="gbk_source_cond">
38a38babcb31 Uploaded
greg
parents:
diff changeset
89 <param name="gbk_source" type="select" label="Choose the source for the Genbank file">
38a38babcb31 Uploaded
greg
parents:
diff changeset
90 <option value="cached" selected="true">locally cached</option>
38a38babcb31 Uploaded
greg
parents:
diff changeset
91 <option value="history">from history</option>
38a38babcb31 Uploaded
greg
parents:
diff changeset
92 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
93 <when value="cached">
38a38babcb31 Uploaded
greg
parents:
diff changeset
94 <param name="gbk_file" type="select" label="Genbank file">
38a38babcb31 Uploaded
greg
parents:
diff changeset
95 <options from_data_table="vsnp_genbank">
38a38babcb31 Uploaded
greg
parents:
diff changeset
96 <!-- No filter here! -->
38a38babcb31 Uploaded
greg
parents:
diff changeset
97 </options>
38a38babcb31 Uploaded
greg
parents:
diff changeset
98 <validator type="no_options" message="A cached Genbank file is not available for the build associated with the selected average MQ json file"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
99 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
100 </when>
38a38babcb31 Uploaded
greg
parents:
diff changeset
101 <when value="history">
38a38babcb31 Uploaded
greg
parents:
diff changeset
102 <param name="gbk_file" type="data" format="genbank" label="Genbank file">
38a38babcb31 Uploaded
greg
parents:
diff changeset
103 <validator type="no_options" message="The current history does not include a genbank dataset"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
104 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
105 </when>
38a38babcb31 Uploaded
greg
parents:
diff changeset
106 </conditional>
38a38babcb31 Uploaded
greg
parents:
diff changeset
107 </when>
38a38babcb31 Uploaded
greg
parents:
diff changeset
108 <when value="no"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
109 </conditional>
38a38babcb31 Uploaded
greg
parents:
diff changeset
110 </inputs>
38a38babcb31 Uploaded
greg
parents:
diff changeset
111 <outputs>
38a38babcb31 Uploaded
greg
parents:
diff changeset
112 <collection name="excel" type="list">
38a38babcb31 Uploaded
greg
parents:
diff changeset
113 <discover_datasets pattern="__name__" directory="output_excel_dir" format="xlsx" />
38a38babcb31 Uploaded
greg
parents:
diff changeset
114 </collection>
38a38babcb31 Uploaded
greg
parents:
diff changeset
115 </outputs>
38a38babcb31 Uploaded
greg
parents:
diff changeset
116 <tests>
38a38babcb31 Uploaded
greg
parents:
diff changeset
117 <test>
38a38babcb31 Uploaded
greg
parents:
diff changeset
118 <param name="input_snps_json" value="input_snps_json.json" ftype="json" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
119 <param name="input_newick" value="input_newick.newick" ftype="newick" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
120 <param name="input_avg_mq_json" value="input_avg_mq_json.json" ftype="json" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
121 <param name="gbk_param" value="no"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
122 <output_collection name="excel" type="list">
38a38babcb31 Uploaded
greg
parents:
diff changeset
123 <element name="cascade_table.xlsx" file="cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
124 <element name="sort_table.xlsx" file="sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
125 </output_collection>
38a38babcb31 Uploaded
greg
parents:
diff changeset
126 </test>
38a38babcb31 Uploaded
greg
parents:
diff changeset
127 <test>
38a38babcb31 Uploaded
greg
parents:
diff changeset
128 <param name="input_type" value="collection"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
129 <param name="input_snps_json_collection">
38a38babcb31 Uploaded
greg
parents:
diff changeset
130 <collection type="list">
38a38babcb31 Uploaded
greg
parents:
diff changeset
131 <element name="Mbovis-01_snps.json" value="Mbovis-01_snps.json" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
132 <element name="Mbovis-01D_snps.json" value="Mbovis-01D_snps.json" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
133 <element name="Mbovis-01D6_snps.json" value="Mbovis-01D6_snps.json" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
134 </collection>
38a38babcb31 Uploaded
greg
parents:
diff changeset
135 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
136 <param name="input_newick_collection">
38a38babcb31 Uploaded
greg
parents:
diff changeset
137 <collection type="list">
1
b60858c3eb91 Uploaded
greg
parents: 0
diff changeset
138 <element name="Mbovis-01_snps.newick" value="Mbovis-01_snps.newick" dbkey="89"/>
b60858c3eb91 Uploaded
greg
parents: 0
diff changeset
139 <element name="Mbovis-01D_snps.newick" value="Mbovis-01D_snps.newick" dbkey="89"/>
b60858c3eb91 Uploaded
greg
parents: 0
diff changeset
140 <element name="Mbovis-01D6_snps.newick" value="Mbovis-01D6_snps.newick" dbkey="89"/>
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
141 </collection>
38a38babcb31 Uploaded
greg
parents:
diff changeset
142 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
143 <param name="input_avg_mq_json_collection">
38a38babcb31 Uploaded
greg
parents:
diff changeset
144 <collection type="list">
38a38babcb31 Uploaded
greg
parents:
diff changeset
145 <element name="Mbovis-01_snps.json" value="Mbovis-01_avg_mq.json" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
146 <element name="Mbovis-01D_snps.json" value="Mbovis-01D_avg_mq.json" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
147 <element name="Mbovis-01D6_snps.json" value="Mbovis-01D6_avg_mq.json" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
148 </collection>
38a38babcb31 Uploaded
greg
parents:
diff changeset
149 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
150 <param name="gbk_param" value="no"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
151 <output_collection name="excel" type="list">
38a38babcb31 Uploaded
greg
parents:
diff changeset
152 <element name="Mbovis-01D6_snps_cascade_table.xlsx" file="Mbovis-01D6_cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
153 <element name="Mbovis-01D6_snps_sort_table.xlsx" file="Mbovis-01D6_sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
154 <element name="Mbovis-01D_snps_cascade_table.xlsx" file="Mbovis-01D_cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
155 <element name="Mbovis-01D_snps_sort_table.xlsx" file="Mbovis-01D_sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
156 <element name="Mbovis-01_snps_cascade_table.xlsx" file="Mbovis-01_cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
157 <element name="Mbovis-01_snps_sort_table.xlsx" file="Mbovis-01_sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
158 </output_collection>
38a38babcb31 Uploaded
greg
parents:
diff changeset
159 </test>
38a38babcb31 Uploaded
greg
parents:
diff changeset
160 </tests>
38a38babcb31 Uploaded
greg
parents:
diff changeset
161 <help>
38a38babcb31 Uploaded
greg
parents:
diff changeset
162 **What it does**
38a38babcb31 Uploaded
greg
parents:
diff changeset
163
38a38babcb31 Uploaded
greg
parents:
diff changeset
164 Accepts a combination of single SNPs json, average MQ json and newick files (or associated collections of
38a38babcb31 Uploaded
greg
parents:
diff changeset
165 each) to produce annotated SNPs tables in the form of Excel spreadsheets. The SNPs json and average MQ json
38a38babcb31 Uploaded
greg
parents:
diff changeset
166 files are typically produced by the **vSNP: get SNPs** tool and the newick files are typically produced by
38a38babcb31 Uploaded
greg
parents:
diff changeset
167 the **Phyogenetic reconstruction with RaXML** tool.
38a38babcb31 Uploaded
greg
parents:
diff changeset
168
38a38babcb31 Uploaded
greg
parents:
diff changeset
169 The SNPs tables display closely related isolates and enable identification of mixed SNPs when multiple
38a38babcb31 Uploaded
greg
parents:
diff changeset
170 bacterial strains are infecting an organism. The table structure is shown below. The columns identify the
38a38babcb31 Uploaded
greg
parents:
diff changeset
171 genome location of the SNP calls and the isolates are contained within the rows. The reference (or ancestral
38a38babcb31 Uploaded
greg
parents:
diff changeset
172 strain if the reference is an outgroup) is listed across the top, identified as the "reference call". SNPs
38a38babcb31 Uploaded
greg
parents:
diff changeset
173 that are not highlighted will match the reference. The map-quality row values are the average of the map
38a38babcb31 Uploaded
greg
parents:
diff changeset
174 quality scores of each isolate in that position. These scores measure the confidence that the read has been
38a38babcb31 Uploaded
greg
parents:
diff changeset
175 mapped to the correct location on the genome. The maximum score possible is 60, and lower scores lessen the
38a38babcb31 Uploaded
greg
parents:
diff changeset
176 confidence that the SNP was correctly identified. The annotation of the position is provided at the bottom
38a38babcb31 Uploaded
greg
parents:
diff changeset
177 of the table.
38a38babcb31 Uploaded
greg
parents:
diff changeset
178
38a38babcb31 Uploaded
greg
parents:
diff changeset
179 .. image:: table_description.png
38a38babcb31 Uploaded
greg
parents:
diff changeset
180
38a38babcb31 Uploaded
greg
parents:
diff changeset
181 SNPs are sorted according to their evolutionary age within the table. The oldest SNPs (encompassing the most
38a38babcb31 Uploaded
greg
parents:
diff changeset
182 isolates) are furthest to the left. This sorting is somewhat crude - the intent is to improve readability or
38a38babcb31 Uploaded
greg
parents:
diff changeset
183 more easily match a related tree.
38a38babcb31 Uploaded
greg
parents:
diff changeset
184
38a38babcb31 Uploaded
greg
parents:
diff changeset
185 For a more detailed discussion, see the **Validating and correcting SNP calls** section of
2
85384a9bfba2 Uploaded
greg
parents: 1
diff changeset
186 the `vSNP documentation`_.
85384a9bfba2 Uploaded
greg
parents: 1
diff changeset
187
85384a9bfba2 Uploaded
greg
parents: 1
diff changeset
188 .. _vSNP documentation: https://github.com/USDA-VS/vSNP/blob/master/docs/detailed_usage.md
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
189
38a38babcb31 Uploaded
greg
parents:
diff changeset
190 **Required Options**
38a38babcb31 Uploaded
greg
parents:
diff changeset
191
38a38babcb31 Uploaded
greg
parents:
diff changeset
192 * **Choose the category for the files to be analyzed** - select "Single files" or "Collection of files", then select the appropriate history items (single SNPs json, average MQ json and newick files, or collections of each) based on the selected option.
38a38babcb31 Uploaded
greg
parents:
diff changeset
193 * **Use Genbank file?** - Select "yes" to annotate the tables using the information in the Genbank file. Locally cached files, if available, provide the most widely used annotations, but custom Genbank files can also be chosen from the current history.
38a38babcb31 Uploaded
greg
parents:
diff changeset
194 </help>
38a38babcb31 Uploaded
greg
parents:
diff changeset
195 <citations>
38a38babcb31 Uploaded
greg
parents:
diff changeset
196 <citation type="bibtex">
38a38babcb31 Uploaded
greg
parents:
diff changeset
197 @misc{None,
38a38babcb31 Uploaded
greg
parents:
diff changeset
198 journal = {None},
38a38babcb31 Uploaded
greg
parents:
diff changeset
199 author = {1. Stuber T},
38a38babcb31 Uploaded
greg
parents:
diff changeset
200 title = {Manuscript in preparation},
38a38babcb31 Uploaded
greg
parents:
diff changeset
201 year = {None},
38a38babcb31 Uploaded
greg
parents:
diff changeset
202 url = {https://github.com/USDA-VS/vSNP},}
38a38babcb31 Uploaded
greg
parents:
diff changeset
203 </citation>
38a38babcb31 Uploaded
greg
parents:
diff changeset
204 </citations>
38a38babcb31 Uploaded
greg
parents:
diff changeset
205 </tool>
38a38babcb31 Uploaded
greg
parents:
diff changeset
206