comparison vsnp_build_tables.xml @ 0:5e258fba246c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c67950184792952302f0e89354c311d4e5ed774c"
author iuc
date Fri, 08 May 2020 12:59:27 -0400
parents
children 0bc0009f9ea0
comparison
equal deleted inserted replaced
-1:000000000000 0:5e258fba246c
1 <tool id="vsnp_build_tables" name="vSNP: build tables" version="1.0.0">
2 <description></description>
3 <requirements> <!-- Pinned package dependencies declared for this tool. -->
4 <requirement type="package" version="1.76">biopython</requirement>
5 <requirement type="package" version="0.25.3">pandas</requirement>
6 <requirement type="package" version="1.2.8">xlsxwriter</requirement>
7 </requirements>
8 <!-- Cheetah template for the job command line. It creates output_excel_dir, the directory scanned by discover_datasets in the outputs section. In collection mode every element of the three input collections is symlinked into input_json_avg_mq_dir, input_json_dir or input_newick_dir under a sanitized element identifier (any character other than whitespace, a word character or '-' becomes '_'). In single mode the three datasets are passed as explicit options, referenced through input_type_cond. The Genbank file option is added only when gbk_param is "yes", using the data table path for cached entries and the history dataset otherwise. --> <command detect_errors="exit_code"><![CDATA[
9 #import re
10 #set output_excel_dir = 'output_excel_dir'
11 #set input_type = str($input_type_cond.input_type)
12 mkdir $output_excel_dir &&
13 #if $input_type == "collection":
14 #set input_newick_dir = 'input_newick_dir'
15 mkdir $input_newick_dir &&
16 #set input_json_avg_mq_dir = 'input_json_avg_mq_dir'
17 mkdir $input_json_avg_mq_dir &&
18 #set input_json_dir = 'input_json_dir'
19 mkdir $input_json_dir &&
20 #for $i in $input_type_cond.input_avg_mq_json_collection:
21 #set file_name = $i.file_name
22 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
23 ln -s '$file_name' '$input_json_avg_mq_dir/$identifier' &&
24 #end for
25 #for $i in $input_type_cond.input_snps_json_collection:
26 #set file_name = $i.file_name
27 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
28 ln -s '$file_name' '$input_json_dir/$identifier' &&
29 #end for
30 #for $i in $input_type_cond.input_newick_collection:
31 #set file_name = $i.file_name
32 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
33 ln -s '$file_name' '$input_newick_dir/$identifier' &&
34 #end for
35 #end if
36 python '$__tool_directory__/vsnp_build_tables.py'
37 --processes \${GALAXY_SLOTS:-4}
38 #if $input_type == "single":
39 --input_avg_mq_json '$input_type_cond.input_avg_mq_json'
40 --input_snps_json '$input_type_cond.input_snps_json'
41 --input_newick '$input_type_cond.input_newick'
42 #end if
43 #if str($gbk_cond.gbk_param) == "yes":
44 #set gbk_source_cond = $gbk_cond.gbk_source_cond
45 #set gbk_source = $gbk_source_cond.gbk_source
46 #if str($gbk_source) == "cached":
47 --gbk_file '$gbk_source_cond.gbk_file.fields.path'
48 #else:
49 --gbk_file '$gbk_source_cond.gbk_file'
50 #end if
51 #end if
52 ]]></command>
53 <inputs> <!-- Tool form: the input category selector followed by the optional Genbank annotation selector. -->
54 <conditional name="input_type_cond"> <!-- Chooses between three single datasets and three list collections. -->
55 <param name="input_type" type="select" label="Choose the category for the files to be analyzed">
56 <option value="single" selected="true">Single files</option>
57 <option value="collection">Collection of files</option>
58 </param>
59 <when value="single">
60 <param name="input_snps_json" type="data" format="json" label="SNPs json file">
61 <validator type="unspecified_build"/>
62 </param>
63 <param name="input_avg_mq_json" type="data" format="json" label="Average MQ json file">
64 <validator type="unspecified_build"/>
65 </param>
66 <param name="input_newick" type="data" format="newick" label="Best-scoring ML tree file">
67 <validator type="unspecified_build"/>
68 </param>
69 </when>
70 <when value="collection">
71 <param name="input_snps_json_collection" format="json" type="data_collection" collection_type="list" label="Collection of SNPs json files">
72 <validator type="unspecified_build"/>
73 </param>
74 <param name="input_avg_mq_json_collection" format="json" type="data_collection" collection_type="list" label="Collection of average MQ json files">
75 <validator type="unspecified_build"/>
76 </param>
77 <param name="input_newick_collection" format="newick" type="data_collection" collection_type="list" label="Collection of best-scoring ML tree files">
78 <validator type="unspecified_build"/>
79 </param>
80 </when>
81 </conditional>
82 <conditional name="gbk_cond"> <!-- Optional Genbank annotation; when enabled, the file comes from the vsnp_genbank data table or from the history. -->
83 <param name="gbk_param" type="select" label="Use Genbank file?">
84 <option value="yes" selected="true">yes</option>
85 <option value="no">no</option>
86 </param>
87 <when value="yes">
88 <conditional name="gbk_source_cond">
89 <param name="gbk_source" type="select" label="Choose the source for the Genbank file">
90 <option value="cached" selected="true">locally cached</option>
91 <option value="history">from history</option>
92 </param>
93 <when value="cached">
94 <param name="gbk_file" type="select" label="Genbank file">
95 <options from_data_table="vsnp_genbank">
96 <!-- No filter here! -->
97 </options>
98 <validator type="no_options" message="A cached Genbank file is not available for the build associated with the selected average MQ json file"/>
99 </param>
100 </when>
101 <when value="history">
102 <param name="gbk_file" type="data" format="genbank" label="Genbank file">
103 <validator type="no_options" message="The current history does not include a genbank dataset"/>
104 </param>
105 </when>
106 </conditional>
107 </when>
108 <when value="no"/>
109 </conditional>
110 </inputs>
111 <outputs> <!-- One list collection named "excel", populated from every file found in output_excel_dir and typed as xlsx. -->
112 <collection type="list" name="excel">
113 <discover_datasets directory="output_excel_dir" pattern="__name__" format="xlsx"/>
114 </collection>
115 </outputs>
116 <tests>
117 <test> <!-- Single-file mode (input_type is left at its default) without Genbank annotation; expects two spreadsheets, compared by similar size. -->
118 <param name="input_snps_json" value="input_snps_json.json" ftype="json" dbkey="89"/>
119 <param name="input_newick" value="input_newick.newick" ftype="newick" dbkey="89"/>
120 <param name="input_avg_mq_json" value="input_avg_mq_json.json" ftype="json" dbkey="89"/>
121 <param name="gbk_param" value="no"/>
122 <output_collection name="excel" type="list">
123 <element name="cascade_table.xlsx" file="cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
124 <element name="sort_table.xlsx" file="sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
125 </output_collection>
126 </test>
127 <test> <!-- Collection mode without Genbank annotation: a three-element list for each input; expects one cascade table and one sort table per element. -->
128 <param name="input_type" value="collection"/>
129 <param name="input_snps_json_collection">
130 <collection type="list">
131 <element name="Mbovis-01_snps.json" value="Mbovis-01_snps.json" dbkey="89"/>
132 <element name="Mbovis-01D_snps.json" value="Mbovis-01D_snps.json" dbkey="89"/>
133 <element name="Mbovis-01D6_snps.json" value="Mbovis-01D6_snps.json" dbkey="89"/>
134 </collection>
135 </param>
136 <param name="input_newick_collection">
137 <collection type="list">
138 <element name="Mbovis-01_snps.newick" value="Mbovis-01_snps.newick" dbkey="89"/>
139 <element name="Mbovis-01D_snps.newick" value="Mbovis-01D_snps.newick" dbkey="89"/>
140 <element name="Mbovis-01D6_snps.newick" value="Mbovis-01D6_snps.newick" dbkey="89"/>
141 </collection>
142 </param>
143 <param name="input_avg_mq_json_collection"> <!-- Element names reuse the *_snps.json identifiers while the values are the *_avg_mq.json files; presumably so identifiers line up with the SNPs collection (TODO confirm against vsnp_build_tables.py). -->
144 <collection type="list">
145 <element name="Mbovis-01_snps.json" value="Mbovis-01_avg_mq.json" dbkey="89"/>
146 <element name="Mbovis-01D_snps.json" value="Mbovis-01D_avg_mq.json" dbkey="89"/>
147 <element name="Mbovis-01D6_snps.json" value="Mbovis-01D6_avg_mq.json" dbkey="89"/>
148 </collection>
149 </param>
150 <param name="gbk_param" value="no"/>
151 <output_collection name="excel" type="list">
152 <element name="Mbovis-01D6_snps_cascade_table.xlsx" file="Mbovis-01D6_cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
153 <element name="Mbovis-01D6_snps_sort_table.xlsx" file="Mbovis-01D6_sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
154 <element name="Mbovis-01D_snps_cascade_table.xlsx" file="Mbovis-01D_cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
155 <element name="Mbovis-01D_snps_sort_table.xlsx" file="Mbovis-01D_sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
156 <element name="Mbovis-01_snps_cascade_table.xlsx" file="Mbovis-01_cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
157 <element name="Mbovis-01_snps_sort_table.xlsx" file="Mbovis-01_sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
158 </output_collection>
159 </test>
160 </tests>
161 <help>
162 **What it does**
163
164 Accepts a combination of single SNPs json, average MQ json and newick files (or associated collections of
165 each) to produce annotated SNPs tables in the form of Excel spreadsheets. The SNPs json and average MQ json
166 files are typically produced by the **vSNP: get SNPs** tool and the newick files are typically produced by
167 the **Phyogenetic reconstruction with RaXML** tool.
168
169 The SNPs tables display closely related isolates and enable identification of mixed SNPs when multiple
170 bacterial strains are infecting an organism. The table structure is shown below. The columns identify the
171 genome location of the SNP calls and the isolates are contained within the rows. The reference (or ancestral
172 strain if the reference is an outgroup) is listed across the top, identified as the "reference call". SNPs
173 that are not highlighted will match the reference. The map-quality row values are the average of the map
174 quality scores of each isolate in that position. These scores measure the confidence that the read has been
175 mapped to the correct location on the genome. The maximum score possible is 60, and lower scores lessen the
176 confidence that the SNP was correctly identified. The annotation of the position is provided at the bottom
177 of the table.
178
179 .. image:: table_description.png
180
181 SNPs are sorted according to their evolutionary age within the table. The oldest SNPs (encompassing the most
182 isolates) are furthest to the left. This sorting is somewhat crude - the intent is to improve readability or
183 more easily match a related tree.
184
185 For a more detailed discussion, see the **Validating and correcting SNP calls** section of
186 the `vSNP documentation`_.
187
188 .. _vSNP documentation: https://github.com/USDA-VS/vSNP/blob/master/docs/detailed_usage.md
189
190 **Required Options**
191
192 * **Choose the category for the files to be analyzed** - select "Single files" or "Collection of files", then select the appropriate history items (single SNPs json, average MQ json and newick files, or collections of each) based on the selected option.
193 * **Use Genbank file** - Select "yes" to annotate the tables using the information in the Genbank file. Locally cached files, if available, provide the most widely used annotations, but more custom Genbank files can be chosen from the current history.
194 </help>
195 <citations> <!-- NOTE(review): the BibTeX entry below is a placeholder; its key, journal and year are the literal string "None" and the title is "Manuscript in preparation". Replace it once a publication exists. -->
196 <citation type="bibtex">
197 @misc{None,
198 journal = {None},
199 author = {1. Stuber T},
200 title = {Manuscript in preparation},
201 year = {None},
202 url = {https://github.com/USDA-VS/vSNP},}
203 </citation>
204 </citations>
205 </tool>
206