annotate vsnp_build_tables.xml @ 5:2ec51816eaac draft

Uploaded
author greg
date Wed, 16 Jun 2021 17:43:30 +0000
parents b67a6326a96b
children 8399428b1e57
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
1 <tool id="vsnp_build_tables" name="vSNP: build tables" version="@WRAPPER_VERSION@.2" profile="@PROFILE@">
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
2 <description></description>
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
3 <macros>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
4 <import>macros.xml</import>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
5 </macros>
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
6 <requirements>
38a38babcb31 Uploaded
greg
parents:
diff changeset
7 <requirement type="package" version="1.76">biopython</requirement>
38a38babcb31 Uploaded
greg
parents:
diff changeset
8 <requirement type="package" version="0.25.3">pandas</requirement>
38a38babcb31 Uploaded
greg
parents:
diff changeset
9 <requirement type="package" version="1.2.8">xlsxwriter</requirement>
38a38babcb31 Uploaded
greg
parents:
diff changeset
10 </requirements>
38a38babcb31 Uploaded
greg
parents:
diff changeset
11 <command detect_errors="exit_code"><![CDATA[
38a38babcb31 Uploaded
greg
parents:
diff changeset
12 #import re
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
13
abfb861df879 Uploaded
greg
parents: 2
diff changeset
14 mkdir 'output_excel_dir' &&
abfb861df879 Uploaded
greg
parents: 2
diff changeset
15
abfb861df879 Uploaded
greg
parents: 2
diff changeset
16 ## The input_snps_json and input_avg_mq_json identifiers
abfb861df879 Uploaded
greg
parents: 2
diff changeset
17 ## are typically the same string, so we append a unique
abfb861df879 Uploaded
greg
parents: 2
diff changeset
18 ## extension to enable the links.
abfb861df879 Uploaded
greg
parents: 2
diff changeset
19 #set input_snps_json_identifier = re.sub('[^\s\w\-]', '_', str($input_snps_json.element_identifier)) + '.snps'
abfb861df879 Uploaded
greg
parents: 2
diff changeset
20 ln -s '${input_snps_json}' '${input_snps_json_identifier}' &&
abfb861df879 Uploaded
greg
parents: 2
diff changeset
21 #set input_avg_mq_json_identifier = re.sub('[^\s\w\-]', '_', str($input_avg_mq_json.element_identifier)) + '.avg_mq'
abfb861df879 Uploaded
greg
parents: 2
diff changeset
22 ln -s '${input_avg_mq_json}' '${input_avg_mq_json_identifier}' &&
abfb861df879 Uploaded
greg
parents: 2
diff changeset
23 #set input_newick_identifier = re.sub('[^\s\w\-]', '_', str($input_newick.element_identifier))
abfb861df879 Uploaded
greg
parents: 2
diff changeset
24 ln -s '${input_newick}' '${input_newick_identifier}' &&
abfb861df879 Uploaded
greg
parents: 2
diff changeset
25
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
26 python '$__tool_directory__/vsnp_build_tables.py'
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
27 --input_snps_json '${input_snps_json_identifier}'
abfb861df879 Uploaded
greg
parents: 2
diff changeset
28 --input_avg_mq_json '${input_avg_mq_json_identifier}'
abfb861df879 Uploaded
greg
parents: 2
diff changeset
29 --input_newick '${input_newick_identifier}'
abfb861df879 Uploaded
greg
parents: 2
diff changeset
30 #if str($gbk_cond.gbk_param) == 'yes':
abfb861df879 Uploaded
greg
parents: 2
diff changeset
31 #if str($gbk_cond.gbk_source_cond.gbk_source) == 'cached':
abfb861df879 Uploaded
greg
parents: 2
diff changeset
32 --gbk_file '$gbk_cond.gbk_source_cond.gbk_file.fields.path'
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
33 #else:
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
34 --gbk_file '$gbk_cond.gbk_source_cond.gbk_file'
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
35 #end if
38a38babcb31 Uploaded
greg
parents:
diff changeset
36 #end if
38a38babcb31 Uploaded
greg
parents:
diff changeset
37 ]]></command>
38a38babcb31 Uploaded
greg
parents:
diff changeset
38 <inputs>
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
39 <param name="input_snps_json" type="data" format="json" label="SNPs json file"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
40 <param name="input_avg_mq_json" type="data" format="json" label="Average MQ json file"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
41 <param name="input_newick" type="data" format="newick" label="Best-scoring ML tree file"/>
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
42 <conditional name="gbk_cond">
38a38babcb31 Uploaded
greg
parents:
diff changeset
43 <param name="gbk_param" type="select" label="Use Genbank file?">
38a38babcb31 Uploaded
greg
parents:
diff changeset
44 <option value="yes" selected="true">yes</option>
38a38babcb31 Uploaded
greg
parents:
diff changeset
45 <option value="no">no</option>
38a38babcb31 Uploaded
greg
parents:
diff changeset
46 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
47 <when value="yes">
38a38babcb31 Uploaded
greg
parents:
diff changeset
48 <conditional name="gbk_source_cond">
38a38babcb31 Uploaded
greg
parents:
diff changeset
49 <param name="gbk_source" type="select" label="Choose the source for the Genbank file">
38a38babcb31 Uploaded
greg
parents:
diff changeset
50 <option value="cached" selected="true">locally cached</option>
38a38babcb31 Uploaded
greg
parents:
diff changeset
51 <option value="history">from history</option>
38a38babcb31 Uploaded
greg
parents:
diff changeset
52 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
53 <when value="cached">
38a38babcb31 Uploaded
greg
parents:
diff changeset
54 <param name="gbk_file" type="select" label="Genbank file">
38a38babcb31 Uploaded
greg
parents:
diff changeset
55 <options from_data_table="vsnp_genbank">
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
56 <filter type="data_meta" column="0" key="dbkey" ref="input_avg_mq_json"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
57 <validator type="no_options" message="A cached Genbank file is not available for the build associated with the selected average MQ json file"/>
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
58 </options>
38a38babcb31 Uploaded
greg
parents:
diff changeset
59 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
60 </when>
38a38babcb31 Uploaded
greg
parents:
diff changeset
61 <when value="history">
38a38babcb31 Uploaded
greg
parents:
diff changeset
62 <param name="gbk_file" type="data" format="genbank" label="Genbank file">
38a38babcb31 Uploaded
greg
parents:
diff changeset
63 <validator type="no_options" message="The current history does not include a genbank dataset"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
64 </param>
38a38babcb31 Uploaded
greg
parents:
diff changeset
65 </when>
38a38babcb31 Uploaded
greg
parents:
diff changeset
66 </conditional>
38a38babcb31 Uploaded
greg
parents:
diff changeset
67 </when>
38a38babcb31 Uploaded
greg
parents:
diff changeset
68 <when value="no"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
69 </conditional>
38a38babcb31 Uploaded
greg
parents:
diff changeset
70 </inputs>
38a38babcb31 Uploaded
greg
parents:
diff changeset
71 <outputs>
4
b67a6326a96b "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 23762af1ec5da6c6f7407f6194d7a2bf9d242618"
greg
parents: 3
diff changeset
72 <collection name="excel" type="list" format="xlsx" label="${tool.name} on ${on_string}">
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
73 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;xlsx)" directory="output_excel_dir"/>
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
74 </collection>
38a38babcb31 Uploaded
greg
parents:
diff changeset
75 </outputs>
38a38babcb31 Uploaded
greg
parents:
diff changeset
76 <tests>
38a38babcb31 Uploaded
greg
parents:
diff changeset
77 <test>
38a38babcb31 Uploaded
greg
parents:
diff changeset
78 <param name="input_snps_json" value="input_snps_json.json" ftype="json" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
79 <param name="input_newick" value="input_newick.newick" ftype="newick" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
80 <param name="input_avg_mq_json" value="input_avg_mq_json.json" ftype="json" dbkey="89"/>
38a38babcb31 Uploaded
greg
parents:
diff changeset
81 <param name="gbk_param" value="no"/>
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
82 <output_collection name="excel" type="list" count="2">
abfb861df879 Uploaded
greg
parents: 2
diff changeset
83 <element name="input_newick_newick_cascade_table" file="cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
84 <element name="input_newick_newick_sort_table" file="sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
85 </output_collection>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
86 </test>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
87 <test>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
88 <param name="input_snps_json" value="Mbovis-01_snps.json" ftype="json" dbkey="89"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
89 <param name="input_newick" value="Mbovis-01_snps.newick" ftype="newick" dbkey="89"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
90 <param name="input_avg_mq_json" value="Mbovis-01_avg_mq.json" ftype="json" dbkey="89"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
91 <param name="gbk_param" value="no"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
92 <output_collection name="excel" type="list" count="2">
4
b67a6326a96b "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 23762af1ec5da6c6f7407f6194d7a2bf9d242618"
greg
parents: 3
diff changeset
93 <element name="Mbovis-01_snps_newick_cascade_table" file="Mbovis-01_cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
b67a6326a96b "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 23762af1ec5da6c6f7407f6194d7a2bf9d242618"
greg
parents: 3
diff changeset
94 <element name="Mbovis-01_snps_newick_sort_table" file="Mbovis-01_sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
95 </output_collection>
38a38babcb31 Uploaded
greg
parents:
diff changeset
96 </test>
38a38babcb31 Uploaded
greg
parents:
diff changeset
97 <test>
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
98 <param name="input_snps_json" value="Mbovis-01D_snps.json" ftype="json" dbkey="89"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
99 <param name="input_newick" value="Mbovis-01D_snps.newick" ftype="newick" dbkey="89"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
100 <param name="input_avg_mq_json" value="Mbovis-01D_avg_mq.json" ftype="json" dbkey="89"/>
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
101 <param name="gbk_param" value="no"/>
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
102 <output_collection name="excel" type="list" count="2">
4
b67a6326a96b "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 23762af1ec5da6c6f7407f6194d7a2bf9d242618"
greg
parents: 3
diff changeset
103 <element name="Mbovis-01D_snps_newick_cascade_table" file="Mbovis-01D_cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
b67a6326a96b "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 23762af1ec5da6c6f7407f6194d7a2bf9d242618"
greg
parents: 3
diff changeset
104 <element name="Mbovis-01D_snps_newick_sort_table" file="Mbovis-01D_sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
105 </output_collection>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
106 </test>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
107 <test>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
108 <param name="input_snps_json" value="Mbovis-01D6_snps.json" ftype="json" dbkey="89"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
109 <param name="input_newick" value="Mbovis-01D6_snps.newick" ftype="newick" dbkey="89"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
110 <param name="input_avg_mq_json" value="Mbovis-01D6_avg_mq.json" ftype="json" dbkey="89"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
111 <param name="gbk_param" value="no"/>
abfb861df879 Uploaded
greg
parents: 2
diff changeset
112 <output_collection name="excel" type="list" count="2">
4
b67a6326a96b "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 23762af1ec5da6c6f7407f6194d7a2bf9d242618"
greg
parents: 3
diff changeset
113 <element name="Mbovis-01D6_snps_newick_cascade_table" file="Mbovis-01D6_cascade_table.xlsx" ftype="xlsx" compare="sim_size"/>
b67a6326a96b "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 23762af1ec5da6c6f7407f6194d7a2bf9d242618"
greg
parents: 3
diff changeset
114 <element name="Mbovis-01D6_snps_newick_sort_table" file="Mbovis-01D6_sort_table.xlsx" ftype="xlsx" compare="sim_size"/>
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
115 </output_collection>
38a38babcb31 Uploaded
greg
parents:
diff changeset
116 </test>
38a38babcb31 Uploaded
greg
parents:
diff changeset
117 </tests>
38a38babcb31 Uploaded
greg
parents:
diff changeset
118 <help>
38a38babcb31 Uploaded
greg
parents:
diff changeset
119 **What it does**
38a38babcb31 Uploaded
greg
parents:
diff changeset
120
38a38babcb31 Uploaded
greg
parents:
diff changeset
121 Accepts a combination of single SNPs json, average MQ json and newick files (or associated collections of
38a38babcb31 Uploaded
greg
parents:
diff changeset
122 each) to produce annotated SNPs tables in the form of Excel spreadsheets. The SNPs json and average MQ json
38a38babcb31 Uploaded
greg
parents:
diff changeset
123 files are typically produced by the **vSNP: get SNPs** tool and the newick files are typically produced by
38a38babcb31 Uploaded
greg
parents:
diff changeset
124 the **Phyogenetic reconstruction with RaXML** tool.
38a38babcb31 Uploaded
greg
parents:
diff changeset
125
38a38babcb31 Uploaded
greg
parents:
diff changeset
126 The SNPs tables display closely related isolates and enable identification of mixed SNPs when multiple
38a38babcb31 Uploaded
greg
parents:
diff changeset
127 bacterial strains are infecting an organism. The table structure is shown below. The columns identify the
38a38babcb31 Uploaded
greg
parents:
diff changeset
128 genome location of the SNP calls and the isolates are contained within the rows. The reference (or ancestral
38a38babcb31 Uploaded
greg
parents:
diff changeset
129 strain if the reference is an outgroup) is listed across the top, identified as the "reference call". SNPs
38a38babcb31 Uploaded
greg
parents:
diff changeset
130 that are not highlighted will match the reference. The map-quality row values are the average of the map
38a38babcb31 Uploaded
greg
parents:
diff changeset
131 quality scores of each isolate in that position. These scores measure the confidence that the read has been
38a38babcb31 Uploaded
greg
parents:
diff changeset
132 mapped to the correct location on the genome. The maximum score possible is 60, and lower scores lessen the
38a38babcb31 Uploaded
greg
parents:
diff changeset
133 confidence that the SNP was correctly identified. The annotation of the position is provided at the bottom
38a38babcb31 Uploaded
greg
parents:
diff changeset
134 of the table.
38a38babcb31 Uploaded
greg
parents:
diff changeset
135
38a38babcb31 Uploaded
greg
parents:
diff changeset
136 .. image:: table_description.png
38a38babcb31 Uploaded
greg
parents:
diff changeset
137
38a38babcb31 Uploaded
greg
parents:
diff changeset
138 SNPs are sorted according to their evolutionary age within the table. The oldest SNPs (encompassing the most
38a38babcb31 Uploaded
greg
parents:
diff changeset
139 isolates) are furthest to the left. This sorting is somewhat crude - the intent is to improve readability or
38a38babcb31 Uploaded
greg
parents:
diff changeset
140 more easily match a related tree.
38a38babcb31 Uploaded
greg
parents:
diff changeset
141
38a38babcb31 Uploaded
greg
parents:
diff changeset
142 For a more detailed discussion, see the **Validating and correcting SNP calls** section of
2
85384a9bfba2 Uploaded
greg
parents: 1
diff changeset
143 the `vSNP documentation`_.
85384a9bfba2 Uploaded
greg
parents: 1
diff changeset
144
85384a9bfba2 Uploaded
greg
parents: 1
diff changeset
145 .. _vSNP documentation: https://github.com/USDA-VS/vSNP/blob/master/docs/detailed_usage.md
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
146
38a38babcb31 Uploaded
greg
parents:
diff changeset
147 **Required Options**
38a38babcb31 Uploaded
greg
parents:
diff changeset
148
38a38babcb31 Uploaded
greg
parents:
diff changeset
149 * **Use Genbank file** - Select "yes" to annotate the tables using the information in the Genbank file. Locally cached files, if available, provide the most widely used annotations, but more custom Genbank files can be chosen from the current history.
38a38babcb31 Uploaded
greg
parents:
diff changeset
150 </help>
3
abfb861df879 Uploaded
greg
parents: 2
diff changeset
151 <expand macro="citations"/>
0
38a38babcb31 Uploaded
greg
parents:
diff changeset
152 </tool>
38a38babcb31 Uploaded
greg
parents:
diff changeset
153