annotate vsnp_get_snps.xml @ 2:7471707d3fb4 draft

Uploaded
author greg
date Sat, 14 Nov 2020 09:16:04 +0000
parents 770834ba75e4
children 14285a94fb13
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
1 <tool id="vsnp_get_snps" name="vSNP: get SNPs" version="1.0.0">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
2 <description></description>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
3 <requirements>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="0.25.3">pandas</requirement>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="0.6.8">pyvcf</requirement>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
6 <requirement type="package" version="1.2.0">xlrd</requirement>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
7 </requirements>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
8 <command detect_errors="exit_code"><![CDATA[
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
9 #import os
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
10 #set input_vcf_dir = 'input_vcf_dir'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
11 #set input_zc_vcf_type = $input_zc_vcf_type_cond.input_zc_vcf_type
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
12 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
13 #set output_json_snps_dir = 'output_json_snps_dir'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
14 #set output_snps_dir = 'output_snps_dir'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
15 mkdir -p $input_vcf_dir &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
16 mkdir -p $output_json_avg_mq_dir &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
17 mkdir -p $output_json_snps_dir &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
18 mkdir -p $output_snps_dir &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
19 #set reference = '?'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
20 #for $i in $input_vcf_collection:
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
21 #set reference = $i.metadata.dbkey
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
22 #set filename = $i.file_name
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
23 #set name = $i.name
1
770834ba75e4 Uploaded
greg
parents: 0
diff changeset
24 ln -s '$filename' '$input_vcf_dir/$name' &&
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
25 #end for
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
26 #if str($input_zc_vcf_type) == "single":
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
27 #set input_zc_vcf = $input_zc_vcf_type_cond.input_zc_vcf
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
28 #set file_name_base = $os.path.basename($input_zc_vcf.file_name)
1
770834ba75e4 Uploaded
greg
parents: 0
diff changeset
29 ln -s '$input_zc_vcf' '$input_vcf_dir/$file_name_base' &&
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
30 #else
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
31 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection:
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
32 #set filename = $i.file_name
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
33 #set name = $i.name
1
770834ba75e4 Uploaded
greg
parents: 0
diff changeset
34 ln -s '$filename' '$input_vcf_dir/$name' &&
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
35 #end for
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
36 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
37 #if str($excel_grouper_cond.excel_grouper) == "yes":
1
770834ba75e4 Uploaded
greg
parents: 0
diff changeset
38 #set excel_file = 'No genome specified for input VCF (database) file(s)'
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
39 #set excel_grouper_source = $excel_grouper_cond.excel_grouper_source_cond.excel_grouper_source
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
40 #if str($excel_grouper_source) == "cached":
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
41 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields()
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
42 #for $i in $excel_fields:
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
43 #if str($i[0]) == $reference:
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
44 #set excel_file = $i[2]
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
45 #break
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
46 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
47 #end for
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
48 #else:
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
49 #set excel_file = $excel_grouper_cond.excel_grouper_source_cond.excel_grouper_file
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
50 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
51 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
52 python '$__tool_directory__/vsnp_get_snps.py'
2
7471707d3fb4 Uploaded
greg
parents: 1
diff changeset
53 --processes $processes
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
54 --reference '$reference'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
55 #if str($excel_grouper_cond.excel_grouper) == "yes":
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
56 --excel_grouper_file '$excel_file'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
57 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
58 #if str($all_isolates) == "Yes":
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
59 --all_isolates '$all_isolates'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
60 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
61 --output_summary '$output_summary'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
62 ]]></command>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
63 <inputs>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
64 <conditional name="input_zc_vcf_type_cond">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
65 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
66 <option value="single" selected="true">A single zero coverage VCF file</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
67 <option value="collection">A collection of zero coverage VCF files</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
68 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
69 <when value="single">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
70 <param name="input_zc_vcf" type="data" format="vcf" label="Zero coverage VCF file">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
71 <validator type="unspecified_build"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
72 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
73 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
74 <when value="collection">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
75 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
76 <validator type="unspecified_build"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
77 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
78 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
79 </conditional>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
80 <param name="input_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of VCF files against which to analyze the zero coverages VCF file(s)">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
81 <validator type="unspecified_build"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
82 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
83 <conditional name="excel_grouper_cond">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
84 <param name="excel_grouper" type="select" label="Use Excel file for grouping and filtering?">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
85 <option value="yes" selected="true">Yes</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
86 <option value="no">No</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
87 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
88 <when value="yes">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
89 <conditional name="excel_grouper_source_cond">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
90 <param name="excel_grouper_source" type="select" label="Choose the source for the Excel file">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
91 <option value="cached">locally cached</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
92 <option value="history">from history</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
93 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
94 <when value="cached">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
95 <param name="excel_grouper_file" type="select" label="Excel file" help="Selection will be overridden if it does not match the dbkeys associated with the collection of VCF files being analyzed">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
96 <options from_data_table="vsnp_excel"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
97 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
98 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
99 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
100 <when value="history">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
101 <param name="excel_grouper_file" type="data" format="xlsx" label="Excel file">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
102 <validator type="no_options" message="The current history does not include an xlsx dataset that can be used for grouping and filtering"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
103 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
104 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
105 </conditional>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
106 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
107 <when value="no"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
108 </conditional>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
109 <param name="all_isolates" type="select" display="radio" label="Create table with all isolates?">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
110 <option value="No" selected="true">No</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
111 <option value="Yes">Yes</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
112 </param>
2
7471707d3fb4 Uploaded
greg
parents: 1
diff changeset
113 <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
114 </inputs>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
115 <outputs>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
116 <collection name="snps" type="list" label="${tool.name} (SNPs) on ${on_string}">
1
770834ba75e4 Uploaded
greg
parents: 0
diff changeset
117 <discover_datasets pattern="__name__" directory="output_snps_dir" format="fasta"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
118 </collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
119 <collection name="json_avg_mq" type="list" label="${tool.name} (average MQ) on ${on_string}">
1
770834ba75e4 Uploaded
greg
parents: 0
diff changeset
120 <discover_datasets pattern="__name__" directory="output_json_avg_mq_dir" format="json"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
121 </collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
122 <collection name="json_snps" type="list" label="${tool.name} (SNPs as json) on ${on_string}">
1
770834ba75e4 Uploaded
greg
parents: 0
diff changeset
123 <discover_datasets pattern="__name__" directory="output_json_snps_dir" format="json"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
124 </collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
125 <data name="output_summary" format="html" label="${tool.name} (summary) on ${on_string}"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
126 </outputs>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
127 <tests>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
128 <test>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
129 <param name="input_zc_vcf" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
130 <param name="input_vcf_collection">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
131 <collection type="list">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
132 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
133 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
134 </collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
135 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
136 <param name="excel_grouper" value="no"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
137 <output_collection name="snps" type="list">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
138 <element name="all_vcf.fasta" file="all_vcf.fasta" ftype="fasta" compare="contains"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
139 </output_collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
140 <output_collection name="json_avg_mq" type="list">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
141 <element name="all_vcf.json" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
142 </output_collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
143 <output_collection name="json_snps" type="list">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
144 <element name="all_vcf.json" file="json_all_vcf.json" ftype="json" compare="contains"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
145 </output_collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
146 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
147 </test>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
148 </tests>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
149 <help>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
150 **What it does**
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
151
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
152 Accepts a zero-coverage VCF file (or a collection of them) produced by the **vSNP: add zero coverage** tool
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
153 along with a collection of VCF files that have been aligned with the same reference. The inputs are analyzed
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
154 to discover quality parsimonious SNPs in the zero-coverage VCF file(s). An Excel spreadsheet containing
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
155 specified SNPs can optionally be used to filter desired SNP positions by group. Users can choose whether to
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
156 select a locally cached Excel spreadsheet or one from their current history.
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
157
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
158 **Required Options**
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
159
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
160 * **Choose the category of the files to be analyzed** - select single file or a collection of files, then select the appropriate history item (single VCF item or dataset collection of VCF elements) based on the selected option.
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
161 * **Collection of VCF files against which to analyze the zero coverages VCF file(s)** - select a dataset collection from the current history that is associated with the same reference as the selected zero-coverage VCF file(s).
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
162
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
163 **Additional Options**
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
164
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
165 * **Use Excel file for grouping and filtering?** - select Yes to filter desired SNP positions by group. A cached Excel spreadsheet provides the most widely used SNP positions for grouping, but a custom spreadsheet can be selected from the current history.
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
166 * **Job Resource Parameters** - an administrator for the Galaxy instance must configure this tool to display this option, so it may not be available. If it is, you can choose the number of processors to use for tool execution.
2
7471707d3fb4 Uploaded
greg
parents: 1
diff changeset
167 * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time.
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
168 </help>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
169 <citations>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
170 <citation type="bibtex">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
171 @misc{None,
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
172 journal = {None},
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
173 author = {1. Stuber T},
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
174 title = {Manuscript in preparation},
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
175 year = {None},
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
176 url = {https://github.com/USDA-VS/vSNP},}
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
177 </citation>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
178 </citations>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
179 </tool>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
180