annotate vsnp_get_snps.xml @ 0:ee4ef1fc23c6 draft

Uploaded
author greg
date Tue, 21 Apr 2020 10:14:11 -0400
parents
children 770834ba75e4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
1 <tool id="vsnp_get_snps" name="vSNP: get SNPs" version="1.0.0">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
2 <description></description>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
3 <requirements>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="0.25.3">pandas</requirement>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="0.6.8">pyvcf</requirement>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
6 <requirement type="package" version="1.2.0">xlrd</requirement>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
7 </requirements>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
8 <command detect_errors="exit_code"><![CDATA[
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
9 #import os
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
10 #set input_vcf_dir = 'input_vcf_dir'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
11 #set input_zc_vcf_type = $input_zc_vcf_type_cond.input_zc_vcf_type
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
12 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
13 #set output_json_snps_dir = 'output_json_snps_dir'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
14 #set output_snps_dir = 'output_snps_dir'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
15 mkdir -p $input_vcf_dir &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
16 mkdir -p $output_json_avg_mq_dir &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
17 mkdir -p $output_json_snps_dir &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
18 mkdir -p $output_snps_dir &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
19 #set reference = '?'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
20 #for $i in $input_vcf_collection:
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
21 #set reference = $i.metadata.dbkey
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
22 #set filename = $i.file_name
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
23 #set name = $i.name
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
24 ln -s $filename $input_vcf_dir/$name &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
25 #end for
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
26 #if str($input_zc_vcf_type) == "single":
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
27 #set input_zc_vcf = $input_zc_vcf_type_cond.input_zc_vcf
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
28 #set file_name_base = $os.path.basename($input_zc_vcf.file_name)
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
29 ln -s $input_zc_vcf $input_vcf_dir/$file_name_base &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
30 #else
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
31 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection:
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
32 #set filename = $i.file_name
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
33 #set name = $i.name
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
34 ln -s $filename $input_vcf_dir/$name &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
35 #end for
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
36 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
37 #if str($excel_grouper_cond.excel_grouper) == "yes":
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
38 #set excel_grouper_source = $excel_grouper_cond.excel_grouper_source_cond.excel_grouper_source
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
39 #if str($excel_grouper_source) == "cached":
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
40 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields()
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
41 #for $i in $excel_fields:
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
42 #if str($i[0]) == $reference:
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
43 #set excel_file = $i[2]
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
44 #break
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
45 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
46 #end for
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
47 #else:
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
48 #set excel_file = $excel_grouper_cond.excel_grouper_source_cond.excel_grouper_file
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
49 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
50 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
51 python '$__tool_directory__/vsnp_get_snps.py'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
52 --processes $processes
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
53 --reference '$reference'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
54 #if str($excel_grouper_cond.excel_grouper) == "yes":
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
55 --excel_grouper_file '$excel_file'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
56 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
57 #if str($all_isolates) == "Yes":
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
58 --all_isolates '$all_isolates'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
59 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
60 --output_summary '$output_summary'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
61 ]]></command>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
62 <inputs>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
63 <conditional name="input_zc_vcf_type_cond">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
64 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
65 <option value="single" selected="true">A single zero coverage VCF file</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
66 <option value="collection">A collection of zero coverage VCF files</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
67 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
68 <when value="single">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
69 <param name="input_zc_vcf" type="data" format="vcf" label="Zero coverage VCF file">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
70 <validator type="unspecified_build"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
71 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
72 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
73 <when value="collection">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
74 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
75 <validator type="unspecified_build"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
76 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
77 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
78 </conditional>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
79 <param name="input_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of VCF files against which to analyze the zero coverages VCF file(s)">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
80 <validator type="unspecified_build"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
81 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
82 <conditional name="excel_grouper_cond">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
83 <param name="excel_grouper" type="select" label="Use Excel file for grouping and filtering?">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
84 <option value="yes" selected="true">Yes</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
85 <option value="no">No</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
86 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
87 <when value="yes">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
88 <conditional name="excel_grouper_source_cond">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
89 <param name="excel_grouper_source" type="select" label="Choose the source for the Excel file">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
90 <option value="cached">locally cached</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
91 <option value="history">from history</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
92 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
93 <when value="cached">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
94 <param name="excel_grouper_file" type="select" label="Excel file" help="Selection will be overridden if it does not match the dbkeys associated with the collection of VCF files being analyzed">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
95 <options from_data_table="vsnp_excel"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
96 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
97 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
98 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
99 <when value="history">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
100 <param name="excel_grouper_file" type="data" format="xlsx" label="Excel file">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
101 <validator type="no_options" message="The current history does not include an xlsx dataset that can be used for grouping and filtering"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
102 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
103 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
104 </conditional>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
105 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
106 <when value="no"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
107 </conditional>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
108 <param name="all_isolates" type="select" display="radio" label="Create table with all isolates?">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
109 <option value="No" selected="true">No</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
110 <option value="Yes">Yes</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
111 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
112 <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
113 </inputs>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
114 <outputs>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
115 <collection name="snps" type="list" label="${tool.name} (SNPs) on ${on_string}">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
116 <discover_datasets pattern="__name__" directory="output_snps_dir" format="fasta" />
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
117 </collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
118 <collection name="json_avg_mq" type="list" label="${tool.name} (average MQ) on ${on_string}">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
119 <discover_datasets pattern="__name__" directory="output_json_avg_mq_dir" format="json" />
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
120 </collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
121 <collection name="json_snps" type="list" label="${tool.name} (SNPs as json) on ${on_string}">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
122 <discover_datasets pattern="__name__" directory="output_json_snps_dir" format="json" />
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
123 </collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
124 <data name="output_summary" format="html" label="${tool.name} (summary) on ${on_string}"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
125 </outputs>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
126 <tests>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
127 <test>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
128 <param name="input_zc_vcf" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
129 <param name="input_vcf_collection">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
130 <collection type="list">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
131 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
132 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
133 </collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
134 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
135 <param name="excel_grouper" value="no"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
136 <output_collection name="snps" type="list">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
137 <element name="all_vcf.fasta" file="all_vcf.fasta" ftype="fasta" compare="contains"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
138 </output_collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
139 <output_collection name="json_avg_mq" type="list">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
140 <element name="all_vcf.json" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
141 </output_collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
142 <output_collection name="json_snps" type="list">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
143 <element name="all_vcf.json" file="json_all_vcf.json" ftype="json" compare="contains"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
144 </output_collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
145 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
146 </test>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
147 </tests>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
148 <help>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
149 **What it does**
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
150
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
151 Accepts a zero-coverage VCF file (or a collection of them) produced by the **vSNP: add zero coverage** tool
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
152 along with a collection of VCF files that have been aligned with the same reference. The inputs are analyzed
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
153 to discover quality parsimonious SNPs in the zero-coverage VCF file(s). An Excel spreadsheet containing
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
154 specified SNPs can optionally be used to filter desired SNP positions by group. Users can choose whether to
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
155 select a locally cached Excel spreadsheet or one from their current history.
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
156
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
157 **Required Options**
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
158
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
159 * **Choose the category of the files to be analyzed** - select single file or a collection of files, then select the appropriate history item (single VCF item or dataset collection of VCF elements) based on the selected option.
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
160 * **Collection of VCF files against which to analyze the zero coverages VCF file(s)** - select a dataset collection from the current history that is associated with the same reference as the selected zero-coverage VCF file(s).
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
161
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
162 **Additional Options**
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
163
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
164 * **Use Excel file for grouping and filtering?** - select Yes to filter desired SNP positions by group. A cached Excel spreadsheet provides the most widely used SNP positions for grouping, but a custom spreadsheet can be selected from the current history.
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
165 * **Job Resource Parameters** - an administrator for the Galaxy instance must configure this tool to display this option, so it may not be available. If it is, you can choose the number of processors to use for tool execution.
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
166 * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time.
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
167 </help>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
168 <citations>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
169 <citation type="bibtex">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
170 @misc{None,
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
171 journal = {None},
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
172 author = {1. Stuber T},
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
173 title = {Manuscript in preparation},
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
174 year = {None},
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
175 url = {https://github.com/USDA-VS/vSNP},}
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
176 </citation>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
177 </citations>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
178 </tool>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
179