comparison vsnp_get_snps.xml @ 0:ee4ef1fc23c6 draft

Uploaded
author greg
date Tue, 21 Apr 2020 10:14:11 -0400
parents
children 770834ba75e4
comparison
equal deleted inserted replaced
-1:000000000000 0:ee4ef1fc23c6
1 <tool id="vsnp_get_snps" name="vSNP: get SNPs" version="1.0.0">
2 <description></description>
3 <requirements> <!-- packages (with pinned versions) declared as dependencies of this tool -->
4 <requirement type="package" version="0.25.3">pandas</requirement>
5 <requirement type="package" version="0.6.8">pyvcf</requirement>
6 <requirement type="package" version="1.2.0">xlrd</requirement>
7 </requirements>
8 <command detect_errors="exit_code"><![CDATA[
9 #import os
10 #set input_vcf_dir = 'input_vcf_dir'
11 #set input_zc_vcf_type = $input_zc_vcf_type_cond.input_zc_vcf_type
12 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir'
13 #set output_json_snps_dir = 'output_json_snps_dir'
14 #set output_snps_dir = 'output_snps_dir'
15 mkdir -p $input_vcf_dir &&
16 mkdir -p $output_json_avg_mq_dir &&
17 mkdir -p $output_json_snps_dir &&
18 mkdir -p $output_snps_dir &&
19 ## The dbkey of the last collection element becomes the reference; '?' remains if the collection is empty.
20 #set reference = '?'
21 #for $i in $input_vcf_collection:
22 #set reference = $i.metadata.dbkey
23 ln -s '$i.file_name' '$input_vcf_dir/$i.name' &&
24 #end for
25 #if str($input_zc_vcf_type) == "single":
26 #set input_zc_vcf = $input_zc_vcf_type_cond.input_zc_vcf
27 #set file_name_base = $os.path.basename($input_zc_vcf.file_name)
28 ln -s '$input_zc_vcf' '$input_vcf_dir/$file_name_base' &&
29 #else
30 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection:
31 ln -s '$i.file_name' '$input_vcf_dir/$i.name' &&
32 #end for
33 #end if
34 #if str($excel_grouper_cond.excel_grouper) == "yes":
35 #if str($excel_grouper_cond.excel_grouper_source_cond.excel_grouper_source) == "cached":
36 ## Cached Excel file: the row whose column 0 equals the reference dbkey is used, and its column 2 is passed on.
37 #set excel_file = None
38 #for $i in $__app__.tool_data_tables['vsnp_excel'].get_fields():
39 #if str($i[0]) == str($reference):
40 #set excel_file = $i[2]
41 #break
42 #end if
43 #end for
44 #if $excel_file is None:
45 echo 'No locally cached Excel file matches the dbkey $reference' >&2 && exit 1 &&
46 #end if
47 #else:
48 #set excel_file = $excel_grouper_cond.excel_grouper_source_cond.excel_grouper_file
49 #end if
50 #end if
51 python '$__tool_directory__/vsnp_get_snps.py'
52 --processes $processes
53 --reference '$reference'
54 #if str($excel_grouper_cond.excel_grouper) == "yes":
55 --excel_grouper_file '$excel_file'
56 #end if
57 #if str($all_isolates) == "Yes":
58 --all_isolates '$all_isolates'
59 #end if
60 --output_summary '$output_summary'
61 ]]></command>
62 <inputs>
63 <conditional name="input_zc_vcf_type_cond"> <!-- zero coverage input: either one VCF dataset or a list collection of them -->
64 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed">
65 <option value="single" selected="true">A single zero coverage VCF file</option>
66 <option value="collection">A collection of zero coverage VCF files</option>
67 </param>
68 <when value="single">
69 <param name="input_zc_vcf" type="data" format="vcf" label="Zero coverage VCF file">
70 <validator type="unspecified_build"/>
71 </param>
72 </when>
73 <when value="collection">
74 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files">
75 <validator type="unspecified_build"/>
76 </param>
77 </when>
78 </conditional>
79 <param name="input_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of VCF files against which to analyze the zero coverages VCF file(s)"> <!-- the command template takes the reference dbkey from the elements of this collection -->
80 <validator type="unspecified_build"/>
81 </param>
82 <conditional name="excel_grouper_cond"> <!-- when "yes", the command template adds the excel_grouper_file option -->
83 <param name="excel_grouper" type="select" label="Use Excel file for grouping and filtering?">
84 <option value="yes" selected="true">Yes</option>
85 <option value="no">No</option>
86 </param>
87 <when value="yes">
88 <conditional name="excel_grouper_source_cond"> <!-- Excel file comes from the vsnp_excel data table or from the history -->
89 <param name="excel_grouper_source" type="select" label="Choose the source for the Excel file">
90 <option value="cached">locally cached</option>
91 <option value="history">from history</option>
92 </param>
93 <when value="cached">
94 <param name="excel_grouper_file" type="select" label="Excel file" help="Selection will be overridden if it does not match the dbkeys associated with the collection of VCF files being analyzed"> <!-- NOTE(review): for the cached source the command template looks the file up by dbkey and does not read this selection -->
95 <options from_data_table="vsnp_excel"/>
96 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/>
97 </param>
98 </when>
99 <when value="history">
100 <param name="excel_grouper_file" type="data" format="xlsx" label="Excel file">
101 <validator type="no_options" message="The current history does not include an xlsx dataset that can be used for grouping and filtering"/>
102 </param>
103 </when>
104 </conditional>
105 </when>
106 <when value="no"/>
107 </conditional>
108 <param name="all_isolates" type="select" display="radio" label="Create table with all isolates?"> <!-- the all_isolates option is only passed to the script when this is "Yes" -->
109 <option value="No" selected="true">No</option>
110 <option value="Yes">Yes</option>
111 </param>
112 <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/> <!-- forwarded to the script as the processes option -->
113 </inputs>
114 <outputs> <!-- the three collections are discovered from directories that the command template creates with mkdir -->
115 <collection name="snps" type="list" label="${tool.name} (SNPs) on ${on_string}">
116 <discover_datasets pattern="__name__" directory="output_snps_dir" format="fasta" />
117 </collection>
118 <collection name="json_avg_mq" type="list" label="${tool.name} (average MQ) on ${on_string}">
119 <discover_datasets pattern="__name__" directory="output_json_avg_mq_dir" format="json" />
120 </collection>
121 <collection name="json_snps" type="list" label="${tool.name} (SNPs as json) on ${on_string}">
122 <discover_datasets pattern="__name__" directory="output_json_snps_dir" format="json" />
123 </collection>
124 <data name="output_summary" format="html" label="${tool.name} (summary) on ${on_string}"/> <!-- path is handed to the script through the output_summary option -->
125 </outputs>
126 <tests>
127 <test> <!-- single zero coverage VCF plus a two-element VCF collection, all with dbkey 89, Excel grouping disabled -->
128 <param name="input_zc_vcf" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/>
129 <param name="input_vcf_collection">
130 <collection type="list">
131 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/>
132 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/>
133 </collection>
134 </param>
135 <param name="excel_grouper" value="no"/>
136 <output_collection name="snps" type="list">
137 <element name="all_vcf.fasta" file="all_vcf.fasta" ftype="fasta" compare="contains"/>
138 </output_collection>
139 <output_collection name="json_avg_mq" type="list">
140 <element name="all_vcf.json" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/>
141 </output_collection>
142 <output_collection name="json_snps" type="list">
143 <element name="all_vcf.json" file="json_all_vcf.json" ftype="json" compare="contains"/>
144 </output_collection>
145 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/>
146 </test>
147 </tests>
148 <help>
149 **What it does**
150
151 Accepts a zero-coverage VCF file (or a collection of them) produced by the **vSNP: add zero coverage** tool
152 along with a collection of VCF files that have been aligned with the same reference. The inputs are analyzed
153 to discover quality parsimonious SNPs in the zero-coverage VCF file(s). An Excel spreadsheet containing
154 specified SNPs can optionally be used to filter desired SNP positions by group. Users can choose whether to
155 select a locally cached Excel spreadsheet or one from their current history.
156
157 **Required Options**
158
159 * **Choose the category of the files to be analyzed** - select single file or a collection of files, then select the appropriate history item (single VCF item or dataset collection of VCF elements) based on the selected option.
160 * **Collection of VCF files against which to analyze the zero coverages VCF file(s)** - select a dataset collection from the current history that is associated with the same reference as the selected zero-coverage VCF file(s).
161
162 **Additional Options**
163
164 * **Use Excel file for grouping and filtering?** - select Yes to filter desired SNP positions by group. A cached Excel spreadsheet provides the most widely used SNP positions for grouping, but a custom spreadsheet can be selected from the current history.
165 * **Job Resource Parameters** - an administrator for the Galaxy instance must configure this tool to display this option, so it may not be available. If it is, you can choose the number of processors to use for tool execution.
166 * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time.
167 </help>
168 <citations> <!-- placeholder BibTeX entry: the title says the manuscript is in preparation, and key, journal and year are set to None -->
169 <citation type="bibtex">
170 @misc{None,
171 journal = {None},
172 author = {1. Stuber T},
173 title = {Manuscript in preparation},
174 year = {None},
175 url = {https://github.com/USDA-VS/vSNP},}
176 </citation>
177 </citations>
178 </tool>
179