comparison vsnp_add_zero_coverage.xml @ 2:01312f8a6ca9 draft

Uploaded
author greg
date Sun, 03 Jan 2021 16:29:00 +0000
parents 3cb0bf7e1b2d
children bb6cc994707d
comparison
equal deleted inserted replaced
1:eaf4c304fd22 2:01312f8a6ca9
1 <tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="1.0.0"> 1 <tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="@WRAPPER_VERSION@.2" profile="@PROFILE@">
2 <description></description> 2 <description></description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
3 <requirements> 6 <requirements>
4 <requirement type="package" version="1.76">biopython</requirement> 7 <requirement type="package" version="1.76">biopython</requirement>
5 <requirement type="package" version="1.16.5">numpy</requirement>
6 <requirement type="package" version="0.25.3">pandas</requirement> 8 <requirement type="package" version="0.25.3">pandas</requirement>
7 <requirement type="package" version="0.15.4">pysam</requirement> 9 <requirement type="package" version="0.15.4">pysam</requirement>
8 </requirements> 10 </requirements>
9 <command detect_errors="exit_code"><![CDATA[ 11 <command detect_errors="exit_code"><![CDATA[
10 #import os
11 #import re 12 #import re
12 #set input_type = $input_type_cond.input_type 13
13 #set input_bam_dir = 'input_bam_dir' 14 ## The identifier for both of the following files is likely the same
14 #set input_vcf_dir = 'input_vcf_dir' 15 ## string, so we append a file extension to allow for both links.
15 #set output_vcf_dir = 'output_vcf_dir' 16 #set bam_identifier = re.sub('[^\s\w\-]', '_', str($bam_input.element_identifier)) + '.bam'
16 #set output_metrics_dir = 'output_metrics_dir' 17 ln -s '${bam_input}' '${bam_identifier}' &&
17 mkdir -p $input_bam_dir && 18 #set vcf_identifier = re.sub('[^\s\w\-]', '_', str($vcf_input.element_identifier)) + '.vcf'
18 mkdir -p $input_vcf_dir && 19 ln -s '${vcf_input}' '${vcf_identifier}' &&
19 mkdir -p $output_vcf_dir && 20
20 mkdir -p $output_metrics_dir &&
21 #if str($input_type) == "single":
22 #set bam_input = $input_type_cond.bam_input
23 #set file_name = $bam_input.file_name
24 #set file_name_base = $os.path.basename($file_name)
25 ln -s $file_name $input_bam_dir/$file_name_base &&
26 #set vcf_input = $input_type_cond.vcf_input
27 #set file_name = $vcf_input.file_name
28 #set file_name_base = $os.path.basename($file_name)
29 ln -s $file_name $input_vcf_dir/$file_name_base &&
30 #else:
31 #for $i in $input_type_cond.bam_input_collection:
32 #set filename = $i.file_name
33 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
34 ln -s $filename $input_bam_dir/$identifier &&
35 #end for
36 #for $i in $input_type_cond.vcf_input_collection:
37 #set filename = $i.file_name
38 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
39 ln -s $filename $input_vcf_dir/$identifier &&
40 #end for
41 #end if
42 python '$__tool_directory__/vsnp_add_zero_coverage.py' 21 python '$__tool_directory__/vsnp_add_zero_coverage.py'
43 --processes $processes 22 --bam_input '$bam_identifier'
44 #if str($reference_cond.reference_source) == "cached" 23 --vcf_input '$vcf_identifier'
24 #if str($reference_cond.reference_source) == 'cached'
45 --reference '$reference_cond.reference.fields.path' 25 --reference '$reference_cond.reference.fields.path'
46 #else: 26 #else:
47 --reference '$reference_cond.reference' 27 --reference '$reference_cond.reference'
48 #end if 28 #end if
49 #if str($input_type) == "single": 29 --output_metrics '$output_metrics'
50 --output_metrics '$output_metrics' 30 --output_vcf '$output_vcf'
51 --output_vcf '$output_vcf'
52 #end if
53 ]]></command> 31 ]]></command>
54 <inputs> 32 <inputs>
55 <conditional name="input_type_cond"> 33 <param name="bam_input" type="data" format="bam" label="BAM file"/>
56 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> 34 <param name="vcf_input" type="data" format="vcf" label="VCF file"/>
57 <option value="single" selected="true">Single files</option>
58 <option value="collection">Collections of files</option>
59 </param>
60 <when value="single">
61 <param name="bam_input" type="data" format="bam" label="BAM file">
62 <validator type="unspecified_build"/>
63 </param>
64 <param name="vcf_input" type="data" format="vcf" label="VCF file">
65 <validator type="unspecified_build"/>
66 </param>
67 </when>
68 <when value="collection">
69 <param name="bam_input_collection" type="data_collection" format="bam" collection_type="list" label="Collection of BAM files">
70 <validator type="unspecified_build"/>
71 </param>
72 <param name="vcf_input_collection" type="data_collection" format="vcf" collection_type="list" label="Collection of VCF files">
73 <validator type="unspecified_build"/>
74 </param>
75 </when>
76 </conditional>
77 <conditional name="reference_cond"> 35 <conditional name="reference_cond">
78 <param name="reference_source" type="select" label="Choose the source for the reference genome"> 36 <expand macro="param_reference_source"/>
79 <option value="cached" selected="true">locally cached</option>
80 <option value="history">from history</option>
81 </param>
82 <when value="cached"> 37 <when value="cached">
83 <param name="reference" type="select" label="Using reference genome"> 38 <param name="reference" type="select" label="Using reference genome">
84 <options from_data_table="fasta_indexes"/> 39 <options from_data_table="fasta_indexes">
85 <!-- No <filter> tag here! --> 40 <filter type="data_meta" column="1" key="dbkey" ref="bam_input"/>
86 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/> 41 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
42 </options>
87 </param> 43 </param>
88 </when> 44 </when>
89 <when value="history"> 45 <when value="history">
90 <param name="reference" type="data" format="fasta" label="Using reference genome"> 46 <param name="reference" type="data" format="fasta,fasta.gz" label="Using reference genome">
91 <validator type="no_options" message="The current history does not include a fasta dataset"/> 47 <validator type="no_options" message="The current history does not include a fasta dataset"/>
92 </param> 48 </param>
93 </when> 49 </when>
94 </conditional> 50 </conditional>
95 <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/>
96 </inputs> 51 </inputs>
97 <outputs> 52 <outputs>
98 <data name="output_vcf" format="vcf" label="${tool.name} (filtered VCF) on ${on_string}"> 53 <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (filtered VCF)"/>
99 <filter>input_type_cond['input_type'] == 'single'</filter> 54 <data name="output_metrics" format="tabular" label="${tool.name} on ${on_string} (metrics)"/>
100 </data>
101 <collection name="output_vcf_collection" type="list" label="${tool.name} (filtered VCFs) on ${on_string}">
102 <discover_datasets pattern="__name__" directory="output_vcf_dir" format="vcf" />
103 <filter>input_type_cond['input_type'] == 'collection'</filter>
104 </collection>
105 <data name="output_metrics" format="tabular" label="${tool.name} (metrics) on ${on_string}">
106 <filter>input_type_cond['input_type'] == 'single'</filter>
107 </data>
108 <collection name="output_metrics_collection" type="list" label="${tool.name} (metrics) on ${on_string}">
109 <discover_datasets pattern="__name__" directory="output_metrics_dir" format="tabular" />
110 <filter>input_type_cond['input_type'] == 'collection'</filter>
111 </collection>
112 </outputs> 55 </outputs>
113 <tests> 56 <tests>
114 <test> 57 <test expect_num_outputs="2">
115 <param name="input_type" value="collection"/>
116 <param name="bam_input_collection">
117 <collection type="list">
118 <element name="bam_input.bam" value="bam_input.bam" dbkey="89"/>
119 <element name="bam_input2.bam" value="bam_input2.bam" dbkey="89"/>
120 </collection>
121 </param>
122 <param name="vcf_input_collection">
123 <collection type="list">
124 <element name="vcf_input.vcf" value="vcf_input.vcf" dbkey="89"/>
125 <element name="vcf_input2.vcf" value="vcf_input2.vcf" dbkey="89"/>
126 </collection>
127 </param>
128 <param name="reference_source" value="history"/>
129 <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
130 <output_collection name="output_vcf_collection" type="list">
131 <element name="vcf_input.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
132 <element name="vcf_input2.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
133 </output_collection>
134 <output_collection name="output_metrics_collection" type="list">
135 <element name="vcf_input.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
136 <element name="vcf_input2.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
137 </output_collection>
138 </test>
139 <test>
140 <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/> 58 <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
141 <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/> 59 <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
142 <param name="reference_source" value="history"/> 60 <param name="reference_source" value="history"/>
143 <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/> 61 <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
144 <param name="output_vcf" value="output_vcf.vcf" ftype="vcf" compare="contains"/> 62 <output name="output_vcf" value="output_vcf.vcf" ftype="vcf" compare="contains"/>
63 <output name="output_metrics" file="output_metrics.tabular" ftype="tabular"/>
64 </test>
65 <test expect_num_outputs="2">
66 <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
67 <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
68 <param name="reference_source" value="cached"/>
69 <output name="output_vcf" value="output_vcf.vcf" ftype="vcf" compare="contains"/>
145 <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/> 70 <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
146 </test> 71 </test>
147 </tests> 72 </tests>
148 <help> 73 <help>
149 **What it does** 74 **What it does**
155 A metrics file is produced for each combination which provides the number of good SNPs, the average coverage and the genome 80 A metrics file is produced for each combination which provides the number of good SNPs, the average coverage and the genome
156 coverage percentage. 81 coverage percentage.
157 82
158 **Required Options** 83 **Required Options**
159 84
160 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single BAM and VCF files or collections of BAM and VCF files) based on the selected option.
161 * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history. 85 * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
162 * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time.
163 </help> 86 </help>
164 <citations> 87 <expand macro="citations" />
165 <citation type="bibtex">
166 @misc{None,
167 journal = {None},
168 author = {1. Stuber T},
169 title = {Manuscript in preparation},
170 year = {None},
171 url = {https://github.com/USDA-VS/vSNP},}
172 </citation>
173 </citations>
174 </tool> 88 </tool>
175 89