comparison vsnp_add_zero_coverage.xml @ 0:3cb0bf7e1b2d draft

Uploaded
author greg
date Tue, 21 Apr 2020 09:44:38 -0400
parents
children 01312f8a6ca9
comparison
equal deleted inserted replaced
-1:000000000000 0:3cb0bf7e1b2d
1 <tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="1.0.0">
2 <description></description>
3 <requirements> <!-- Package dependencies of the tool; each one is pinned to an exact version. -->
4 <requirement type="package" version="1.76">biopython</requirement>
5 <requirement type="package" version="1.16.5">numpy</requirement>
6 <requirement type="package" version="0.25.3">pandas</requirement>
7 <requirement type="package" version="0.15.4">pysam</requirement>
8 </requirements>
9 <!-- Creates four relative staging directories, symlinks the BAM and VCF inputs into the two input directories, then runs vsnp_add_zero_coverage.py. Single mode links each dataset under the basename of its path and passes explicit output paths; collection mode links each element under its identifier after every character other than whitespace, word characters and hyphen has been replaced by an underscore. Because that clean-up keeps whitespace, every path handed to ln is single-quoted so it stays one shell word. --> <command detect_errors="exit_code"><![CDATA[
10 #import os
11 #import re
12 #set input_type = $input_type_cond.input_type
13 #set input_bam_dir = 'input_bam_dir'
14 #set input_vcf_dir = 'input_vcf_dir'
15 #set output_vcf_dir = 'output_vcf_dir'
16 #set output_metrics_dir = 'output_metrics_dir'
17 mkdir -p $input_bam_dir &&
18 mkdir -p $input_vcf_dir &&
19 mkdir -p $output_vcf_dir &&
20 mkdir -p $output_metrics_dir &&
21 #if str($input_type) == "single":
22 #set bam_input = $input_type_cond.bam_input
23 #set file_name = $bam_input.file_name
24 #set file_name_base = $os.path.basename($file_name)
25 ln -s '$file_name' '$input_bam_dir/$file_name_base' &&
26 #set vcf_input = $input_type_cond.vcf_input
27 #set file_name = $vcf_input.file_name
28 #set file_name_base = $os.path.basename($file_name)
29 ln -s '$file_name' '$input_vcf_dir/$file_name_base' &&
30 #else:
31 #for $i in $input_type_cond.bam_input_collection:
32 #set filename = $i.file_name
33 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
34 ln -s '$filename' '$input_bam_dir/$identifier' &&
35 #end for
36 #for $i in $input_type_cond.vcf_input_collection:
37 #set filename = $i.file_name
38 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
39 ln -s '$filename' '$input_vcf_dir/$identifier' &&
40 #end for
41 #end if
42 python '$__tool_directory__/vsnp_add_zero_coverage.py'
43 --processes $processes
44 #if str($reference_cond.reference_source) == "cached":
45 --reference '$reference_cond.reference.fields.path'
46 #else:
47 --reference '$reference_cond.reference'
48 #end if
49 #if str($input_type) == "single":
50 --output_metrics '$output_metrics'
51 --output_vcf '$output_vcf'
52 #end if
53 ]]></command>
54 <inputs>
55 <conditional name="input_type_cond"> <!-- Chooses between one BAM dataset plus one VCF dataset, or a list collection of BAM files plus a list collection of VCF files. -->
56 <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
57 <option value="single" selected="true">Single files</option>
58 <option value="collection">Collections of files</option>
59 </param>
60 <when value="single">
61 <param name="bam_input" type="data" format="bam" label="BAM file">
62 <validator type="unspecified_build"/>
63 </param>
64 <param name="vcf_input" type="data" format="vcf" label="VCF file">
65 <validator type="unspecified_build"/>
66 </param>
67 </when>
68 <when value="collection">
69 <param name="bam_input_collection" type="data_collection" format="bam" collection_type="list" label="Collection of BAM files">
70 <validator type="unspecified_build"/>
71 </param>
72 <param name="vcf_input_collection" type="data_collection" format="vcf" collection_type="list" label="Collection of VCF files">
73 <validator type="unspecified_build"/>
74 </param>
75 </when>
76 </conditional>
77 <conditional name="reference_cond"> <!-- The reference genome is either an entry of the fasta_indexes data table ("cached") or a fasta dataset from the history ("history"). -->
78 <param name="reference_source" type="select" label="Choose the source for the reference genome">
79 <option value="cached" selected="true">locally cached</option>
80 <option value="history">from history</option>
81 </param>
82 <when value="cached">
83 <param name="reference" type="select" label="Using reference genome">
84 <options from_data_table="fasta_indexes"/>
85 <!-- No <filter> tag here! -->
86 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
87 </param>
88 </when>
89 <when value="history">
90 <param name="reference" type="data" format="fasta" label="Using reference genome">
91 <validator type="no_options" message="The current history does not include a fasta dataset"/>
92 </param>
93 </when>
94 </conditional>
95 <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/> <!-- Integer from 1 to 20, default 8; handed to the script as its processes option. -->
96 </inputs>
97 <outputs> <!-- The two plain datasets are kept only when input_type is "single"; the two list collections are kept only when it is "collection" and are filled from the files found in output_vcf_dir and output_metrics_dir. -->
98 <data name="output_vcf" format="vcf" label="${tool.name} (filtered VCF) on ${on_string}">
99 <filter>input_type_cond['input_type'] == 'single'</filter>
100 </data>
101 <collection name="output_vcf_collection" type="list" label="${tool.name} (filtered VCFs) on ${on_string}">
102 <discover_datasets pattern="__name__" directory="output_vcf_dir" format="vcf" />
103 <filter>input_type_cond['input_type'] == 'collection'</filter>
104 </collection>
105 <data name="output_metrics" format="tabular" label="${tool.name} (metrics) on ${on_string}">
106 <filter>input_type_cond['input_type'] == 'single'</filter>
107 </data>
108 <collection name="output_metrics_collection" type="list" label="${tool.name} (metrics) on ${on_string}">
109 <discover_datasets pattern="__name__" directory="output_metrics_dir" format="tabular" />
110 <filter>input_type_cond['input_type'] == 'collection'</filter>
111 </collection>
112 </outputs>
113 <tests>
114 <test> <!-- Collection mode: two BAM elements and two VCF elements with the reference taken from the history; both elements of each output collection are compared against the same expected file. -->
115 <param name="input_type" value="collection"/>
116 <param name="bam_input_collection">
117 <collection type="list">
118 <element name="bam_input.bam" value="bam_input.bam" dbkey="89"/>
119 <element name="bam_input2.bam" value="bam_input2.bam" dbkey="89"/>
120 </collection>
121 </param>
122 <param name="vcf_input_collection">
123 <collection type="list">
124 <element name="vcf_input.vcf" value="vcf_input.vcf" dbkey="89"/>
125 <element name="vcf_input2.vcf" value="vcf_input2.vcf" dbkey="89"/>
126 </collection>
127 </param>
128 <param name="reference_source" value="history"/>
129 <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
130 <output_collection name="output_vcf_collection" type="list">
131 <element name="vcf_input.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
132 <element name="vcf_input2.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
133 </output_collection>
134 <output_collection name="output_metrics_collection" type="list">
135 <element name="vcf_input.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
136 <element name="vcf_input2.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
137 </output_collection>
138 </test>
139 <test> <!-- Single-file mode: input_type is left at its default ("single"), the reference comes from the history, and both outputs are compared against their expected files. -->
140 <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
141 <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
142 <param name="reference_source" value="history"/>
143 <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
144 <output name="output_vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
145 <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
146 </test>
147 </tests>
148 <help>
149 **What it does**
150
151 Accepts a single BAM file and its associated VCF file (or a collection of BAM files and an associated collection of VCF files) and
152 produces, for each BAM/VCF pair, a VCF file in which positions with no coverage are represented as "N". These outputs are restricted
153 to SNPs and those regions along the reference with no coverage.
154
155 A metrics file is produced for each combination which provides the number of good SNPs, the average coverage and the genome
156 coverage percentage.
157
158 **Required Options**
159
160 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single BAM and VCF files or collections of BAM and VCF files) based on the selected option.
161 * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
162 * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time.
163 </help>
164 <citations> <!-- Placeholder BibTeX entry: key, journal and year are recorded as None and the title as "Manuscript in preparation"; only the author and project URL carry real information. -->
165 <citation type="bibtex">
166 @misc{None,
167 journal = {None},
168 author = {1. Stuber T},
169 title = {Manuscript in preparation},
170 year = {None},
171 url = {https://github.com/USDA-VS/vSNP},}
172 </citation>
173 </citations>
174 </tool>
175