annotate vsnp_add_zero_coverage.xml @ 0:3cb0bf7e1b2d draft

Uploaded
author greg
date Tue, 21 Apr 2020 09:44:38 -0400
parents
children 01312f8a6ca9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
1 <tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="1.0.0">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
2 <description></description>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
3 <requirements>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="1.76">biopython</requirement>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="1.16.5">numpy</requirement>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
6 <requirement type="package" version="0.25.3">pandas</requirement>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
7 <requirement type="package" version="0.15.4">pysam</requirement>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
8 </requirements>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
9 <command detect_errors="exit_code"><![CDATA[
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
10 #import os
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
11 #import re
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
12 #set input_type = $input_type_cond.input_type
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
13 #set input_bam_dir = 'input_bam_dir'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
14 #set input_vcf_dir = 'input_vcf_dir'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
15 #set output_vcf_dir = 'output_vcf_dir'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
16 #set output_metrics_dir = 'output_metrics_dir'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
17 mkdir -p $input_bam_dir &&
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
18 mkdir -p $input_vcf_dir &&
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
19 mkdir -p $output_vcf_dir &&
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
20 mkdir -p $output_metrics_dir &&
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
21 #if str($input_type) == "single":
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
22 #set bam_input = $input_type_cond.bam_input
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
23 #set file_name = $bam_input.file_name
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
24 #set file_name_base = $os.path.basename($file_name)
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
25 ln -s '$file_name' '$input_bam_dir/$file_name_base' &&
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
26 #set vcf_input = $input_type_cond.vcf_input
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
27 #set file_name = $vcf_input.file_name
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
28 #set file_name_base = $os.path.basename($file_name)
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
29 ln -s '$file_name' '$input_vcf_dir/$file_name_base' &&
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
30 #else:
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
31 #for $i in $input_type_cond.bam_input_collection:
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
32 #set filename = $i.file_name
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
33 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
34 ln -s '$filename' '$input_bam_dir/$identifier' &&
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
35 #end for
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
36 #for $i in $input_type_cond.vcf_input_collection:
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
37 #set filename = $i.file_name
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
38 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
39 ln -s '$filename' '$input_vcf_dir/$identifier' &&
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
40 #end for
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
41 #end if
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
42 python '$__tool_directory__/vsnp_add_zero_coverage.py'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
43 --processes $processes
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
44 #if str($reference_cond.reference_source) == "cached":
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
45 --reference '$reference_cond.reference.fields.path'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
46 #else:
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
47 --reference '$reference_cond.reference'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
48 #end if
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
49 #if str($input_type) == "single":
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
50 --output_metrics '$output_metrics'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
51 --output_vcf '$output_vcf'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
52 #end if
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
53 ]]></command>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
54 <inputs>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
55 <conditional name="input_type_cond">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
56 <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
57 <option value="single" selected="true">Single files</option>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
58 <option value="collection">Collections of files</option>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
59 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
60 <when value="single">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
61 <param name="bam_input" type="data" format="bam" label="BAM file">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
62 <validator type="unspecified_build"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
63 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
64 <param name="vcf_input" type="data" format="vcf" label="VCF file">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
65 <validator type="unspecified_build"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
66 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
67 </when>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
68 <when value="collection">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
69 <param name="bam_input_collection" type="data_collection" format="bam" collection_type="list" label="Collection of BAM files">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
70 <validator type="unspecified_build"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
71 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
72 <param name="vcf_input_collection" type="data_collection" format="vcf" collection_type="list" label="Collection of VCF files">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
73 <validator type="unspecified_build"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
74 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
75 </when>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
76 </conditional>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
77 <conditional name="reference_cond">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
78 <param name="reference_source" type="select" label="Choose the source for the reference genome">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
79 <option value="cached" selected="true">locally cached</option>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
80 <option value="history">from history</option>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
81 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
82 <when value="cached">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
83 <param name="reference" type="select" label="Using reference genome">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
84 <options from_data_table="fasta_indexes"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
85 <!-- No <filter> tag here! -->
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
86 <validator type="no_options" message="No built-in reference genomes are available; select a reference from the history instead"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
87 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
88 </when>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
89 <when value="history">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
90 <param name="reference" type="data" format="fasta" label="Using reference genome">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
91 <validator type="no_options" message="The current history does not include a fasta dataset"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
92 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
93 </when>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
94 </conditional>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
95 <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
96 </inputs>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
97 <outputs>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
98 <data name="output_vcf" format="vcf" label="${tool.name} (filtered VCF) on ${on_string}">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
99 <filter>input_type_cond['input_type'] == 'single'</filter>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
100 </data>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
101 <collection name="output_vcf_collection" type="list" label="${tool.name} (filtered VCFs) on ${on_string}">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
102 <discover_datasets pattern="__name__" directory="output_vcf_dir" format="vcf" />
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
103 <filter>input_type_cond['input_type'] == 'collection'</filter>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
104 </collection>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
105 <data name="output_metrics" format="tabular" label="${tool.name} (metrics) on ${on_string}">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
106 <filter>input_type_cond['input_type'] == 'single'</filter>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
107 </data>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
108 <collection name="output_metrics_collection" type="list" label="${tool.name} (metrics) on ${on_string}">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
109 <discover_datasets pattern="__name__" directory="output_metrics_dir" format="tabular" />
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
110 <filter>input_type_cond['input_type'] == 'collection'</filter>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
111 </collection>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
112 </outputs>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
113 <tests>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
114 <test>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
115 <param name="input_type" value="collection"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
116 <param name="bam_input_collection">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
117 <collection type="list">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
118 <element name="bam_input.bam" value="bam_input.bam" dbkey="89"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
119 <element name="bam_input2.bam" value="bam_input2.bam" dbkey="89"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
120 </collection>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
121 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
122 <param name="vcf_input_collection">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
123 <collection type="list">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
124 <element name="vcf_input.vcf" value="vcf_input.vcf" dbkey="89"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
125 <element name="vcf_input2.vcf" value="vcf_input2.vcf" dbkey="89"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
126 </collection>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
127 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
128 <param name="reference_source" value="history"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
129 <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
130 <output_collection name="output_vcf_collection" type="list">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
131 <element name="vcf_input.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
132 <element name="vcf_input2.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
133 </output_collection>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
134 <output_collection name="output_metrics_collection" type="list">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
135 <element name="vcf_input.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
136 <element name="vcf_input2.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
137 </output_collection>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
138 </test>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
139 <test>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
140 <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
141 <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
142 <param name="reference_source" value="history"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
143 <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
144 <output name="output_vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
145 <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
146 </test>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
147 </tests>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
148 <help>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
149 **What it does**
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
150
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
151 Accepts a combination of single BAM and associated VCF files (or associated collections of each) to produce a VCF file for each
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
152 combination whose positions with no coverage are represented as "N". These outputs are restricted to SNPs and those regions
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
153 along the reference with no coverage.
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
154
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
155 A metrics file is produced for each combination which provides the number of good SNPs, the average coverage and the genome
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
156 coverage percentage.
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
157
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
158 **Required Options**
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
159
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
160 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single BAM and VCF files or collections of BAM and VCF files) based on the selected option.
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
161 * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
162 * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time.
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
163 </help>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
164 <citations>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
165 <citation type="bibtex">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
166 @misc{None,
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
167 journal = {None},
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
168 author = {1. Stuber T},
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
169 title = {Manuscript in preparation},
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
170 year = {None},
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
171 url = {https://github.com/USDA-VS/vSNP},}
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
172 </citation>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
173 </citations>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
174 </tool>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
175