6
|
<tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="@WRAPPER_VERSION@.2+galaxy0" profile="@PROFILE@">
    <!--
        Galaxy wrapper for vsnp_add_zero_coverage.py.
        Inputs:  one BAM, one VCF and a reference FASTA (cached or from history).
        Outputs: a VCF and a tabular metrics file.
        @WRAPPER_VERSION@ and @PROFILE@ are not defined in this file; they are
        presumably tokens supplied by the imported macros.xml.
    -->
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Dependencies: three are expanded from macros (definitions not visible here); pysam is pinned locally. -->
    <requirements>
        <expand macro="biopython_requirement"/>
        <expand macro="openpyxl_requirement"/>
        <expand macro="pandas_requirement"/>
        <requirement type="package" version="0.16.0.1">pysam</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
#import re

## The identifier for both of the following files is likely the same
## string, so we append a file extension to allow for both links.
## Any character that is not whitespace, a word character or a hyphen
## is replaced with an underscore before the name is used as a link name.
#set bam_identifier = re.sub('[^\s\w\-]', '_', str($bam_input.element_identifier)) + '.bam'
ln -s '${bam_input}' '${bam_identifier}' &&
#set vcf_identifier = re.sub('[^\s\w\-]', '_', str($vcf_input.element_identifier)) + '.vcf'
ln -s '${vcf_input}' '${vcf_identifier}' &&

python '$__tool_directory__/vsnp_add_zero_coverage.py'
    --bam_input '$bam_identifier'
    --vcf_input '$vcf_identifier'
## A cached reference is a data table row, so its path field is passed;
## a history reference is a dataset and is passed directly.
#if str($reference_cond.reference_source) == 'cached':
    --reference '$reference_cond.reference.fields.path'
#else:
    --reference '$reference_cond.reference'
#end if
    --output_metrics '$output_metrics'
    --output_vcf '$output_vcf'
    ]]></command>
    <inputs>
        <param name="bam_input" type="data" format="bam" label="BAM file"/>
        <param name="vcf_input" type="data" format="vcf" label="VCF file"/>
        <!-- Reference genome: either a built-in entry or a FASTA dataset from the history. -->
        <conditional name="reference_cond">
            <expand macro="param_reference_source"/>
            <when value="cached">
                <param name="reference" type="select" label="Using reference genome">
                    <options from_data_table="fasta_indexes">
                        <!-- Only offer entries whose column 1 matches the dbkey of the selected BAM. -->
                        <filter type="data_meta" column="1" key="dbkey" ref="bam_input"/>
                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="reference" type="data" format="fasta,fasta.gz" label="Using reference genome">
                    <validator type="no_options" message="The current history does not include a fasta dataset"/>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (filtered VCF)"/>
        <data name="output_metrics" format="tabular" label="${tool.name} on ${on_string} (metrics)"/>
    </outputs>
    <tests>
        <!-- Reference supplied from the history. -->
        <test expect_num_outputs="2">
            <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
            <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
            <param name="reference_source" value="history"/>
            <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
            <output name="output_vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
            <output name="output_metrics" file="output_metrics.tabular" ftype="tabular"/>
        </test>
        <!-- Cached reference; no reference is selected explicitly, so this presumably relies on a fasta_indexes entry for dbkey 89. -->
        <test expect_num_outputs="2">
            <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
            <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
            <param name="reference_source" value="cached"/>
            <output name="output_vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
            <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
        </test>
    </tests>
    <help>
**What it does**

Accepts a combination of single BAM and associated VCF files (or associated collections of each) to produce a VCF file for each
combination whose positions with no coverage are represented as "N".  These outputs are restricted to SNPs and those regions
along the reference with no coverage.

A metrics file is produced for each combination which provides the number of good SNPs, the average coverage and the genome
coverage percentage.

**Required Options**

 * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
    </help>
    <expand macro="citations"/>
</tool>
|
|
90
|