annotate vsnp_add_zero_coverage.xml @ 2:01312f8a6ca9 draft

Uploaded
author greg
date Sun, 03 Jan 2021 16:29:00 +0000
parents 3cb0bf7e1b2d
children bb6cc994707d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
1 <tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="@WRAPPER_VERSION@.2" profile="@PROFILE@">
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
2 <description></description>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
3 <macros>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
4 <import>macros.xml</import>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
5 </macros>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
6 <requirements>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
7 <requirement type="package" version="1.76">biopython</requirement>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
8 <requirement type="package" version="0.25.3">pandas</requirement>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
9 <requirement type="package" version="0.15.4">pysam</requirement>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
10 </requirements>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
11 <command detect_errors="exit_code"><![CDATA[
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
12 #import re
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
13
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
14 ## The identifier for both of the following files is likely the same
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
15 ## string, so we append a file extension to allow for both links.
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
16 #set bam_identifier = re.sub('[^\s\w\-]', '_', str($bam_input.element_identifier)) + '.bam'
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
17 ln -s '${bam_input}' '${bam_identifier}' &&
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
18 #set vcf_identifier = re.sub('[^\s\w\-]', '_', str($vcf_input.element_identifier)) + '.vcf'
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
19 ln -s '${vcf_input}' '${vcf_identifier}' &&
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
20
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
21 python '$__tool_directory__/vsnp_add_zero_coverage.py'
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
22 --bam_input '$bam_identifier'
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
23 --vcf_input '$vcf_identifier'
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
24 #if str($reference_cond.reference_source) == 'cached'
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
25 --reference '$reference_cond.reference.fields.path'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
26 #else:
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
27 --reference '$reference_cond.reference'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
28 #end if
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
29 --output_metrics '$output_metrics'
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
30 --output_vcf '$output_vcf'
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
31 ]]></command>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
32 <inputs>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
33 <param name="bam_input" type="data" format="bam" label="BAM file"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
34 <param name="vcf_input" type="data" format="vcf" label="VCF file"/>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
35 <conditional name="reference_cond">
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
36 <expand macro="param_reference_source"/>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
37 <when value="cached">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
38 <param name="reference" type="select" label="Using reference genome">
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
39 <options from_data_table="fasta_indexes">
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
40 <filter type="data_meta" column="1" key="dbkey" ref="bam_input"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
41 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
42 </options>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
43 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
44 </when>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
45 <when value="history">
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
46 <param name="reference" type="data" format="fasta,fasta.gz" label="Using reference genome">
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
47 <validator type="no_options" message="The current history does not include a fasta dataset"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
48 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
49 </when>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
50 </conditional>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
51 </inputs>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
52 <outputs>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
53 <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (filtered VCF)"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
54 <data name="output_metrics" format="tabular" label="${tool.name} on ${on_string} (metrics)"/>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
55 </outputs>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
56 <tests>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
57 <test expect_num_outputs="2">
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
58 <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
59 <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
60 <param name="reference_source" value="history"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
61 <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
62 <output name="output_vcf" value="output_vcf.vcf" ftype="vcf" compare="contains"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
63 <output name="output_metrics" file="output_metrics.tabular" ftype="tabular"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
64 </test>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
65 <test expect_num_outputs="2">
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
66 <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
67 <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
68 <param name="reference_source" value="cached"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
69 <output name="output_vcf" value="output_vcf.vcf" ftype="vcf" compare="contains"/>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
70 <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
71 </test>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
72 </tests>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
73 <help>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
74 **What it does**
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
75
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
76 Accepts a single BAM file and its associated VCF file (or associated collections of each) to produce a VCF file for each
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
77 combination whose positions with no coverage are represented as "N". These outputs are restricted to SNPs and those regions
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
78 along the reference with no coverage.
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
79
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
80 A metrics file is also produced for each combination, providing the number of good SNPs, the average coverage and the genome
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
81 coverage percentage.
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
82
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
83 **Required Options**
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
84
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
85 * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
86 </help>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
87 <expand macro="citations" />
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
88 </tool>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
89