comparison pbgcpp.xml @ 0:a6d93d0d5328 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pbgcpp commit d8032f67869704a4f9308796d748966d1f4760ae
author iuc
date Wed, 01 Mar 2023 22:42:22 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a6d93d0d5328
1 <tool id="pbgcpp" name="pbgcpp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Compute genomic consensus and call variants using PacBio reads mapped to a reference.</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="xrefs"/>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 ## Stage the reference: link a history FASTA into the working directory, or take the path of the built-in genome
10 #if $reference_source.reference_source_selector == 'history':
11 #set ref_fn = 'reference.fa'
12 ln -f -s '$reference_source.ref_file' '$ref_fn' &&
13 #else:
14 #set ref_fn = $reference_source.ref_file.fields.path
15 #end if
16
17 ln -f -s '$input' 'input.bam' &&
18 ln -f -s '$input.metadata.bam_index' 'input.bam.bai' &&
19
20 ## Build the comma-separated --output value: one 'output.EXT' file per selected format
21 #set output_line = ','.join('output.' + str(x) for x in $output_selector)
22
23 ## Run gcpp; every argument, including the --output list, is passed quoted
24 gcpp
25 --num-threads \${GALAXY_SLOTS:-4}
26 --reference '$ref_fn'
27 --output '$output_line'
28 'input.bam'
29 ]]></command>
30 <inputs>
31 <!-- from tools-iuc minimap2 wrapper -->
32 <conditional name="reference_source">
33 <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?">
34 <option value="cached">Use a built-in genome index</option>
35 <option value="history">Use a genome from history and build index</option>
36 </param>
37 <when value="cached">
38 <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
39 <options from_data_table="all_fasta">
40 <filter type="sort_by" column="2" />
41 <validator type="no_options" message="No reference genomes are available" />
42 </options>
43 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
44 </param>
45 </when>
46 <when value="history">
47 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" />
48 </when>
49 </conditional>
50 <param type="data" name="input" format="bam" label="bam" help="The input BAM alignment file" />
51 <!-- Output options: at least one format is required, because the command always builds its output list from this selection -->
52 <param name="output_selector" type="select" multiple="true" optional="false" display="checkboxes" label="Output formats">
53 <!-- Use the format's extension as the value, so we can use it directly in the output_line constructor. -->
54 <option value="fa" selected="true">Computed consensus (fasta)</option>
55 <option value="vcf">Variants (vcf)</option>
56 <option value="gff">Variants (gff)</option>
57 </param>
58 </inputs>
59 <outputs> <!-- one dataset per format; each is created only when its extension is ticked in output_selector -->
60 <data name="fa" label="${tool.name} on ${on_string} (consensus)" format="fasta" from_work_dir="output.fa">
61 <filter>output_selector and 'fa' in output_selector</filter>
62 </data>
63 <data name="gff" label="${tool.name} on ${on_string} (gff)" format="gff" from_work_dir="output.gff">
64 <filter>output_selector and 'gff' in output_selector</filter>
65 </data>
66 <data name="vcf" label="${tool.name} on ${on_string} (vcf)" format="vcf" from_work_dir="output.vcf">
67 <filter>output_selector and 'vcf' in output_selector</filter>
68 </data>
69 </outputs>
70 <tests>
71 <!-- test1: basic test (output from pbmm2 1.10.0) -->
72 <test expect_num_outputs="1">
73 <param name="reference_source_selector" value="history" />
74 <param name="ref_file" value="bnd-ref.fasta"/>
75 <param name="input" value="pbmm2_3.bam"/>
76 <param name="output_selector" value="fa"/>
77 <output name="fa" ftype="fasta" file="pbgcpp_test1_out.fa"/>
78 </test>
79 <!-- test2: output selector -->
80 <test expect_num_outputs="3">
81 <param name="reference_source_selector" value="history" />
82 <param name="ref_file" value="bnd-ref.fasta"/>
83 <param name="input" value="pbmm2.bam"/>
84 <param name="output_selector" value="fa,gff,vcf"/>
85 <output name="fa" ftype="fasta" file="pbgcpp_test2_out.fa"/>
86 <output name="gff" ftype="gff">
87 <assert_contents>
88 <has_text text="gff-version 3" />
89 </assert_contents>
90 </output>
91 <output name="vcf" ftype="vcf">
92 <assert_contents>
93 <has_text text="fileformat=VCFv4.2" />
94 </assert_contents>
95 </output>
96 </test>
97 <!-- test3: cached genome; only the fasta output is selected, so exactly one output is expected -->
98 <test expect_num_outputs="1">
99 <param name="reference_source_selector" value="cached" />
100 <param name="ref_file" value="bnd-ref"/>
101 <param name="input" value="pbmm2_3.bam"/>
102 <param name="output_selector" value="fa"/>
103 <output name="fa" ftype="fasta" file="pbgcpp_test3_out.fa"/>
104 </test>
105 </tests>
106 <help><![CDATA[
107 **What it does**
108
109 Compute genomic consensus and call variants relative to the reference.
110
111 This tool requires a PacBio BAM file.
112
113 You can create one by mapping PacBio reads to the reference genome with
114 the `pbmm2 <root?tool_id=pbmm2>`__ tool. When doing this, you must give
115 pbmm2 the CLR reads in unaligned BAM format, not fastq or fasta. This is
116 because the pbgcpp algorithm uses additional information stored in the
117 unaligned BAM format that PacBio uses.
118
119 **NOTE**: The pbgcpp tool used to be called GenomicConsensus. It works for PacBio Sequel data and RS data with the P6-C4 chemistry.
120
121 --------------
122
123 pbgcpp is Pacific Biosciences’ tool to generate accurate reference
124 contigs. It takes an alignment in the form of a BAM file and polishes
125 the references with the provided subreads from the alignment. It uses
126 the Arrow algorithm in multi-molecule consensus setting and can reach up
127 to QV60 at coverage 100. pbgcpp is the successor of the venerable
128 GenomicConsensus suite which has reached EOL.
129
130 See the `Pacific Biosciences GitHub
131 page <https://github.com/PacificBiosciences/pbbioconda>`__ for more
132 information.
133
134 **Input**: Aligned subreads in PacBio BAM format (.bam). Compatible with PacBio Sequel data and RS data with the P6-C4 chemistry.
135
136 **Output**: Polished contigs in .fasta format and, if selected, variant calls in .vcf and/or .gff format.
137
138 **Why am I getting “Missing valid chemistry from input file, is this a
139 proper PBBAM input file?”**
140
141 pbgcpp expects metadata in the BAM file that most aligners (like
142 minimap2) don’t include by default. Align the PacBio reads with
143 pbmm2 instead.
144
145 ]]></help>
146 <expand macro="creator"/>
147 </tool>