Mercurial > repos > iuc > pbgcpp
comparison pbgcpp.xml @ 0:a6d93d0d5328 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pbgcpp commit d8032f67869704a4f9308796d748966d1f4760ae
author | iuc |
---|---|
date | Wed, 01 Mar 2023 22:42:22 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a6d93d0d5328 |
---|---|
1 <tool id="pbgcpp" name="pbgcpp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>Compute genomic consensus and call variants using PacBio reads mapped to a reference.</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="xrefs"/> | |
7 <expand macro="requirements"/> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 ## stage the reference: a history dataset is linked under a fixed local name, a built-in genome is read from the path stored in its data table entry | |
10 #if $reference_source.reference_source_selector == 'history': | |
11 #set ref_fn = 'reference.fa' | |
12 ln -f -s '$reference_source.ref_file' '$ref_fn' && | |
13 #else: | |
14 #set ref_fn = $reference_source.ref_file.fields.path | |
15 #end if | |
16 | |
17 ln -f -s '$input' 'input.bam' && | |
18 ln -f -s '$input.metadata.bam_index' 'input.bam.bai' && | |
19 | |
20 ## build the comma-separated list of requested output files (output.fa, output.vcf, output.gff) from the selected extensions | |
21 #set output_line = ','.join('output.' + str(x) for x in $output_selector) | |
22 | |
23 ## run gcpp on the linked BAM; the thread count falls back to 4 when GALAXY_SLOTS is unset | |
24 gcpp | |
25 --num-threads \${GALAXY_SLOTS:-4} | |
26 --reference '$ref_fn' | |
27 --output '$output_line' | |
28 'input.bam' | |
29 ]]></command> | |
30 <inputs> | |
31 <!-- reference selection, adapted from the tools-iuc minimap2 wrapper: either a built-in genome from the all_fasta data table or a FASTA dataset from the history --> | |
32 <conditional name="reference_source"> | |
33 <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?"> | |
34 <option value="cached">Use a built-in genome index</option> | |
35 <option value="history">Use a genome from history and build index</option> | |
36 </param> | |
37 <when value="cached"> | |
38 <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list"> | |
39 <options from_data_table="all_fasta"> | |
40 <filter type="sort_by" column="2" /> | |
41 <validator type="no_options" message="No reference genomes are available" /> | |
42 </options> | |
43 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | |
44 </param> | |
45 </when> | |
46 <when value="history"> | |
47 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" /> | |
48 </when> | |
49 </conditional> | |
50 <param name="input" type="data" format="bam" label="bam" help="The input BAM alignment file" /> | |
51 <!-- Output options: optional="false" makes Galaxy require at least one format, because the command builds the gcpp output argument from this selection and has nothing to pass when it is empty --> | |
52 <param name="output_selector" type="select" multiple="true" optional="false" display="checkboxes" label="Output formats"> | |
53 <!-- Use the format's extension as the value, so we can use it directly in the output_line constructor. --> | |
54 <option value="fa" selected="true">Computed consensus (fasta)</option> | |
55 <option value="vcf">Variants (vcf)</option> | |
56 <option value="gff">Variants (gff)</option> | |
57 </param> | |
58 </inputs> | |
59 <outputs> <!-- one dataset per output format; each is picked up from the job working directory and only created when its value is ticked in output_selector --> | |
60 <data name="fa" format="fasta" from_work_dir="output.fa" label="${tool.name} on ${on_string} (consensus)"> | |
61 <filter>output_selector and 'fa' in output_selector</filter> | |
62 </data> | |
63 <data name="gff" format="gff" from_work_dir="output.gff" label="${tool.name} on ${on_string} (gff)"> | |
64 <filter>output_selector and 'gff' in output_selector</filter> | |
65 </data> | |
66 <data name="vcf" format="vcf" from_work_dir="output.vcf" label="${tool.name} on ${on_string} (vcf)"> | |
67 <filter>output_selector and 'vcf' in output_selector</filter> | |
68 </data> | |
69 </outputs> | |
70 <tests> | |
71 <!-- test1: basic test with a history reference and the consensus output only (input BAM is output from pbmm2 1.10.0) --> | |
72 <test expect_num_outputs="1"> | |
73 <param name="reference_source_selector" value="history" /> | |
74 <param name="ref_file" value="bnd-ref.fasta"/> | |
75 <param name="input" value="pbmm2_3.bam"/> | |
76 <param name="output_selector" value="fa"/> | |
77 <output name="fa" ftype="fasta" file="pbgcpp_test1_out.fa"/> | |
78 </test> | |
79 <!-- test2: output selector with all three formats requested; gff and vcf are only checked for their format header --> | |
80 <test expect_num_outputs="3"> | |
81 <param name="reference_source_selector" value="history" /> | |
82 <param name="ref_file" value="bnd-ref.fasta"/> | |
83 <param name="input" value="pbmm2.bam"/> | |
84 <param name="output_selector" value="fa,gff,vcf"/> | |
85 <output name="fa" ftype="fasta" file="pbgcpp_test2_out.fa"/> | |
86 <output name="gff" ftype="gff"> | |
87 <assert_contents> | |
88 <has_text text="gff-version 3" /> | |
89 </assert_contents> | |
90 </output> | |
91 <output name="vcf" ftype="vcf"> | |
92 <assert_contents> | |
93 <has_text text="fileformat=VCFv4.2" /> | |
94 </assert_contents> | |
95 </output> | |
96 </test> | |
97 <!-- test3: cached genome, consensus output only (NOTE(review): presumably relies on a bnd-ref entry in the test all_fasta data table; confirm) --> | |
98 <test expect_num_outputs="1"> | |
99 <param name="reference_source_selector" value="cached" /> | |
100 <param name="ref_file" value="bnd-ref"/> | |
101 <param name="input" value="pbmm2_3.bam"/> | |
102 <param name="output_selector" value="fa"/> | |
103 <output name="fa" ftype="fasta" file="pbgcpp_test3_out.fa"/> | |
104 </test> | |
105 </tests> | |
106 <help><![CDATA[ | |
107 **What it does** | |
108 | |
109 Compute genomic consensus and call variants relative to the reference. | |
110 | |
111 This tool requires a PacBio BAM file. | |
112 | |
113 You can create one by mapping PacBio reads to the reference genome with | |
114 the `pbmm2 <root?tool_id=pbmm2>`__ tool. When doing this, you have to | |
115 provide the CLR reads to pbmm2 in unaligned BAM format, not FASTQ or FASTA. This is | |
116 because the pbgcpp algorithm uses additional information stored in the | |
117 unaligned BAM format that PacBio uses. | |
118 | |
119 **NOTE**: The pbgcpp tool used to be called GenomicConsensus. It works for PacBio Sequel data and RS data with the P6-C4 chemistry. | |
120 | |
121 -------------- | |
122 | |
123 pbgcpp is Pacific Biosciences’ tool to generate accurate reference | |
124 contigs. It takes an alignment in the form of a BAM file and polishes | |
125 the references with the provided subreads from the alignment. It uses | |
126 the Arrow algorithm in multi-molecule consensus setting and can reach up | |
127 to QV60 at coverage 100. pbgcpp is the successor of the venerable | |
128 GenomicConsensus suite which has reached EOL. | |
129 | |
130 See the `Pacific Biosciences GitHub | |
131 page <https://github.com/PacificBiosciences/pbbioconda>`__ for more | |
132 information. | |
133 | |
134 **Input**: Aligned subreads in PacBio BAM format (.bam). Compatible with PacBio Sequel data and RS data with the P6-C4 chemistry. | |
135 | |
136 **Output**: Polished contigs in .fasta format and, if selected, variant calls in .vcf and/or .gff format. | |
137 | |
138 **Why am I getting “Missing valid chemistry from input file, is this a | |
139 proper PBBAM input file?”** | |
140 | |
141 pbgcpp expects metadata in the BAM file that most aligners (like | |
142 minimap2) don’t include by default. Align the PacBio reads using | |
143 pbmm2 instead. | |
144 | |
145 ]]></help> | |
146 <expand macro="creator"/> | |
147 </tool> |