Mercurial > repos > iuc > pbgcpp
diff pbgcpp.xml @ 0:a6d93d0d5328 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pbgcpp commit d8032f67869704a4f9308796d748966d1f4760ae
author | iuc |
---|---|
date | Wed, 01 Mar 2023 22:42:22 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pbgcpp.xml Wed Mar 01 22:42:22 2023 +0000 @@ -0,0 +1,147 @@ +<tool id="pbgcpp" name="pbgcpp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>Compute genomic consensus and call variants using PacBio reads mapped to a reference.</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="xrefs"/> + <expand macro="requirements"/> + <command detect_errors="exit_code"><![CDATA[ + ## set up files + #if $reference_source.reference_source_selector == 'history': + #set ref_fn = 'reference.fa' + ln -f -s '$reference_source.ref_file' '$ref_fn' && + #else: + #set ref_fn = $reference_source.ref_file.fields.path + #end if + + ln -s '$input' 'input.bam' && + ln -s '$input.metadata.bam_index' 'input.bam.bai' && + + ## set up the outputs + #set output_line = ','.join('output.' + str(x) for x in $output_selector) + + ## run variantCaller + gcpp + --num-threads \${GALAXY_SLOTS:-4} + --reference '$ref_fn' + --output $output_line + 'input.bam' + ]]></command> + <inputs> + <!-- from tools-iuc minimap2 wrapper --> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?"> + <option value="cached">Use a built-in genome index</option> + <option value="history">Use a genome from history and build index</option> + </param> + <when value="cached"> + <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list"> + <options from_data_table="all_fasta"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No reference genomes are available" /> + </options> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> + </param> + </when> + <when value="history"> + <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" /> + </when> + </conditional> + <param type="data" name="input" format="bam" label="bam" help="The input BAM alignment file" /> + <!-- Output Options --> + <param name="output_selector" type="select" multiple="True" display="checkboxes" label="Output formats"> + <!-- Use the format's extension as the value, so we can use it directly in the output_line constructor. --> + <option value="fa" selected="true">Computed consensus (fasta)</option> + <option value="vcf">Variants (vcf)</option> + <option value="gff">Variants (gff)</option> + </param> + </inputs> + <outputs> + <data name="fa" format="fasta" from_work_dir="output.fa" label="${tool.name} on ${on_string} (consensus)"> + <filter>output_selector and 'fa' in output_selector</filter> + </data> + <data name="gff" format="gff" from_work_dir="output.gff" label="${tool.name} on ${on_string} (gff)"> + <filter>output_selector and 'gff' in output_selector</filter> + </data> + <data name="vcf" format="vcf" from_work_dir="output.vcf" label="${tool.name} on ${on_string} (vcf)"> + <filter>output_selector and 'vcf' in output_selector</filter> + </data> + </outputs> + <tests> + <!-- test1: basic test (output from pbmm2 1.10.0) --> + <test expect_num_outputs="1"> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" value="bnd-ref.fasta"/> + <param name="input" value="pbmm2_3.bam"/> + <param name="output_selector" value="fa"/> + <output name="fa" ftype="fasta" file="pbgcpp_test1_out.fa"/> + </test> + <!-- test2: output selector --> + <test expect_num_outputs="3"> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" value="bnd-ref.fasta"/> + <param name="input" value="pbmm2.bam"/> + <param name="output_selector" value="fa,gff,vcf"/> + <output name="fa" ftype="fasta" file="pbgcpp_test2_out.fa"/> + <output name="gff" ftype="gff"> + <assert_contents> + <has_text text="gff-version 3" /> + </assert_contents> + </output> + <output name="vcf" ftype="vcf"> + <assert_contents> + <has_text text="fileformat=VCFv4.2" /> + </assert_contents> + </output> + </test> + <!-- test3: cached genome --> + <test> + <param name="reference_source_selector" value="cached" /> + <param name="ref_file" value="bnd-ref"/> + <param name="input" value="pbmm2_3.bam"/> + <param name="output_selector" value="fa"/> + <output name="fa" ftype="fasta" file="pbgcpp_test3_out.fa"/> + </test> + </tests> + <help><![CDATA[ +**What it does** + +Compute genomic consensus and call variants relative to the reference. + +This tool requires a PacBio BAM file. + +You can create one by mapping PacBio reads to the reference genome with +the `pbmm2 <root?tool_id=pbmm2>`__ tool. When doing this, you have to +input CLR reads to pbmm2 in [unaligned] BAM format, not fastq or fasta. This is +because the pbgcpp algorithm uses additional information stored in the +unaligned BAM format that PacBio uses. + +**NOTE**: The pbgcpp tool used to be called GenomicConsensus. It works for PacBio Sequel data and RS data with the P6-C4 chemistry. + +-------------- + +pbgcpp is Pacific Biosciences’ tool to generate accurate reference +contigs. It takes an alignment in the form of a BAM file and polishes +the references with the provided subreads from the alignment. It uses +the Arrow algorithm in multi-molecule consensus setting and can reach up +to QV60 at coverage 100. pbgcpp is the successor of the venerable +GenomicConsensus suite which has reached EOL. + +See the `Pacific Biosciences GitHub +page <https://github.com/PacificBiosciences/pbbioconda>`__ for more +information. + +**Input**: Aligned subreads in PacBio BAM format (.bam). Compatible with PacBio Sequel data and RS data with the P6-C4 chemistry. + +**Output**: Polished contigs in .fasta format. + +**Why am I getting “Missing valid chemistry from input file, is this a +proper PBBAM input file?”** + +pbgcpp expects metadata in the bamfile that most aligners (like +minimap2) don’t include by default. Align the PacBio reads file using +pbmm2. + + ]]></help> + <expand macro="creator"/> +</tool>