view manta.xml @ 6:cb5691381acb draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/manta commit 01bc6749826f5ef4a22540a9aa6a5ffd93786d4c
author artbio
date Thu, 08 Jun 2023 17:36:38 +0000
parents f55d45b0c6d1
children 555971edd46e
line wrap: on
line source

<tool id="manta" name="Manta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
    <!-- One-line summary of what the tool does. -->
    <description>Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads.</description>
    <!-- Shared macro definitions are imported from manta_macros.xml. -->
    <macros>
        <import>manta_macros.xml</import>
    </macros>
    <!-- Expands the "requirements" and "stdio" macros; presumably both are defined in
         manta_macros.xml, which is not visible from this file. -->
    <expand macro="requirements"/>
    <expand macro="stdio"/>

    <command detect_errors="exit_code"><![CDATA[
    ## Command template (Cheetah + shell). Steps:
    ##   1. stage the configuration file and symlink the BAM inputs with their indexes,
    ##   2. optionally replace or regenerate the configuration file,
    ##   3. run configManta.py to create the workflow in the run directory,
    ##   4. execute the generated runWorkflow.py locally.
    ## The two tokens below are presumably defined in manta_macros.xml; the second one is
    ## expected to set the reference_fasta_filename variable used further down.
    @pipefail@
    @set_reference_fasta_filename@
    #set run_dir = './MantaWorkflow'
    ## A multiple select renders as a comma-separated string ("None" when nothing is
    ## ticked), so splitting it yields a list that can be tested with "in".
    #set extra_flags = str( $additional_param ).split(',')

    ## Start from the default configuration shipped next to this tool. The path is quoted
    ## so that an installation directory containing spaces does not break the copy.
    cp '$__tool_directory__/configManta.py.ini' configManta.py.ini &&

    ## The normal BAM is present in both modes; the tumor BAM only in "tumor_bam" mode.
    ln -s '$bam_input.normal_bam_file' normal.bam &&
    ln -s '$bam_input.normal_bam_file.metadata.bam_index' normal.bai &&
    #if str( $bam_input.bam_input_selector ) == "tumor_bam":
    ln -s '$bam_input.tumor_bam_file' tumor.bam &&
    ln -s '$bam_input.tumor_bam_file.metadata.bam_index' tumor.bai &&
    #end if

    ## A user-supplied configuration file overwrites the default one.
    #if str( $set_configuration.set_configuration_switch ) == "Custom_config_file":
        cp '$set_configuration.CustomConfigFile' ./configManta.py.ini &&
    #end if
    ## Form-driven configuration: drop the default file and let the helper script
    ## customConfigManta.py produce a new one from the form values.
    #if str( $set_configuration.set_configuration_switch ) == "Customized":
        rm ./configManta.py.ini &&
        python '$__tool_directory__/customConfigManta.py'
        --minCandidateVariantSize '$set_configuration.minCandidateVariantSize'
        --rnaMinCandidateVariantSize '$set_configuration.rnaMinCandidateVariantSize'
        --minEdgeObservations '$set_configuration.minEdgeObservations'
        --graphNodeMaxEdgeCount '$set_configuration.graphNodeMaxEdgeCount'
        --minCandidateSpanningCount '$set_configuration.minCandidateSpanningCount'
        --minScoredVariantSize '$set_configuration.minScoredVariantSize'
        --minDiploidVariantScore '$set_configuration.minDiploidVariantScore'
        --minPassDiploidVariantScore '$set_configuration.minPassDiploidVariantScore'
        --minPassDiploidGTScore '$set_configuration.minPassDiploidGTScore'
        --minSomaticScore '$set_configuration.minSomaticScore'
        --minPassSomaticScore '$set_configuration.minPassSomaticScore'
        --enableRemoteReadRetrievalForInsertionsInGermlineCallingModes '$set_configuration.enableRemoteReadRetrievalForInsertionsInGermlineCallingModes'
        --enableRemoteReadRetrievalForInsertionsInCancerCallingModes '$set_configuration.enableRemoteReadRetrievalForInsertionsInCancerCallingModes'
        --useOverlapPairEvidence '$set_configuration.useOverlapPairEvidence' &&
    #end if

    configManta.py
        --referenceFasta='${reference_fasta_filename}'
        --config='./configManta.py.ini'
        --bam='normal.bam'
        #if str( $bam_input.bam_input_selector ) == "tumor_bam":
            --tumorBam='tumor.bam'
        #end if
        ## Forward the "Additional parameters" checkboxes, which were previously ignored.
        ## NOTE(review): RNA mode is expected to change which result files Manta writes;
        ## confirm the declared outputs still match when "rna" is selected.
        #if 'exome' in $extra_flags:
            --exome
        #end if
        #if 'rna' in $extra_flags:
            --rna
        #end if
        #if 'unstrandedRNA' in $extra_flags:
            --unstrandedRNA
        #end if
        --runDir='${run_dir}'
        --scanSizeMb=${advanced.scanSizeMb}
        --callMemMb=${advanced.callMemMb} &&

    ## Run the generated workflow on the local node with the job's allotted cores.
    python2 '${run_dir}/runWorkflow.py' -m local -j \${GALAXY_SLOTS:-4}

    ]]></command>
    <inputs>
        <!-- Reference genome selection; the conditional is supplied by a macro. -->
        <expand macro="reference_source_conditional" />
        <!-- Analysis mode: a single normal BAM, or a normal/tumor pair. -->
        <conditional name="bam_input">
            <param name="bam_input_selector" type="select" label="Single 'normal' or 'normal vs tumor' analysis" help="Select between a single normal BAM file or a pair of normal/tumor BAM files">
                <option value="not_tumor_bam">Normal</option>
                <option value="tumor_bam">Normal + Tumor</option>
            </param>
            <when value="not_tumor_bam">
                <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." />
            </when>
            <when value="tumor_bam">
                <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." />
                <param name="tumor_bam_file" type="data" format="bam" label="select tumor BAM" help="Select the files you wish to send to Manta (tumor sample, it must be in BAM format)." />
            </when>
        </conditional>
        <!-- Optional switches; the option values mirror configManta.py flag names
             (exome, rna, unstrandedRNA). -->
        <param name="additional_param" type="select" multiple="true" display="checkboxes" label="Additional parameters" >
            <option value="exome">Set options for WES input: turn off depth filters</option>
            <option value="rna">Set options for RNA-Seq input. Must specify exactly one bam input file</option>
            <option value="unstrandedRNA">Set if RNA-Seq input is unstranded: Allows splice-junctions on either strand</option>
        </param>
        <!-- Resource tuning; both values must be positive, hence min="1". -->
        <section name="advanced" title="Advanced options" expanded="false">
            <param name="callMemMb" type="integer" value="8000" min="1" label="Set default task memory requirements" help="The maximum memory size to assign to tasks" />
            <param name="scanSizeMb" type="integer" value="12" min="1" label="Set maximum sequence region size" help="The maximum sequence region size (in megabases) scanned by each task during SV Locus graph generation. (default: 12)" />
            <!-- <param name="generateEvidenceBam" type="boolean" checked="False" truevalue="-s" falsevalue="" label="Generate a bam of supporting reads for all SVs" help="Click yes for generating a BAM of supporting reads for all SVs."/> -->
        </section>
        <!-- Configuration source: shipped default, an uploaded INI file, or values typed into this form. -->
        <conditional name="set_configuration">
            <param name="set_configuration_switch" type="select" label="Do you want to change default configuration settings?">
                <option value="Default_config_file">Default Manta Configuration File</option>
                <option value="Custom_config_file">Upload your Own Configuration File</option>
                <option value="Customized">Customize a Configuration File using this Galaxy Form</option>
            </param>
            <when value="Default_config_file">
            </when>
            <when value="Custom_config_file">
                <param format="ini" name="CustomConfigFile" type="data" label="config file"/>
            </when>
            <when value="Customized">
                <param name="minCandidateVariantSize" type="integer" value="8" label="minCandidateVariantSize" help="Run discovery and candidate reporting for all SVs/indels at or above this size."/>
                <param name="rnaMinCandidateVariantSize" type="integer" value="1000" label="rnaMinCandidateVariantSize" help="Separate option (to provide different default) used for runs in RNA-mode."/>
                <param name="minEdgeObservations" type="integer" value="3" label="minEdgeObservations" help="Remove all edges from the graph unless they're supported by this many 'observations'."/>
                <param name="graphNodeMaxEdgeCount" type="integer" value="10" label="graphNodeMaxEdgeCount" help="If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge."/>
                <param name="minCandidateSpanningCount" type="integer" value="3" label="minCandidateSpanningCount" help="Run discovery and candidate reporting for all SVs/indels with at least this many spanning support observations."/>
                <param name="minScoredVariantSize" type="integer" value="50" label="minScoredVariantSize" help="After candidate identification, only score and report SVs/indels at or above this size."/>
                <param name="minDiploidVariantScore" type="integer" value="10" label="minDiploidVariantScore" help="Minimum VCF 'QUAL' score for a variant to be included in the diploid vcf."/>
                <param name="minPassDiploidVariantScore" type="integer" value="20" label="minPassDiploidVariantScore" help="VCF 'QUAL' score below which a variant is marked as filtered in the diploid vcf."/>
                <param name="minPassDiploidGTScore" type="integer" value="15" label="minPassDiploidGTScore" help="Minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf."/>
                <param name="minSomaticScore" type="integer" value="10" label="minSomaticScore" help="Somatic quality scores below this level are not included in the somatic vcf."/>
                <param name="minPassSomaticScore" type="integer" value="30" label="minPassSomaticScore" help="Somatic quality scores below this level are filtered in the somatic vcf."/>
                <param name="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" type="integer" value="1" label="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" help="Remote read retrieval is used to improve the assembly of putative insertions by retrieving any mate reads in remote locations with poor mapping quality. This feature can be enabled/disabled separately for germline and cancer calling below."/>
                <param name="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" type="integer" value="0" label="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" help="Here 'CancerCallingModes' includes tumor-normal subtraction and tumor-only calling. 'GermlineCallingModes' includes all other calling modes."/>
                <param name="useOverlapPairEvidence" type="integer" value="0" label="useOverlapPairEvidence" help="Set if an overlapping read pair will be considered as evidence. Set this value &lt;= 0 to skip overlapping read pairs."/>
            </when>
        </conditional>
        <!-- Toggles that decide which optional datasets are added to the history. -->
        <param name="config_file_check" type="boolean" label="output conf file" checked="False" help="Show configuration file on history"/>
        <param name="candidateSV_check" type="boolean" label="Unfiltered structural variants" checked="False"
               help="All unscored structural variant candidates"/>
        <param name="candidateSmallIndels_check" type="boolean" label="Unfiltered small indel candidates" checked="False"
               help="Subset of the unscored candidates, containing only small indel variants"/>
        <!-- The help text is kept on one line: a line break inside an attribute value is
             turned into spaces by the parser and would show up as a gap in the UI. -->
        <param name="diploidSV_check" type="boolean" label="Score-filtered variants in diploid model" checked="False"
               help="Show filtered variants in a diploid (only normal) model. In the case of a tumor/normal subtraction, the scores in this file *do not* reflect any information from the tumor sample" />
    </inputs>
    <outputs>
        <!-- Configuration file actually used for the run; only kept when requested.
             NOTE(review): declared as "tabular" although the file is an INI file; confirm
             the intended datatype. -->
        <data name="conf_file"
              format="tabular"
              label="conf_file.ini"
              from_work_dir="./configManta.py.ini">
            <filter>config_file_check == True</filter>
        </data>
        <!-- Optional VCFs collected from the workflow results directory, each gated by
             its own checkbox. -->
        <data name="candidateSV"
              format="vcf_bgzip"
              label="Manta unfiltered variants"
              from_work_dir="MantaWorkflow/results/variants/candidateSV.vcf.gz">
            <filter>candidateSV_check == True</filter>
        </data>
        <data name="candidateSmallIndels"
              format="vcf_bgzip"
              label="Manta unfiltered indels"
              from_work_dir="MantaWorkflow/results/variants/candidateSmallIndels.vcf.gz">
            <filter>candidateSmallIndels_check == True</filter>
        </data>
        <data name="diploidSV"
              format="vcf_bgzip"
              label="Score-filtered Variants (diploid model)"
              from_work_dir="MantaWorkflow/results/variants/diploidSV.vcf.gz">
            <filter>diploidSV_check == True</filter>
        </data>
        <!-- Somatic calls are collected whenever the normal + tumor mode is selected. -->
        <data name="somaticSV"
              format="vcf_bgzip"
              label="Score-filtered Variants (somatic model)"
              from_work_dir="MantaWorkflow/results/variants/somaticSV.vcf.gz">
            <filter>bam_input['bam_input_selector'] == 'tumor_bam'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Cached hg19 reference, normal + tumor, default configuration; checks the
             small-indel candidates and the somatic calls. -->
        <test>
            <param name="reference_source_selector" value="cached"/>
            <param name="index" value="hg19"/>
            <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/>
            <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
            <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
            <param name="set_configuration_switch" value="Default_config_file"/>
            <param name="callMemMb" value="1000"/>
            <param name="candidateSmallIndels_check" value="True"/>
            <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
            <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
        </test>
        <!-- Same inputs, but the configuration file is generated from the form, with
             every form field left at its default value. -->
        <test>
            <param name="reference_source_selector" value="cached"/>
            <param name="index" value="hg19"/>
            <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/>
            <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
            <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
            <param name="set_configuration_switch" value="Customized"/>
            <param name="callMemMb" value="1000"/>
            <param name="candidateSmallIndels_check" value="True"/>
            <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
            <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
        </test>
        <!-- Same parameters and expected outputs as the first test. -->
        <test>
            <param name="reference_source_selector" value="cached"/>
            <param name="index" value="hg19"/>
            <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/>
            <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
            <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
            <param name="set_configuration_switch" value="Default_config_file"/>
            <param name="callMemMb" value="1000"/>
            <param name="candidateSmallIndels_check" value="True"/>
            <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
            <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
        </test>
        <!-- Reference FASTA taken from the history; checks the unfiltered SV candidates
             and the somatic calls. -->
        <test>
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
            <param name="bam_input_selector" value="tumor_bam"/>
            <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
            <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
            <param name="set_configuration_switch" value="Default_config_file"/>
            <param name="callMemMb" value="1000"/>
            <param name="candidateSV_check" value="True"/>
            <output name="candidateSV" file="candidateSV.vcf.gz" decompress="true" lines_diff="6"/>
            <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
        </test>
        <!-- Reference FASTA taken from the history; checks the small-indel candidates
             and the somatic calls. -->
        <test>
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
            <param name="bam_input_selector" value="tumor_bam"/>
            <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
            <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
            <param name="set_configuration_switch" value="Default_config_file"/>
            <param name="callMemMb" value="1000"/>
            <param name="candidateSmallIndels_check" value="True"/>
            <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
            <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
        </test>
    </tests>
    <help><![CDATA[
**Outputs**
  The primary Manta outputs are a set of VCF 4.1 files. Currently there are 3 VCF files
  created for a germline analysis, and an additional somatic VCF is produced for a
  tumor/normal subtraction. These files are:
  
  - diploidSV.vcf.gz
      SVs and indels scored and genotyped under a diploid model for the set of samples in a
      joint diploid sample analysis or for the normal sample in a tumor/normal subtraction
      analysis. **In the case of a tumor/normal subtraction, the scores in this file do not
      reflect any information from the tumor sample.**
  
  - somaticSV.vcf.gz
      SVs and indels scored under a somatic variant model. This file will only be produced
      if a tumor sample alignment file is supplied during configuration.
  
  - candidateSV.vcf.gz
      Unscored SV and indel candidates. Only a minimal amount of supporting evidence is
      required for an SV to be entered as a candidate in this file. An SV or indel must be a
      candidate to be considered for scoring, therefore an SV cannot appear in the other VCF
      outputs if it is not present in this file. Note that by default this file includes
      indels of size 8 and larger. The smallest indels in this set are intended to be passed
      on to a small variant caller without scoring by manta itself (by default manta scoring
      starts at size 50).
  
  - candidateSmallIndels.vcf.gz
      Subset of the candidateSV.vcf.gz file containing only simple insertion and deletion
      variants less than the minimum scored variant size (50 by default). Passing this file
      to a small variant caller will provide continuous coverage over all indel sizes when
      the small variant caller and manta outputs are evaluated together. Alternate small
      indel candidate sets can be parsed out of the candidateSV.vcf.gz file if this
      candidate set is not appropriate.
  
  For tumor-only analysis, Manta will produce an additional VCF:

  - tumorSV.vcf.gz
      Subset of the candidateSV.vcf.gz file after removing redundant candidates and small
      indels less than the minimum scored variant size (50 by default). The SVs are not
      scored, but include additional details: (1) paired and split read supporting evidence
      counts for each allele (2) a subset of the filters from the scored tumor-normal model
      are applied to the single tumor case to improve precision.

**Manta help**
  This script configures the Manta SV analysis pipeline.
  You must specify a BAM or CRAM file for at least one sample.
  Configuration will produce a workflow run script which
  can execute the workflow on a single node or through
  SGE, and can resume any interrupted execution.


For further info see: https://github.com/Illumina/manta

  ]]></help>
    <!-- Publication to cite when using this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv710</citation>
    </citations>
</tool>