view pilon.xml @ 1:11e5408fd238 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pilon commit f11cbbf24637c96c53e4a1ea5ace4b21e116677c
author iuc
date Tue, 04 Apr 2017 11:18:37 -0400
parents fe0dc27e6327
children
line wrap: on
line source

<tool id="pilon" name="pilon" version="1.20.1">
    <description>An automated genome assembly improvement and variant detection tool</description>
    <!-- The pilon 1.20 package is the only declared requirement of this tool. -->
    <requirements>
      <requirement version="1.20" type="package">pilon</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
      ## Stage every selected BAM and its index in the working directory as
      ## <basename> and <basename>.bai (presumably so that pilon finds the
      ## index beside the BAM; confirm against pilon's BAM loading).
      ## The path handed to basename is quoted so that dataset paths
      ## containing whitespace are not word-split by the shell.
      #if $auto_selection.auto_enabled == "yes"
        #for $bamfile in $auto_selection.bam
          ln -f -s "$bamfile" "\$(basename "$bamfile")" &&
          ln -f -s "$bamfile.metadata.bam_index" "\$(basename "$bamfile").bai" &&
        #end for
      #end if
      #if $options.selection_mode == "advanced"
        #if $options.frags_selection.frags_enabled == "yes"
          #for $bamfile in $options.frags_selection.frags
            ln -f -s "$bamfile" "\$(basename "$bamfile")" &&
            ln -f -s "$bamfile.metadata.bam_index" "\$(basename "$bamfile").bai" &&
          #end for
        #end if
        #if $options.jumps_selection.jumps_enabled == "yes"
          #for $bamfile in $options.jumps_selection.jumps
            ln -f -s "$bamfile" "\$(basename "$bamfile")" &&
            ln -f -s "$bamfile.metadata.bam_index" "\$(basename "$bamfile").bai" &&
          #end for
        #end if
        #if $options.unpaired_selection.unpaired_enabled == "yes"
          #for $bamfile in $options.unpaired_selection.unpaired
            ln -f -s "$bamfile" "\$(basename "$bamfile")" &&
            ln -f -s "$bamfile.metadata.bam_index" "\$(basename "$bamfile").bai" &&
          #end for
        #end if
      #end if
        ## Expose the reference (history dataset or built-in genome) under a fixed name.
        ln -s -f
          #if $reference_genome.reference_genome_source == "history"
            "$reference_genome.history_item"
          #else
            "$reference_genome.builtin.fields.path"
          #end if
          reference.fasta &&
        pilon
        --genome reference.fasta
        $variant
        $changes
        ## Pass the staged BAM files by the same basenames used for the symlinks above.
        #if $auto_selection.auto_enabled == "yes"
          #for $bamfile in $auto_selection.bam
            --bam "\$(basename "$bamfile")"
          #end for
        #end if
        #if $options.selection_mode == "advanced"
          #if $options.frags_selection.frags_enabled == "yes"
            #for $bamfile in $options.frags_selection.frags
              --frags "\$(basename "$bamfile")"
            #end for
          #end if
          #if $options.jumps_selection.jumps_enabled == "yes"
            #for $bamfile in $options.jumps_selection.jumps
              --jumps "\$(basename "$bamfile")"
            #end for
          #end if
          #if $options.unpaired_selection.unpaired_enabled == "yes"
            #for $bamfile in $options.unpaired_selection.unpaired
              --unpaired "\$(basename "$bamfile")"
            #end for
          #end if
          $options.vcfqe
          $options.vcf
          $options.tracks
          --chunksize $options.chunk_size
          $options.diploid
          $options.duplicates
          $options.iupac
          $options.nonpf
          ## Free-text value: strip surrounding whitespace and single-quote it so
          ## the shell passes it to pilon as exactly one argument.
          #set $targets = str($options.targets).strip()
          #if $targets
            --targets '$targets'
          #end if
          --fix $options.fixes
          $options.verbose
          --defaultqual $options.defaultqual
          --flank $options.flank
          --gapmargin $options.gapmargin
          --K $options.kmersize
          --mindepth $options.mindepth
          --mingap $options.mingap
          --minmq $options.minmq
          --minqual $options.minqual
          $options.nostrays
        #end if
        ## GALAXY_SLOTS is read from the job environment; fall back to one thread.
        --threads \${GALAXY_SLOTS:-1}
        ## All pilon output files share the "pilon" prefix (pilon.fasta, pilon.vcf, ...).
        --output pilon
    ]]></command>
    <inputs>
      <!-- Reference FASTA: either a dataset from the history or an entry of the "all_fasta" data table. -->
      <conditional name="reference_genome">
        <param label="Source for reference genome used for BAM alignments" name="reference_genome_source" type="select">
          <option selected="True" value="history">Use a genome from history</option>
          <option value="builtin">Use a built-in genome</option>
        </param>
        <when value="history">
          <param format="fasta" type="data" name="history_item" label="Select a reference genome"/>
        </when>
        <when value="builtin">
          <param label="Select a reference genome" name="builtin" type="select">
            <options from_data_table="all_fasta">
              <!-- Entries are sorted on column 2 of the data table. -->
              <filter column="2" type="sort_by" />
              <validator message="No genomes are available for the selected input dataset" type="no_options" />
            </options>
          </param>
        </when>
      </conditional>
      <!-- When switched on, the chosen BAM files are passed through pilon's generic "bam" option. -->
      <conditional name="auto_selection">
        <param name="auto_enabled" type="boolean" checked="true" truevalue="yes" falsevalue="no"
          label="Type automatically determined by pilon" />
        <when value="yes">
          <param argument="bam" type="data" format="bam" multiple="true" label="Input BAM file" />
        </when>
        <when value="no" />
      </conditional>
      <!-- Top-level switches; each one expands to its pilon flag when checked and to nothing otherwise. -->
      <param argument="variant" type="boolean" checked="true" truevalue="--variant" falsevalue=""
        label="Variant calling mode"
        help="Sets up heuristics for variant calling, as opposed to assembly improvement; equivalent to '--vcf --fix all,breaks'." />
      <param argument="changes" type="boolean" truevalue="--changes" falsevalue=""
        label="Create changes file"
        help="If specified, a file listing changes in the &lt;output&gt;.fasta will be generated." />
      <!-- Advanced options. In "default" mode none of these parameters is put on the command line. -->
      <conditional name="options">
        <param label="Use advanced options" name="selection_mode" type="select">
          <option selected="True" value="default">Use default options</option>
          <option value="advanced">Use advanced options</option>
        </param>
        <when value="default"> </when>
        <when value="advanced">
          <!-- BAM inputs with an explicitly declared library type (frags, jumps, unpaired). -->
          <conditional name="frags_selection">
            <param name="frags_enabled" label="Paired end fragments" type="boolean" truevalue="yes" falsevalue="no" />
            <when value="yes">
              <param argument="frags" label="Input BAM file (paired end fragments)" multiple="true" type="data" format="bam"
                help="BAM file consisting of fragment paired-end alignments." />
            </when>
            <when value="no"></when>
          </conditional>
          <conditional name="jumps_selection">
            <param name="jumps_enabled" label="Mate pairs" type="boolean" truevalue="yes" falsevalue="no" />
            <when value="yes">
              <param argument="jumps" label="Input BAM file (mate pairs)" multiple="true" type="data" format="bam"
                help="BAM file consisting of jump (mate pair) paired-end alignments." />
            </when>
            <when value="no"></when>
          </conditional>
          <conditional name="unpaired_selection">
            <param name="unpaired_enabled" label="Unpaired reads"  type="boolean" truevalue="yes" falsevalue="no" />
            <when value="yes">
              <param argument="unpaired" label="Input BAM file (unpaired)" multiple="true" type="data" format="bam"
                help="BAM file consisting of unpaired alignments." />
            </when>
            <when value="no"></when>
          </conditional>
          <!-- Output-related switches. -->
          <param argument="vcf" type="boolean" checked="false" label="VCF output"
            truevalue="--vcf" falsevalue="" help="If specified, a vcf file will be generated (even if 'Variant calling mode' is off)"/>
          <param argument="tracks" type="boolean" checked="false" label="Output annotation tracks"
            help="Write many track files (*.bed, *.wig) suitable for viewing in a genome browser."
            truevalue="--tracks" falsevalue="" />
          <!-- Help texts are kept on a single line: line breaks inside an attribute value become stray spaces. -->
          <param argument="chunk_size" type="integer" min="1" value="10000000" label="Chunk size"
            help="Input FASTA elements larger than this will be processed in smaller pieces not to exceed this size." />
          <param argument="vcfqe" type="boolean" checked="false" label="QE (not QP) in VCF"
            help="If specified the VCF will contain a QE (quality-weighted evidence) field rather than the default QP (quality-weighted percentage of evidence) field."
            truevalue="--vcfqe" falsevalue="" />
          <!-- Selected values are passed to pilon's "fix" option. -->
          <param argument="fixes" label="Issues that pilon should try and fix" type="select" multiple="true">
            <option value="all" selected="true">All non-experimental fixes</option>
            <option value="bases">Individual bases and small indels</option>
            <option value="gaps">Fill gaps</option>
            <option value="local">Detect and fix local misassemblies</option>
            <option value="none">Do none of these fixes (no FASTA will be written)</option>
            <option value="amb">Fix ambiguous bases in FASTA output (experimental)</option>
            <option value="breaks">Allow local reassembly to open new gaps (experimental, requires local assembly fixing to be selected)</option>
            <option value="novel">Assemble novel sequence from unaligned non-jump reads (experimental)</option>
            <!-- TODO: enable this when documented in pilon <option value="circle">Trim long reads for circular continuity(experimental, requires unpaired reads)</option>-->
          </param>
          <param argument="diploid" label="Organism is diploid" type="boolean" checked="false"
            help="Sample is from diploid organism; will eventually affect calling of heterozygous SNPs"
            truevalue="--diploid" falsevalue="" />
          <param argument="duplicates" label="Use duplicates" type="boolean" checked="false"
            help="Use reads marked as duplicates in the input BAMs"
            truevalue="--duplicates" falsevalue="" />
          <param argument="iupac" label="Use IUPAC codes in FASTA output" type="boolean" checked="false"
            help="Output IUPAC ambiguous base codes in the output FASTA file when appropriate"
            truevalue="--iupac" falsevalue="" />
          <param argument="nonpf" label="Use low quality reads" type="boolean" checked="false"
            help="Use reads which failed sequencer quality filtering"
            truevalue="--nonpf" falsevalue="" />
          <param argument="targets" label="List of targets to process (leave blank for all)" type="text"
            help="Only process the specified target(s).  Targets are comma-separated, and each target is a fasta element name optionally followed by a base range." />
          <param argument="verbose" label="Verbose output (in tool log)" type="boolean" checked="false"
            truevalue="--verbose" falsevalue="" />
          <!-- Numeric tuning parameters; each is passed to pilon whenever advanced mode is selected. -->
          <param argument="defaultqual" label="Default base quality" type="integer" min="1" value="15"
            help="Assumes bases are of this quality if quals are not present in input BAMs" />
          <param argument="flank" label="Flanking bases to ignore" type="integer" min="1" value="10"
            help="This many bases at each end of the good reads will be ignored." />
          <param argument="gapmargin" label="Allowable gap margin" type="integer" min="1" value="100000"
            help="Closed gaps must be within this number of bases of true size to be closed" />
          <param argument="kmersize" label="Kmer size" type="integer" min="1" value="47"
            help="Kmer size used by internal assembler" />
          <param argument="mindepth" label="Minimum depth" type="float" value="0.1"
            help="Minimum depth of coverage required for variants to be called. See complete documentation below." />
          <param argument="mingap" label="Minimum gap size" type="integer" value="10"
            help="Minimum size for unclosed gaps" />
          <param argument="minmq" label="Minimum mapping quality" type="integer" value="0"
            help="Minimum alignment mapping quality for a read to count in pileups" />
          <param argument="minqual" label="Minimum base quality" type="integer" value="0"
            help="Minimum base quality to consider for pileups" />
          <param argument="nostrays" label="Disable 'stray read filtering'" type="boolean" checked="false"
            help="See documentation below"
            truevalue="--nostrays" falsevalue="" />
        </when>
      </conditional>
    </inputs>
    <outputs>
      <!-- VCF: collected in variant calling mode, or when VCF output is requested in advanced mode. -->
      <data name="output_vcf" format="vcf" from_work_dir="pilon.vcf" label="VCF from ${tool.name} on ${on_string}">
        <filter>variant or (options['selection_mode'] == 'advanced' and options['vcf'])</filter>
      </data>
      <!-- Changes file: collected only when "Create changes file" is checked. -->
      <data name="output_changes" format="txt" from_work_dir="pilon.changes" label="Changes in FASTA from ${tool.name} on ${on_string}">
        <filter>changes</filter>
      </data>
      <!-- FASTA: always collected in default mode; in advanced mode only if the "none" fix is not selected. -->
      <data name="output_fasta" format="fasta" from_work_dir="pilon.fasta" label="FASTA from ${tool.name} on ${on_string}">
        <filter>options['selection_mode'] == 'default' or (options['selection_mode'] == 'advanced' and 'none' not in options['fixes'])</filter>
      </data>
      <!-- Annotation tracks: collected only when requested in advanced mode. -->
      <collection name="tracks" type="list" label="Annotation tracks from ${tool.name} on ${on_string}">
        <filter>options['selection_mode'] == 'advanced' and options['tracks']</filter>
        <data name="output_pilon_bed" format="bed" from_work_dir="pilonPilon.bed" label="Features from ${tool.name} on ${on_string} (BED format)" />
        <data name="output_changes_wig" format="wig" from_work_dir="pilonChanges.wig" label="${tool.name} changes track on ${on_string} (WIG format)" />
        <data name="output_unconfirmed_wig" format="wig" from_work_dir="pilonUnconfirmed.wig" label="${tool.name} unconfirmed track on ${on_string}" />
        <data name="output_copynumber_wig" format="wig" from_work_dir="pilonCopyNumber.wig" label="${tool.name} copy number track on ${on_string}" />
        <data name="output_coverage_wig" format="wig" from_work_dir="pilonCoverage.wig" label="${tool.name} coverage track on ${on_string}" />
        <data name="output_badcoverage_wig" format="wig" from_work_dir="pilonBadCoverage.wig" label="${tool.name} bad coverage track on ${on_string}" />
        <data name="output_pctbad_wig" format="wig" from_work_dir="pilonPctBad.wig" label="${tool.name} pct bad track on ${on_string}" />
        <data name="output_deltacoverage_wig" format="wig" from_work_dir="pilonDeltaCoverage.wig" label="${tool.name} delta coverage track on ${on_string}" />
        <data name="output_dipcoverage_wig" format="wig" from_work_dir="pilonDipCoverage.wig" label="${tool.name} dip coverage track on ${on_string}" />
        <data name="output_physicalcoverage_wig" format="wig" from_work_dir="pilonPhysicalCoverage.wig" label="${tool.name} physical coverage track on ${on_string}" />
        <data name="output_clippedalignments_wig" format="wig" from_work_dir="pilonClippedAlignments.wig" label="${tool.name} clipped alignments track on ${on_string}" />
        <data name="output_weightedqual_wig" format="wig" from_work_dir="pilonWeightedQual.wig" label="${tool.name} weighted quality track on ${on_string}" />
        <data name="output_weightedmq_wig" format="wig" from_work_dir="pilonWeightedMq.wig" label="${tool.name} weighted MQ track on ${on_string}" />
        <data name="output_gc_wig" format="wig" from_work_dir="pilonGC.wig" label="${tool.name} GC track on ${on_string}" />
      </collection>
    </outputs>
    <tests>
      <!-- Default options with variant calling on: checks the VCF (partial match) and the FASTA. -->
      <test>
        <param name="reference_genome_source" value="history" />
        <param name="history_item" value="test1.fasta" ftype="fasta" />
        <param name="bam" value="test1.bam" />
        <param name="variant" value="true" />
        <output name="output_vcf" file="test1-vcf-part" ftype="vcf" compare="contains" />
        <output name="output_fasta" md5="352907b0d965bc926289b1b2eb9cbecb" ftype="fasta" />
      </test>
      <!-- Advanced mode with annotation tracks on: additionally checks every element of the tracks collection. -->
      <test>
        <param name="reference_genome_source" value="history" />
        <param name="history_item" value="test1.fasta" ftype="fasta" />
        <param name="bam" value="test1.bam" />
        <param name="variant" value="true" />
        <param name="selection_mode" value="advanced" />
        <param name="tracks" value="true" />
        <output name="output_vcf" file="test1-vcf-part" ftype="vcf" compare="contains" />
        <output name="output_fasta" md5="352907b0d965bc926289b1b2eb9cbecb" ftype="fasta" />
        <output_collection name="tracks">
          <element name="output_pilon_bed" md5="ae0518a6d641efecdcf41c808c014226" />
          <element name="output_changes_wig" md5="26e0a9a3793e6604673e9911c4d334ab" />
          <element name="output_unconfirmed_wig" md5="f8bb3def4547e854bd90e1a6b4f8cc66" />
          <element name="output_copynumber_wig" md5="c1e94296fbc24d00fa7d057f30e4de8f" />
          <element name="output_coverage_wig" md5="48f7da4bad60fcbb49aaaa992aef8a02" />
          <element name="output_badcoverage_wig" md5="31e3ad4a5d0f8cd1e296e4b1424d6a95" />
          <element name="output_pctbad_wig" md5="8353134113c87dbc7d9497f983d58b4b" />
          <element name="output_deltacoverage_wig" md5="e0fb6474b851e1890f91c57c2ba1fd76" />
          <element name="output_dipcoverage_wig" md5="8885c63df1dc7309a7bab371b9eb449b" />
          <element name="output_physicalcoverage_wig" md5="db485f84b51499c3a3c72da70a8ef7af" />
          <element name="output_clippedalignments_wig" md5="b6eab0827a9b6f2e925d9ece5ee4f87f" />
          <element name="output_weightedqual_wig" md5="9987289bd0ec8cd05bd6d330bdd1d01d" />
          <element name="output_weightedmq_wig" md5="7ec1b6420f52fc44bf3c04aa593fcdeb" />
          <element name="output_gc_wig" md5="815af1378d016b85ed6f52667dde10c1" />
        </output_collection>
      </test>
    </tests>
    <help><![CDATA[
  Pilon is a software tool which can be used to:

  * Automatically improve draft assemblies

  * Find variation among strains, including large event detection

  Pilon requires as input a FASTA file of the genome along with one or more BAM files of reads aligned to the input FASTA file. Pilon uses read alignment analysis to identify inconsistencies between the input genome and the evidence in the reads. It then attempts to make improvements to the input genome, including:

  * Single base differences

  * Small indels

  * Larger indel or block substitution events

  * Gap filling

  * Identification of local misassemblies, including optional opening of new gaps

  Pilon then outputs a FASTA file containing an improved representation of the genome from the read data and an optional VCF file detailing variation seen between the read data and the input genome.

  To aid manual inspection and improvement by an analyst, Pilon can optionally produce tracks that can be displayed in genome viewers such as IGV and GenomeView, and it reports other events (such as possible large collapsed repeat regions) in its standard output.

  Note on **mindepth**:

  Variants (snps and indels) will only be called if there is coverage of good pairs
  at the value set for *mindepth* depth or more; if this value is >= 1, it is an absolute depth, if it is a
  fraction < 1, then minimum depth is computed by multiplying this value by the mean
  coverage for the region, with a minimum value of 5 (default 0.1: min depth to call
  is 10% of mean coverage or 5, whichever is greater).

  Note on **stray read filtering**:

  By default a pass is made through the input BAM files to identify stray pairs, that is,
  those pairs in which both reads are aligned but not marked valid because they have
  inconsistent orientation or separation. Identifying stray pairs can help fill gaps
  and assemble larger insertions, especially of repeat content.  However, doing so
  sometimes consumes considerable memory.
    ]]></help>
    <!-- Publication to cite for this tool, given as a DOI. -->
    <citations>
      <citation type="doi">10.1371/journal.pone.0112963</citation>
    </citations>
</tool>