Mercurial > repos > iuc > porechop

<tool id="porechop" name="Porechop" version="0.2.3">
    <!-- Short tagline shown alongside the tool name. -->
    <description>adapter trimmer for Oxford Nanopore reads</description>
    <!-- Package dependency resolved before the job runs; it supplies the
         porechop executable called by the command template. -->
    <requirements>
        <requirement type="package" version="0.2.3_seqan2.1.1">porechop</requirement>
    </requirements>
    <!-- Command whose output is recorded as the version of the wrapped program. -->
    <version_command>porechop --version</version_command>
    <!--
        Builds the porechop command line; a non-zero exit code marks the job
        as failed (detect_errors="exit_code").

        Boolean inputs expand to their option string when checked and to an
        empty string otherwise, so they are inserted as-is.

        Every numeric input is declared optional, which means its value can be
        blank. Each numeric option is therefore written only when a value is
        present; a blank field is left off the command line so that porechop
        falls back to its built-in default instead of receiving an empty
        string it cannot parse as a number.

        The scoring scheme is a mandatory, regex-validated text input and is
        always passed. Reads are written to out.FORMAT in the job working
        directory, where FORMAT is the value of the "format" select.
    -->
    <command detect_errors="exit_code"><![CDATA[
porechop
    -i '$input_file'
    --format '$format'

    ## Barcode binning settings
    #if str($barcode_binning_settings.barcode_threshold) != ''
        --barcode_threshold '$barcode_binning_settings.barcode_threshold'
    #end if
    #if str($barcode_binning_settings.barcode_diff) != ''
        --barcode_diff '$barcode_binning_settings.barcode_diff'
    #end if
    $barcode_binning_settings.require_two_barcodes
    $barcode_binning_settings.discard_unassigned

    ## Adapter search settings
    #if str($adapter_search_settings.adapter_threshold) != ''
        --adapter_threshold '$adapter_search_settings.adapter_threshold'
    #end if
    #if str($adapter_search_settings.check_reads) != ''
        --check_reads '$adapter_search_settings.check_reads'
    #end if
    --scoring_scheme '$adapter_search_settings.scoring_scheme'

    ## End adapter settings
    #if str($end_adapter_settings.end_size) != ''
        --end_size '$end_adapter_settings.end_size'
    #end if
    #if str($end_adapter_settings.min_trim_size) != ''
        --min_trim_size '$end_adapter_settings.min_trim_size'
    #end if
    #if str($end_adapter_settings.extra_end_trim) != ''
        --extra_end_trim '$end_adapter_settings.extra_end_trim'
    #end if
    #if str($end_adapter_settings.end_threshold) != ''
        --end_threshold '$end_adapter_settings.end_threshold'
    #end if

    ## Middle adapter settings
    $middle_adapter_settings.no_split
    $middle_adapter_settings.discard_middle
    #if str($middle_adapter_settings.middle_threshold) != ''
        --middle_threshold '$middle_adapter_settings.middle_threshold'
    #end if
    #if str($middle_adapter_settings.extra_middle_trim_good_side) != ''
        --extra_middle_trim_good_side '$middle_adapter_settings.extra_middle_trim_good_side'
    #end if
    #if str($middle_adapter_settings.extra_middle_trim_bad_side) != ''
        --extra_middle_trim_bad_side '$middle_adapter_settings.extra_middle_trim_bad_side'
    #end if
    #if str($middle_adapter_settings.min_split_read_size) != ''
        --min_split_read_size '$middle_adapter_settings.min_split_read_size'
    #end if

    -o 'out.$format'

    ]]></command>
    <!--
        User-facing parameters. The four sections mirror the option groups of
        the porechop command line; each param's "argument" attribute names the
        option it feeds. Boolean params carry the literal flag in truevalue and
        an empty falsevalue so they can be dropped straight into the command.
    -->
    <inputs>
        <param name="input_file" type="data" format="fasta,fastq" label="Input FASTA/FASTQ" help="FASTA/FASTQ of input reads" />
        <!-- Also decides the output file extension and the output datatype. -->
        <param name="format" type="select" label="Output format for the reads" help="Format in which the trimmed reads are written">
            <option selected="True" value="fasta">FASTA</option>
            <option value="fastq">FASTQ</option>
            <option value="fasta.gz">FASTA.gz</option>
            <option value="fastq.gz">FASTQ.gz</option>
        </param>
        <section name="barcode_binning_settings" title="Barcode binning settings">
            <param argument="--barcode_threshold" type="float" min="0" max="100" value="75.0" optional="True" label="Percent identity for binning"
                   help="A read must have at least this percent identity to a barcode to be binned (default: 75.0)"/>
            <param argument="--barcode_diff" type="float" min="0" max="100" value="5.0" optional="True" label="Minimum difference in identity"
                   help="If the difference between a read's best barcode identity and its second-best barcode identity is less than this value, it will not be put in a barcode bin (to exclude cases which are too close to call) (default: 5.0)"/>
            <param argument="--require_two_barcodes" type="boolean" checked="false" truevalue="--require_two_barcodes" falsevalue="" label="Require two matches for binning"
                   help="Reads will only be put in barcode bins if they have a strong match for the barcode on both their start and end (default: a read can be binned with a match at its start or end)"/>
            <param argument="--discard_unassigned" type="boolean" checked="false" truevalue="--discard_unassigned" falsevalue="" label="Discard unassigned reads"
                   help="Discard unassigned reads (instead of creating a 'none' bin) (default: False)"/>
        </section>
        <section name="adapter_search_settings" title="Adapter search settings">
            <param argument="--adapter_threshold" type="float" min="0" max="100" value="90.0" optional="True" label="Minimum identity"
                   help="An adapter set has to have at least this percent identity to be labelled as present and trimmed off (0 to 100) (default: 90.0)"/>
            <param argument="--check_reads" type="integer" min="0" value="10000" optional="True" label="Number of aligned reads"
                   help="This many reads will be aligned to all possible adapters to determine which adapter sets are present (default: 10000)"/>
            <param argument="--scoring_scheme" type="text" value="3,-6,-5,-2" label="Scoring scheme"
                   help="Comma-delimited string of alignment scores: match, mismatch, gap open, gap extend">
                <!-- Exactly four optionally-negative integers separated by commas. -->
                <validator type="regex" message="Scoring scheme must be comma-separated string of four positive/negative integers">^((-?\d+[,]){3})-?\d+$</validator>
            </param>
        </section>
        <section name="end_adapter_settings" title="End adapter settings">
            <param argument="--end_size" type="integer" min="0" value="150" optional="True" label="Number of bases"
                   help="The number of base pairs at each end of the read which will be searched for adapter sequences (default: 150)"/>
            <param argument="--min_trim_size" type="integer" min="0" value="4" optional="True" label="Minimum trim length"
                   help="Adapter alignments smaller than this will be ignored (default: 4)"/>
            <param argument="--extra_end_trim" type="integer" min="0" value="2" optional="True" label="Extra bases trimmed"
                   help="This many additional bases will be removed next to adapters found at the ends of reads (default: 2)"/>
            <param argument="--end_threshold" type="float" min="0" max="100" value="75.0" optional="True" label="Minimum percent identity"
                   help="Adapters at the ends of reads must have at least this percent identity to be removed (0 to 100) (default: 75.0)"/>
        </section>
        <section name="middle_adapter_settings" title="Middle adapter settings">
            <param argument="--no_split" type="boolean" checked="false" truevalue="--no_split" falsevalue="" label="Skip splitting"
                   help="Skip splitting reads based on middle adapters (default: split reads when an adapter is found in the middle)"/>
            <param argument="--discard_middle" type="boolean" checked="false" truevalue="--discard_middle" falsevalue="" label="Discard reads with middle adapter"
                   help="Reads with middle adapters will be discarded (default: reads with middle adapters are split) (required for reads to be used with Nanopolish, this option is on by default when outputting reads into barcode bins)"/>
            <param argument="--middle_threshold" type="float" min="0" max="100" value="85.0" optional="True" label="Minimum percent identity"
                   help="Adapters in the middle of reads must have at least this percent identity to be found (0 to 100) (default: 85.0)"/>
            <param argument="--extra_middle_trim_good_side" type="integer" min="0" value="10" optional="True" label="Extra trimming good side"
                   help="This many additional bases will be removed next to middle adapters on their 'good' side (default: 10)"/>
            <param argument="--extra_middle_trim_bad_side" type="integer" min="0" value="100" optional="True" label="Extra trimming bad side"
                   help="This many additional bases will be removed next to middle adapters on their 'bad' side (default: 100)"/>
            <param argument="--min_split_read_size" type="integer" min="0" value="1000" optional="True" label="Minimum length reads post-split"
                   help="Post-split read pieces smaller than this many base pairs will not be outputted (default: 1000)"/>
        </section>
    </inputs>
    <!--
        One dataset holding the trimmed reads, picked up from the job working
        directory. It is typed as fasta unless the "format" select holds one
        of the values listed under change_format, in which case the datatype
        follows that selection.
    -->
    <outputs>
        <data name="outfile"
              label="${tool.name} on ${on_string}: Trimmed"
              from_work_dir="out.*"
              format="fasta">
            <change_format>
                <when input="format" value="fastq" format="fastq"/>
                <when input="format" value="fasta.gz" format="fasta.gz"/>
                <when input="format" value="fastq.gz" format="fastq.gz"/>
            </change_format>
        </data>
    </outputs>
    <!--
        Functional tests. The first four run the same FASTA input through each
        output format with all other settings at their defaults; compressed
        outputs are compared by size only. The last test overrides settings in
        the barcode, end adapter and middle adapter sections.
    -->
    <tests>
        <test>
            <param name="input_file" ftype="fasta" value="test_format.fasta"/>
            <param name="format" value="fasta"/>
            <output name="outfile" ftype="fasta" file="out.fasta"/>
        </test>
        <test>
            <param name="input_file" ftype="fasta" value="test_format.fasta"/>
            <param name="format" value="fastq"/>
            <output name="outfile" ftype="fastq" file="out.fastq"/>
        </test>
        <test>
            <param name="input_file" ftype="fasta" value="test_format.fasta"/>
            <param name="format" value="fasta.gz"/>
            <output name="outfile" ftype="fasta.gz" file="out.fasta.gz" compare="sim_size"/>
        </test>
        <test>
            <param name="input_file" ftype="fasta" value="test_format.fasta"/>
            <param name="format" value="fastq.gz"/>
            <output name="outfile" ftype="fastq.gz" file="out.fastq.gz" compare="sim_size"/>
        </test>
        <test>
            <param name="input_file" ftype="fasta" value="test_format.fasta"/>
            <param name="format" value="fasta"/>
            <param name="barcode_threshold" value="70"/>
            <param name="barcode_diff" value="4"/>
            <param name="require_two_barcodes" value="True"/>
            <param name="discard_unassigned" value="True"/>
            <param name="end_size" value="100"/>
            <param name="min_trim_size" value="2"/>
            <param name="extra_end_trim" value="1"/>
            <param name="end_threshold" value="80"/>
            <param name="discard_middle" value="True"/>
            <param name="middle_threshold" value="90"/>
            <!-- These two names must end in "_side" to match the declared
                 inputs; with any other spelling they refer to no input and
                 the values are not applied. -->
            <param name="extra_middle_trim_good_side" value="3"/>
            <param name="extra_middle_trim_bad_side" value="30"/>
            <param name="min_split_read_size" value="1500"/>
            <output name="outfile" ftype="fasta" file="out_advanced.fasta"/>
        </test>
    </tests>
    <!-- User-facing description of the tool, displayed below the tool form. -->
    <help><![CDATA[
Porechop is a tool for finding and removing adapters from Oxford Nanopore reads.
Adapters on the ends of reads are trimmed off, and when a read has an adapter in
its middle, it is treated as chimeric and chopped into separate reads. Porechop
performs thorough alignments to effectively find adapters, even at low sequence identity.

Porechop also supports demultiplexing of Nanopore reads that were barcoded with
the Native Barcoding Kit, PCR Barcoding Kit or Rapid Barcoding Kit.
    ]]></help>
    <!-- Citation for the wrapped software, given as a BibTeX entry that points
         at its GitHub repository. -->
    <citations>
        <citation type="bibtex">
            @misc{rrwick2017,
                author = {Wick, Ryan},
                year = {2017},
                title = {Porechop},
                publisher = {GitHub},
                journal = {GitHub repository},
                url = {https://github.com/rrwick/Porechop},
            }
        </citation>
    </citations>
</tool>
author	iuc
date	Tue, 18 Sep 2018 16:24:49 -0400
parents
children	93d623d9979c