Mercurial > repos > iuc > qfilt

<?xml version="1.0"?>
<tool id="qfilt" version="1.0.0" name="qfilt">
    <description>filter sequencing data using simple heuristics</description>
    <!-- Runtime dependency: the qfilt package, pinned to version 0.0.1. -->
    <requirements>
        <requirement type="package" version="0.0.1">qfilt</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Filtered reads are always written to the single tool output.
        qfilt -o '$filtered_output'
        ## Optional flags are only emitted when "Specify additional parameters" is selected.
        #if str($options.advanced) == 'advanced':
            -q $options.qscore
            -l $options.length
            $options.split
            ## -P (punch) is usable on its own and overrides -m; -R is only
            ## meaningful together with -P, so it is nested under it.
            #if $options.replace:
                -P '$options.replace'
                #if $options.remove:
                    -R '$options.remove'
                #end if
            #elif $options.mode:
                -m $options.mode
            #end if
            ## The mismatch tolerance only applies when a prefix is supplied.
            #if $options.prefix:
                -t $options.mismatch
                -T '$options.prefix'
            #end if
            -f $options.output_format
            $options.tolerate_homopolymeric $options.tolerate_ambiguous
        #end if
        ## Input selection: a single FASTQ file, or a FASTA file with the
        ## QUAL file passed as the positional argument right after it.
        #if str($input_data.format) == 'fastq':
            -Q '$input_data.fastq'
        #else:
            -F '$input_data.fasta'
            '$input_data.qual'
        #end if
    ]]></command>
    <inputs>
        <!-- Input selection: either one FASTQ file, or a FASTA file plus its QUAL file. -->
        <conditional name="input_data">
            <param name="format" type="select" label="Input format">
                <option value="fastq">FASTQ</option>
                <option value="fastaq">FASTA+QUAL</option>
            </param>
            <when value="fastq">
                <param name="fastq" argument="-Q" type="data" format="fastq" label="Input FASTQ" help="FASTQ File" />
            </when>
            <when value="fastaq">
                <param name="fasta" argument="-F" type="data" format="fasta" label="Input FASTA" help="FASTA File" />
                <param name="qual" argument="-F" type="data" format="qual" label="Input QUAL" help="QUAL File" />
            </when>
        </conditional>

        <!-- Filtering options: nothing is passed to qfilt from this section unless "advanced" is chosen. -->
        <conditional name="options">
            <param name="advanced" type="select" label="Additional options">
                <option value="defaults">Use defaults</option>
                <option value="advanced">Specify additional parameters</option>
            </param>
            <when value="defaults"/>
            <when value="advanced">
                <param name="qscore" argument="-q" type="integer" value="20" label="QScore" help="minimum per-base quality score below which a read will be split or truncated" />
                <param name="length" argument="-l" type="integer" value="50" label="Length" help="minimum retained fragment LENGTH" />
                <!-- The bitmask range documented in the help text is enforced via min/max. -->
                <param name="mode" argument="-m" type="integer" value="0" min="0" max="7" label="Mode" help="MODE is a 3-bitmask (an integer in [0-7], default=0)" />
                <param name="split" argument="-s" type="boolean" truevalue="-s" falsevalue="" label="Split" help="when encountering a low q-score, split instead of truncate" />
                <param name="tolerate_homopolymeric" argument="-p" type="boolean" truevalue="-p" falsevalue="" label="Tolerate Homopolymeric" help="tolerate low q-score homopolymeric regions" />
                <param name="tolerate_ambiguous" argument="-a" type="boolean" truevalue="-a" falsevalue="" label="Tolerate Ambiguous" help="tolerate low q-score ambiguous nucleotides" />
                <param name="replace" argument="-P" type="text" label="Replace" help="rather than splitting or truncating, replace low quality bases with CHAR this option OVERRIDES all -m mode options" />
                <param name="remove" argument="-R" type="text" label="Remove" help="rather than splitting or truncating, remove reads which contain more than COUNT low quality bases this option only works in COMBINATION with the -P (punch) option" />
                <param name="prefix" argument="-T" type="text" label="Prefix" help="if supplied, only reads with this PREFIX are retained, and the PREFIX is stripped from each contributing read" />
                <param name="mismatch" argument="-t" type="integer" value="0" label="Mismatch" help="if PREFIX is supplied, prefix matching tolerates at most MISMATCH mismatches" />
                <param name="output_format" argument="-f" type="select" label="Format" help="output in FASTA or FASTQ format (default=FASTA)" >
                    <option value="FASTA">FASTA</option>
                    <option value="FASTQ">FASTQ</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- The output datatype defaults to FASTA and is switched to FASTQ when the
             advanced "Format" option is set to FASTQ. That parameter is nested in the
             "options" conditional, so it is referenced by its qualified name. -->
        <data format="fasta" name="filtered_output">
            <change_format>
                <when input="options.output_format" value="FASTQ" format="fastq" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- FASTQ input; neither conditional selector is set explicitly. -->
        <test>
            <param name="fastq" value="qfilt-in1.fastq" />
            <output name="filtered_output" file="qfilt-out1.fa" ftype="fasta" />
        </test>
        <!-- FASTQ input with the advanced section enabled and homopolymer tolerance off. -->
        <test>
            <param name="format" value="fastq" />
            <param name="fastq" value="qfilt-in1.fastq" />
            <param name="advanced" value="advanced" />
            <param name="tolerate_homopolymeric" value="False" />
            <output name="filtered_output" file="qfilt-out2.fa" ftype="fasta" />
        </test>
        <!-- FASTA + QUAL input pair. -->
        <test>
            <param name="format" value="fastaq" />
            <param name="fasta" value="qfilt-in2.fa" />
            <param name="qual" value="qfilt-in2.qual" />
            <output name="filtered_output" file="qfilt-out3.fa" ftype="fasta" />
        </test>
    </tests>
    <help><![CDATA[
qfilt
=====

This simple program is meant to filter sequencing data with the option to:

- Remove or split reads with poor quality scores
- Retain only fragments from reads that are tagged with a given 5' sequence.

Example Output
--------------

::

    >GM98SRO01B77KU rank=0000671 x=796.0 y=1996.0 length=58
    CCACGCGTATCGATGTCGACTTTTTTTTCTTTTCTTACATAGTAG
    >GM98SRO01BA3RP rank=0000953 x=419.5 y=1603.5 length=87
    CTGATGCTGCACCAACTGTACTCCCTCGCGATA
    >GM98SRO01E1BKW rank=0001233 x=1948.0 y=846.0 length=66
    TACAGTTGGTGCAGCATCAGAAAAGTACGACATCGATACGCGTGGTCCTCGCGA
    >GM98SRO01DVVNY rank=0001304 x=1476.0 y=636.5 length=84
    ACGGCTGATGCTGCACCAACTGTACTCCCTCGCGATA
    >GM98SRO01D6FIX rank=0001416 x=1596.0 y=1415.0 length=91
    CGGCTGATGCTGCACCAACTGTACTCCCTCGCGATA
    ]]></help>
    <!-- Bibliographic reference attached to this tool.
         NOTE(review): the BibTeX entry below describes HyPhy rather than qfilt itself; confirm this is the intended citation. -->
    <citations>
        <citation type="bibtex">
            @UNPUBLISHED{spond,
                author = "Sergei Kosakovsky Pond",
                title = "HyPhy: Hypothesis Testing using Phylogenies",
                year = "2000",
                note = "http://hyphy.org/",
                url = "http://hyphy.org/"}
        </citation>
    </citations>
</tool>
author	iuc
date	Mon, 18 Jun 2018 16:55:27 -0400
parents
children	c5e1ff9191a5