Mercurial > repos > iuc > khmer_normalize_by_median

<tool id="khmer_normalize_by_median" name="Normalize By Median" version="@WRAPPER_VERSION@.0">
    <description>Filter reads using digital normalization via k-mer abundances</description>
    <macros>
        <token name="@BINARY@">normalize-by-median.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <command><![CDATA[
set -xu &&
#for $num, $input in enumerate($inputs)
    ln -s ${input} sequence-${num} &&
#end for
mkdir output &&
cd output &&
normalize-by-median.py
${paired_switch}
${force_single_switch}
@TABLEPARAMS@
--cutoff=${cutoff}
#if $unpaired_reads_filename
    --unpaired-reads=${unpaired_reads_filename}
#end if
#if $save_countgraph
    --savegraph=${countgraph}
#end if
#if $countgraph_to_load
    --loadgraph=${countgraph_to_load}
#end if
--report=${report}
../sequence-*
]]>
    </command>
    <inputs>
        <expand macro="input_sequences_filenames" />
        <param name="paired_switch" type="boolean" checked="false" truevalue="--paired" falsevalue=""
            label="Require all sequences be properly paired?"
            help="(--paired) The tool will fail if given improperly paired reads and this option is selected." />
        <param name="force_single_switch" type="boolean" checked="false" truevalue="--force_single" falsevalue=""
            label="Ignore all pairing information?"
            help="(--paired) By default this tool process reads in a pair-aware manner. This option disables that behavior." />
        <param name="unpaired_reads_filename" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" optional="true"
            label="Extra unpaired reads"
            help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." />
        <param name="countgraph_to_load" type="data" format="oxlicg" optional="true"
            label="Optional k-mer countgraph"
            help="(--loadgraph) The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
        <param name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="(--savegraph)" />
        <param name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="(--cutoff)" />
        <expand macro="tableinputs" />
    </inputs>
    <outputs>
        <data name="countgraph" format="oxlicg" label="${tool.name} k-mer countgraph">
            <filter>save_countgraph == True</filter>
        </data>
        <data name="report" format="txt" label="${tool.name} report" />
        <collection name="sequences" type="list">
            <discover_datasets pattern="__name__" directory="output" />
        </collection>
    </outputs>
    <tests>
        <test>
            <param name="inputs" value="test-abund-read-2.fa"/>
            <param name="type" value="specific" />
            <param name="cutoff" value="1" />
            <param name="ksize" value="17" />
            <output name="report" file="normalize-by-median.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <test>
            <param name="inputs" value="test-abund-read-2.fa" />
            <param name="type" value="specific" />
            <param name="cutoff" value="2" />
            <param name="ksize" value="17" />
            <output name="report" file="normalize-by-median.c2.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <test>
            <param name="inputs" value="test-abund-read-paired.fa" />
            <param name="type" value="specific" />
            <param name="cutoff" value="1" />
            <param name="ksize" value="17" />
            <param name="paired" value="true" />
            <output name="report" file="normalize-by-median.paired.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="sequence-0.keep">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
Do digital normalization (remove mostly redundant sequences)

Discard sequences based on whether or not their median k-mer abundance lies
above a specified cutoff. Kept sequences will be placed in <fileN>.keep.

By default, Paired end reads will be considered together; if either read will
be kept, then both will be kept. (This keeps both reads from a fragment, and
helps with retention of repeats.) Unpaired reads are treated individually.

If `--paired` is set then proper pairing is required and the tool will exit on
unpaired reads, although `--unpaired-reads` can be used to supply a file of
orphan reads to be read after the paired reads.

`--force_single` will ignore all pairing information and treat reads
individually.

With `-s`/`--savegraph`, the k-mer countgraph will be saved to the specified
file after all sequences have been processed. `--loadgraph` will load the
specified k-mer countgraph before processing the specified files.  Note
that the countgraph is in same format as those produced by
`load-into-counting.py` and consumed by `abundance-dist.py`.

@HELP_FOOTER@
]]>
    </help>
    <citations>
        <expand macro="software-citation" />
        <expand macro="diginorm-citation" />
    </citations>
</tool>
author	iuc
date	Mon, 14 Nov 2016 17:51:02 -0500
parents	73314e26dcfd
children	557cc16931f4