view count-median.xml @ 1:84b323860b0a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 72078bced0c08685219c6e80e42ce07458822391
author iuc
date Wed, 11 Nov 2015 10:04:38 -0500
parents 42fbb6359626
children 77410799e8e9
line wrap: on
line source

<tool id="khmer_count_median" name="Count Median" version="@WRAPPER_VERSION@.0">
    <description>Count the median/avg k-mer abundance for each sequence</description>
    <!-- Shared definitions: the @BINARY@ token names the script that the
         command section runs; everything else is pulled in from macros.xml.
         The requirements, stdio and version macros expanded below are not
         defined in this file (presumably they live in macros.xml; verify
         there). -->
    <macros>
        <token name="@BINARY@">count-median.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <!-- Runs @BINARY@ (count-median.py) with three positional arguments, in
         this order: the k-mer countgraph, the sequence file, and the summary
         output file. "set -xu" makes the shell trace each command and fail on
         unset variables. Every dataset path is single-quoted so that a path
         containing whitespace or shell metacharacters is passed to the script
         as exactly one argument. -->
    <command><![CDATA[
set -xu &&
@BINARY@
'$input_countgraph_filename'
'$input_sequence_filename'
'$output_summary_filename'
]]>
    </command>
    <!-- Both inputs are declared through macros that are not defined in this
         file. They are expected to create parameters named
         input_sequence_filename and input_countgraph_filename, which the
         command and tests sections reference (assumption: confirm in
         macros.xml). -->
    <inputs>
        <expand macro="input_sequence_filename" />
        <expand macro="input_countgraph_filename" />
    </inputs>
    <!-- Single output: the per-sequence summary written by the command to
         $output_summary_filename. The label is prefixed with the input
         sequence dataset reference.
         NOTE(review): the test below expects comma-separated records while
         the declared format is txt; confirm whether a more specific datatype
         is appropriate. -->
    <outputs>
        <data name="output_summary_filename" format="txt"
            label="${input_sequence_filename} sequence id, median, average, stddev, and seq length" />
    </outputs>
    <!-- One functional test: runs the tool on test-abund-read-2.fa with the
         matching countgraph test-abund-read-2.oxlicg, then checks that the
         summary output contains two expected comma-separated records. -->
    <tests>
        <test>
            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
            <param name="input_countgraph_filename" value="test-abund-read-2.oxlicg" ftype="oxlicg" />
            <output name="output_summary_filename">
                <assert_contents>
                    <has_text text="seq,1001,1001.0,0.0,18" />
                    <has_text text="895:1:37:17593:9954/1,1,21.408163071,141.391921997,114" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Count the median/avg k-mer abundance for each sequence in the input file,
based on the k-mer counts in the given k-mer countgraph. Can be used to
estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). The
output file contains sequence id, median, average, stddev, and seq length,
one sequence per line, with fields separated by commas (CSV format, as
produced by khmer 2.x). Note that khmer 1.x separated the fields with spaces
and split sequence names at the first space, which meant that some sequence
formats (e.g. paired FASTQ in Casava 1.8 format) yielded uninformative names
unless `--csv` was given; this wrapper does not expose that option.

@HELP_FOOTER@
]]>
    </help>
    <!-- Citations are supplied by two macros that are not defined in this
         file (presumably from macros.xml): one for the software itself and
         one for the digital normalization method. -->
    <citations>
        <expand macro="software-citation" />
        <expand macro="diginorm-citation" />
    </citations>
</tool>