view count-median.xml @ 4:8f3aad378610 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit f30bc6f7e10409dfa0dd65688e60da8b59f12464
author iuc
date Mon, 14 Nov 2016 17:50:04 -0500
parents 84b323860b0a
children 77410799e8e9
line wrap: on
line source

<tool id="khmer_count_median" name="Count Median" version="@WRAPPER_VERSION@.0">
    <description>Count the median/avg k-mer abundance for each sequence</description>
    <!-- Defines the BINARY token (the script name substituted into the
         command section) and imports macros.xml, the only macro file this
         wrapper pulls in. -->
    <macros>
        <token name="@BINARY@">count-median.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <!-- Runs count-median.py with three positional arguments, in this order:
         countgraph, sequence file, output summary. Each path is single-quoted
         so the shell always passes it as exactly one argument. "set -xu"
         traces the executed command and aborts on unset shell variables. -->
    <command><![CDATA[
set -xu &&
@BINARY@
'$input_countgraph_filename'
'$input_sequence_filename'
'$output_summary_filename'
]]>
    </command>
    <inputs>
        <!-- Both parameters are expanded from macros; their definitions are
             not in this file. The names match the variables used in the
             command section. -->
        <expand macro="input_sequence_filename" />
        <expand macro="input_countgraph_filename" />
    </inputs>
    <outputs>
        <!-- Single text summary; its path is passed to the command as the
             last positional argument.
             NOTE(review): the label interpolates the input parameter itself;
             confirm it renders as the dataset name and not a file path. -->
        <data name="output_summary_filename" format="txt"
            label="${input_sequence_filename} sequence id, median, average, stddev, and seq length" />
    </outputs>
    <tests>
        <!-- Runs the tool on the test-abund-read-2 sequence and countgraph
             fixtures and checks that two expected comma-separated rows
             (id, median, average, stddev, length) appear in the summary. -->
        <test>
            <param name="input_sequence_filename" value="test-abund-read-2.fa" />
            <param name="input_countgraph_filename" value="test-abund-read-2.oxlicg" ftype="oxlicg" />
            <output name="output_summary_filename">
                <assert_contents>
                    <has_text text="seq,1001,1001.0,0.0,18" />
                    <has_text text="895:1:37:17593:9954/1,1,21.408163071,141.391921997,114" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Count the median/avg k-mer abundance for each sequence in the input file,
based on the k-mer counts in the given k-mer countgraph. Can be used to
estimate expression levels (mRNAseq) or coverage (genomic/metagenomic). The
output file contains sequence id, median, average, stddev, and seq length;
fields are separated by commas (CSV format). Note that khmer 1.x
count-median.py instead separated fields with spaces and split sequence names
at the first space, which meant that some sequence formats (e.g. paired FASTQ
in Casava 1.8 format) yielded uninformative names unless `--csv` was used.

@HELP_FOOTER@
]]>
    </help>
    <!-- Citation entries are expanded from macros; their contents are not
         defined in this file. -->
    <citations>
        <expand macro="software-citation" />
        <expand macro="diginorm-citation" />
    </citations>
</tool>