Mercurial > repos > bgruening > text_processing

<tool id="tp_sorted_uniq" name="Unique" version="@BASE_VERSION@.0">
  <!-- Short description of what the tool reports: one entry per distinct record. -->
  <description>occurrences of each record</description>
    <!--
        Shared definitions are imported from macros.xml. The "requirements"
        macro expanded below, and the @BASE_VERSION@ / @REFERENCES@ tokens used
        elsewhere in this file, are presumably defined there (macros.xml is not
        part of this file; verify against it).
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Emits only the first line printed by the version query of sort. -->
    <version_command>sort --version | head -n 1</version_command>
    <!--
        Cheetah template for the shell command that produces the result.

        Runs "sort -u" on the input dataset and writes the de-duplicated,
        sorted records to the output dataset via -o.

        * $ignore_case and $is_numeric expand to -f / -n (or to nothing),
          as declared by the boolean params in the inputs section.
        * The field separator is a TAB. It is written with bash ANSI-C
          quoting ($'\t') rather than as a literal tab character, because a
          literal tab is invisible and is easily converted to spaces by
          editors and formatters, which would silently change the separator.
          The dollar sign is backslash-escaped so that Cheetah emits it
          verbatim instead of treating it as a placeholder start.
        * In advanced mode the comparison key is restricted to the selected
          column range with -k<start>,<end>.
        * Dataset paths are single-quoted so the shell performs no expansion
          inside them.
    -->
    <command>
<![CDATA[
    sort -u
        $ignore_case
        $is_numeric
        -t \$'\t'
        #if $adv_opts.adv_opts_selector == "advanced":
            -k$adv_opts.column_start,$adv_opts.column_end
        #end if
        -o '$outfile'
        '$infile'
]]>
    </command>
    <!--
        User-facing parameters. The parameter names are referenced by the
        command template, so they must stay in sync with it.
    -->
    <inputs>
        <!-- Tabular dataset to de-duplicate. -->
        <param name="infile"
               type="data"
               format="tabular"
               label="File to scan for unique values" />

        <!-- Expands to -f when checked, to nothing otherwise. -->
        <param name="ignore_case"
               type="boolean"
               truevalue="-f"
               falsevalue=""
               checked="False"
               label="Ignore differences in case when comparing"
               help="(-f)" />

        <!-- Expands to -n when checked, to nothing otherwise. -->
        <param name="is_numeric"
               type="boolean"
               truevalue="-n"
               falsevalue=""
               checked="False"
               label="Column only contains numeric values"
               help="(-n)" />

        <!--
            Optional restriction of the comparison to a column range.
            "basic" contributes no parameters; "advanced" exposes the first
            and last column of the key, both chosen from the columns of infile.
        -->
        <conditional name="adv_opts">
            <param name="adv_opts_selector"
                   type="select"
                   label="Advanced Options">
                <option value="basic" selected="True">Hide Advanced Options</option>
                <option value="advanced">Show Advanced Options</option>
            </param>
            <when value="basic" />
            <when value="advanced">
                <param name="column_start"
                       type="data_column"
                       data_ref="infile"
                       label="Column start"
                       help="Unique on specific column range" />
                <param name="column_end"
                       type="data_column"
                       data_ref="infile"
                       label="Column end"
                       help="Unique on specific column range" />
            </when>
        </conditional>
    </inputs>
    <!--
        Single result dataset, written by the -o option of the command.
        Both format_source and metadata_source point at infile, so the
        datatype and metadata of the result are taken from the input dataset.
    -->
    <outputs>
        <data name="outfile"
              format_source="infile"
              metadata_source="infile" />
    </outputs>
    <!--
        Functional test covering the advanced branch: case-insensitive,
        numeric comparison keyed on columns 2 through 3 of 1.bed, with the
        result compared against unique_results1.bed.
    -->
    <tests>
        <test>
            <param name="infile" value="1.bed" />
            <param name="is_numeric" value="True" />
            <param name="ignore_case" value="True" />
            <param name="adv_opts_selector" value="advanced" />
            <param name="column_start" value="2" />
            <param name="column_end" value="3" />
            <output name="outfile" file="unique_results1.bed" />
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

**Syntax**

This tool returns all unique lines using the 'sort -u' command. It can be used with unsorted files.
If you need additional options, like grouping or counting your unique results, please use the 'Unique lines from sorted file' tool.

-----

.. class:: infomark

The input file needs to be tab-separated. Please convert your file if necessary.

-----

**Example**

- Input file::

       chr1   10  100  gene1
       chr1  105  200  gene2
       chr1   10  100  gene1
       chr2   10  100  gene4
       chr2 1000 1900  gene5
       chr3   15 1656  gene6
       chr2   10  100  gene4

- Unique lines will result in::

       chr1   10  100  gene1
       chr1  105  200  gene2
       chr2   10  100  gene4
       chr2 1000 1900  gene5
       chr3   15 1656  gene6

@REFERENCES@
]]>
</help>
</tool>
author	bgruening
date	Tue, 30 Jun 2015 16:23:23 -0400
parents	5314e5d6f040
children	60edf2f8c28f