Mercurial > repos > bgruening > text_processing

diff unsorted_uniq.xml @ 0:5314e5d6f040 draft
Imported from capsule None
author: bgruening
date: Thu, 29 Jan 2015 07:53:17 -0500
children: 60edf2f8c28f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/unsorted_uniq.xml	Thu Jan 29 07:53:17 2015 -0500
@@ -0,0 +1,93 @@
+<tool id="tp_sorted_uniq" name="Unique" version="@BASE_VERSION@.0">
+    <description>occurrences of each record</description>
+    <macros> <!-- shared definitions; the @...@ tokens and the "requirements" macro used in this file are presumably declared there -->
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command>sort --version | head -n 1</version_command> <!-- keeps only the first line of sort's version output -->
+    <command>
+<![CDATA[
+    sort -u ## keep one line per distinct sort key; the key is the whole line unless -k is added below
+        $ignore_case
+        $is_numeric
+        -t '	'
+        #if $adv_opts.adv_opts_selector == "advanced":
+            -k${adv_opts.column_start},${adv_opts.column_end}
+        #end if
+        -o '$outfile' ## paths are single-quoted so the shell performs no expansion inside them
+        '$infile'
+]]>
+    </command>
+    <inputs> <!-- every parameter declared here is referenced by the command template -->
+        <param name="infile" type="data" label="File to scan for unique values" format="tabular"/>
+        <param name="ignore_case" type="boolean" label="Ignore differences in case when comparing" help="(-f)"
+            truevalue="-f" falsevalue="" checked="False"/>
+        <param name="is_numeric" type="boolean" label="Column only contains numeric values" help="(-n)"
+            truevalue="-n" falsevalue="" checked="False"/>
+        <conditional name="adv_opts"> <!-- the column range is only collected, and only passed as -k, in the "advanced" case -->
+            <param name="adv_opts_selector" type="select" label="Advanced Options">
+                <option selected="True" value="basic">Hide Advanced Options</option>
+                <option value="advanced">Show Advanced Options</option>
+            </param>
+            <when value="basic"/>
+            <when value="advanced">
+                <param name="column_start" type="data_column" label="Column start" help="Unique on specific column range" data_ref="infile"/>
+                <param name="column_end" type="data_column" label="Column end" help="Unique on specific column range" data_ref="infile"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs> <!-- single dataset, written by sort through -o; format and metadata are taken from infile -->
+        <data name="outfile" metadata_source="infile" format_source="infile"/>
+    </outputs>
+    <tests> <!-- single case: advanced mode, case-insensitive numeric comparison on columns 2-3 of 1.bed -->
+        <test>
+            <param name="infile" value="1.bed"/>
+            <param name="adv_opts_selector" value="advanced"/>
+            <param name="column_start" value="2"/>
+            <param name="column_end" value="3"/>
+            <param name="ignore_case" value="True"/>
+            <param name="is_numeric" value="True"/>
+            <output name="outfile" file="unique_results1.bed"/>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+.. class:: infomark
+
+**Syntax**
+
+This tool returns all unique lines using the 'sort -u' command, so the output is sorted. The input does not need to be sorted beforehand.
+If you need additional options, like grouping or counting your unique results, please use the 'Unique lines from sorted file' tool.
+
+-----
+
+.. class:: infomark
+
+The input file needs to be tab-separated. Please convert your file first if necessary.
+
+-----
+
+**Example**
+
+- Input file::
+
+       chr1   10  100  gene1
+       chr1  105  200  gene2
+       chr1   10  100  gene1
+       chr2   10  100  gene4
+       chr2 1000 1900  gene5
+       chr3   15 1656  gene6
+       chr2   10  100  gene4
+
+- Unique lines will result in::
+
+       chr1   10  100  gene1
+       chr1  105  200  gene2
+       chr2   10  100  gene4
+       chr2 1000 1900  gene5
+       chr3   15 1656  gene6
+
+@REFERENCES@
+]]>
+</help>
+</tool>
author	bgruening
date	Thu, 29 Jan 2015 07:53:17 -0500
parents
children	60edf2f8c28f