text_processing: sorted_uniq.xml comparison

comparison sorted_uniq.xml @ 0:5314e5d6f040 draft

Imported from capsule None

author	bgruening
date	Thu, 29 Jan 2015 07:53:17 -0500
parents
children	37e1eb05b1b4

comparison

equal deleted inserted replaced

--1:000000000000
+:5314e5d6f040
+<tool id="tp_uniq_tool" name="Unique lines" version="@BASE_VERSION@.0">
+    <!-- Galaxy wrapper around the "uniq" command line program. The input is
+         expected to be sorted already (see the help section below). -->
+    <description>assuming sorted input file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- sed is required in addition to whatever the shared "requirements"
+         macro declares, because the command pipes the counted output
+         through sed. -->
+    <expand macro="requirements">
+        <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
+    </expand>
+    <version_command>uniq --version | head -n 1</version_command>
+    <!-- The CDATA content is kept flush-left on purpose: whitespace inside it
+         is part of the generated command line. -->
+    <command>
+<![CDATA[
+uniq
+#if $skipfields:
+-f $skipfields
+#end if
+$ignorecase
+#if $grouping.grouping_select == 'yes':
+## The "No grouping at all" choice carries an empty value. An empty METHOD
+## is not one of the values uniq accepts for --group (separate, prepend,
+## append, both), so the option is only emitted when a method was chosen.
+#if str($grouping.group) != '':
+--group=$grouping.group
+#end if
+#else:
+$grouping.count
+$grouping.repeated
+$grouping.uniqueonly
+#end if
+"$infile"
+#if $grouping.grouping_select == 'no' and $grouping.count:
+## count will print the count with spaces in front of the line and
+## with a space (not a tab) after the number, we need to change that:
+## the first expression strips the leading spaces, the second one turns
+## the first remaining space into a tab.
+| sed -e 's/ *//' -e 's/ /\t/'
+#end if
+> "$outfile"
+]]>
+    </command>
+    <inputs>
+        <param name="infile" format="tabular" type="data"
+               label="File to scan for unique values" help="Make sure you have sorted this file" />
+        <!-- Grouping and the count/duplicate/unique switches are offered in
+             separate branches, so only one of the two sets ever reaches the
+             command line. -->
+        <conditional name="grouping">
+            <param name="grouping_select" type="select" label="Do you want to group each unique group?">
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no">
+                <param name="count" type="boolean" truevalue="-c" falsevalue=""
+                       label="Counting number of occurrences" help="Prefix lines by the number of occurrences. (-c)" />
+                <param name="repeated" type="boolean" truevalue="-d" falsevalue=""
+                       label="Only print duplicate lines" help="(-d)"/>
+                <param name="uniqueonly" type="boolean" checked="True" truevalue="-u" falsevalue=""
+                       label="Only print unique lines" help="(-u)" />
+            </when>
+            <when value="yes">
+                <param name="group" type="select"
+                       label="Output all lines, and delimit each unique group" help="(--group)">
+                    <!-- Empty value: the command template omits the grouping
+                         option entirely for this choice. -->
+                    <option value="">No grouping at all</option>
+                    <option value="separate">Separate unique groups with a single delimiter</option>
+                    <option value="prepend">Output a delimiter before each group of unique items</option>
+                    <option value="append">Output a delimiter after each group of unique items</option>
+                    <option value="both">Output a delimiter around each group of unique items</option>
+                </param>
+            </when>
+        </conditional>
+        <param name="ignorecase" type="boolean" truevalue="-i" falsevalue=""
+               label="Ignore differences in case when comparing" help="(-i)"/>
+        <param name="skipfields" type="integer" size="2" value="0"
+               label="Avoid comparing the first N fields" help="Use zero to start from the first field. (-f)" />
+    </inputs>
+    <outputs>
+        <!-- The result inherits datatype and metadata from the input file. -->
+        <data name="outfile" format_source="infile" metadata_source="infile"/>
+    </outputs>
+    <tests>
+        <!-- Counting branch: exercises the sed post-processing of the counts. -->
+        <test>
+            <param name="infile" value="sorted_uniq1.tabular" />
+            <param name="grouping_select" value="no"/>
+            <param name="count" value="True"/>
+            <param name="ignorecase" value="True"/>
+            <param name="uniqueonly" value="True"/>
+            <output name="outfile" file="sorted_uniq_results1.tabular" />
+        </test>
+        <!-- Grouping branch with the "separate" method. -->
+        <test>
+            <param name="infile" value="sorted_uniq1.tabular" />
+            <param name="ignorecase" value="True"/>
+            <param name="grouping_select" value="yes"/>
+            <param name="group" value="separate"/>
+            <output name="outfile" file="sorted_uniq_results2.tabular" />
+        </test>
+    </tests>
+    <!-- The help text is reStructuredText; it stays flush-left and the
+         directives are surrounded by blank lines so that they are parsed as
+         directives instead of being folded into the neighbouring paragraph. -->
+    <help>
+<![CDATA[
+This tool takes a sorted file and looks for lines that are unique.
+
+.. class:: warningmark
+
+Please make sure your file is sorted, or else this tool will give you an erroneous output.
+
+.. class:: infomark
+
+You can sort your file using either the "Sort" tool in "Filter and Sort", or the "Sort" tool in "Unix Tools".
+
+@REFERENCES@
+]]>
+    </help>
+</tool>

Mercurial > repos > bgruening > text_processing

comparison sorted_uniq.xml @ 0:5314e5d6f040 draft