comparison sorted_uniq.xml @ 0:5314e5d6f040 draft

Imported from capsule None
author bgruening
date Thu, 29 Jan 2015 07:53:17 -0500
parents
children 37e1eb05b1b4
comparison
equal deleted inserted replaced
-1:000000000000 0:5314e5d6f040
1 <tool id="tp_uniq_tool" name="Unique lines" version="@BASE_VERSION@.0">
2 <description>assuming sorted input file</description>
3 <!-- Pulls shared definitions in from macros.xml --> <macros>
4 <import>macros.xml</import>
5 </macros>
6 <!-- Expands the "requirements" macro (NOTE(review): presumably defined in macros.xml; confirm) and adds gnu_sed, which the command pipes through when counting --> <expand macro="requirements">
7 <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
8 </expand>
9 <version_command>uniq --version | head -n 1</version_command> <!-- keeps only the first line of the version output -->
10 <!-- Cheetah template for the uniq call: optional field skipping and case folding, then either a grouping method or the count/duplicate/unique flags, with the result written to the output dataset --> <command>
11 <![CDATA[
12 uniq
13 #if $skipfields:
14 -f $skipfields
15 #end if
16 $ignorecase
17 ## uniq rejects an empty --group method, so the "No grouping at all" choice (empty value) emits no flag
18 #if $grouping.grouping_select == 'yes' and str($grouping.group) != '':
19 --group=$grouping.group
20 #elif $grouping.grouping_select == 'no':
21 $grouping.count
22 $grouping.repeated
23 $grouping.uniqueonly
24 #end if
25
26 "$infile"
27
28 #if $grouping.grouping_select == 'no' and $grouping.count:
29 ## -c prefixes each line with a space-padded count followed by a single
30 ## space (not a tab); strip the padding and turn that space into a tab
31 | sed -e 's/ *//' -e 's/ /\t/'
32 #end if
33 > "$outfile"
34 ]]>
35 </command>
36 <inputs>
37 <param name="infile" type="data" format="tabular"
38 help="Make sure you have sorted this file" label="File to scan for unique values" />
39 <!-- The chosen branch selects which set of uniq flags the command template emits -->
40 <conditional name="grouping">
41 <param name="grouping_select" label="Do you want to group each unique group?" type="select">
42 <option value="no">No</option>
43 <option value="yes">Yes</option>
44 </param>
45 <when value="no">
46 <param name="count" type="boolean" falsevalue="" truevalue="-c"
47 help="Prefix lines by the number of occurrences. (-c)" label="Counting number of occurrences" />
48 <param name="repeated" type="boolean" falsevalue="" truevalue="-d"
49 help="(-d)" label="Only print duplicate lines"/>
50 <param name="uniqueonly" type="boolean" falsevalue="" truevalue="-u" checked="True"
51 help="(-u)" label="Only print unique lines" />
52 </when>
53 <when value="yes">
54 <param type="select" name="group"
55 help="(--group)" label="Output all lines, and delimit each unique group">
56 <option value="">No grouping at all</option>
57 <option value="separate">Separate unique groups with a single delimiter</option>
58 <option value="prepend">Output a delimiter before each group of unique items</option>
59 <option value="append">Output a delimiter after each group of unique items</option>
60 <option value="both">Output a delimiter around each group of unique items</option>
61 </param>
62 </when>
63 </conditional>
64 <!-- The two options below sit outside the conditional and are read by the command in either branch -->
65 <param name="ignorecase" type="boolean" falsevalue="" truevalue="-i"
66 help="(-i)" label="Ignore differences in case when comparing"/>
67 <param name="skipfields" type="integer" value="0" size="2"
68 help="Use zero to start from the first field. (-f)" label="Avoid comparing the first N fields" />
69 </inputs>
70 <outputs>
71 <data name="outfile" format_source="infile" metadata_source="infile"/> <!-- format and metadata are both taken from the infile dataset; NOTE(review): with the count option the command prepends a column, so the copied metadata may not describe the output; confirm -->
72 </outputs>
73 <tests>
74 <!-- "no" branch: case-insensitive, with the count and unique-only flags switched on --> <test>
75 <param name="infile" value="sorted_uniq1.tabular" />
76 <param name="ignorecase" value="True"/>
77 <param name="grouping_select" value="no"/>
78 <param name="count" value="True"/>
79 <param name="uniqueonly" value="True"/>
80 <output name="outfile" file="sorted_uniq_results1.tabular" />
81 </test>
82 <!-- "yes" branch: case-insensitive, groups delimited with the "separate" method --> <test>
83 <param name="infile" value="sorted_uniq1.tabular" />
84 <param name="ignorecase" value="True"/>
85 <param name="grouping_select" value="yes"/>
86 <param name="group" value="separate"/>
87 <output name="outfile" file="sorted_uniq_results2.tabular" />
88 </test>
89 </tests>
90 <help>
91 <![CDATA[
92 This tool takes a sorted file and looks for lines that are unique.
93
94 .. class:: warningmark
95
96 Please make sure your file is sorted, or else this tool will give you an erroneous output.
97
98 .. class:: infomark
99
100 You can sort your file using either the "Sort" tool in "Filter and Sort", or the "Sort" tool in "Unix Tools".
101
102 @REFERENCES@
103 ]]>
104 </help>
105 </tool>