annotate sorted_uniq.xml @ 0:5314e5d6f040 draft

Imported from capsule None
author bgruening
date Thu, 29 Jan 2015 07:53:17 -0500
parents
children 37e1eb05b1b4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
1 <tool id="tp_uniq_tool" name="Unique lines" version="@BASE_VERSION@.0">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
2 <description>assuming sorted input file</description>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
3 <macros>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
4 <import>macros.xml</import>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
5 </macros>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
6 <expand macro="requirements">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
7 <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
8 </expand>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
9 <version_command>uniq --version | head -n 1</version_command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
10 <command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
11 <![CDATA[
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
12 uniq
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
13 #if $skipfields:
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
14 -f $skipfields
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
15 #end if
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
16 $ignorecase
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
17
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
18 #if $grouping.grouping_select == 'yes':
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
19 --group=$grouping.group
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
20 #else:
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
21 $grouping.count
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
22 $grouping.repeated
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
23 $grouping.uniqueonly
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
24 #end if
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
25
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
26 "$infile"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
27
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
28 #if $grouping.grouping_select == 'no' and $grouping.count:
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
29 ## count will print the count with spaces in front of the line and
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
30 ## with a space (not a tab) after the number, we need to change that
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
31 | sed -e 's/ *//' -e 's/ /\t/'
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
32 #end if
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
33 > "$outfile"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
34 ]]>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
35 </command>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
36 <inputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
37 <param name="infile" format="tabular" type="data"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
38 label="File to scan for unique values" help="Make sure you have sorted this file" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
39
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
40 <conditional name="grouping">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
41 <param name="grouping_select" type="select" label="Do you want to group each unique group?">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
42 <option value="no">No</option>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
43 <option value="yes">Yes</option>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
44 </param>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
45 <when value="no">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
46 <param name="count" type="boolean" truevalue="-c" falsevalue=""
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
47 label="Counting number of occurrences" help="Prefix lines by the number of occurrences. (-c)" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
48 <param name="repeated" type="boolean" truevalue="-d" falsevalue=""
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
49 label="Only print duplicate lines" help="(-d)"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
50 <param name="uniqueonly" type="boolean" checked="True" truevalue="-u" falsevalue=""
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
51 label="Only print unique lines" help="(-u)" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
52 </when>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
53 <when value="yes">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
54 <param name="group" type="select"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
55 label="Output all lines, and delimit each unique group" help="(--group)">
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
56 <option value="">No grouping at all</option>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
57 <option value="separate">Separate unique groups with a single delimiter</option>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
58 <option value="prepend">Output a delimiter before each group of unique items</option>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
59 <option value="append">Output a delimiter after each group of unique items</option>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
60 <option value="both">Output a delimiter around each group of unique items</option>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
61 </param>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
62 </when>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
63 </conditional>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
64
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
65 <param name="ignorecase" type="boolean" truevalue="-i" falsevalue=""
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
66 label="Ignore differences in case when comparing" help="(-i)"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
67 <param name="skipfields" type="integer" size="2" value="0"
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
68 label="Avoid comparing the first N fields" help="Use zero to start from the first field. (-f)" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
69 </inputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
70 <outputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
71 <data name="outfile" format_source="infile" metadata_source="infile"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
72 </outputs>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
73 <tests>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
74 <test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
75 <param name="infile" value="sorted_uniq1.tabular" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
76 <param name="grouping_select" value="no"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
77 <param name="count" value="True"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
78 <param name="ignorecase" value="True"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
79 <param name="uniqueonly" value="True"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
80 <output name="outfile" file="sorted_uniq_results1.tabular" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
81 </test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
82 <test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
83 <param name="infile" value="sorted_uniq1.tabular" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
84 <param name="ignorecase" value="True"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
85 <param name="grouping_select" value="yes"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
86 <param name="group" value="separate"/>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
87 <output name="outfile" file="sorted_uniq_results2.tabular" />
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
88 </test>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
89 </tests>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
90 <help>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
91 <![CDATA[
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
92 This tool takes a sorted file and looks for lines that are unique.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
93
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
94 .. class:: warningmark
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
95
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
96 Please make sure your file is sorted, or else this tool will give you an erroneous output.
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
97
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
98 .. class:: infomark
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
99
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
100 You can sort your file using either the "Sort" tool in "Filter and Sort", or the "Sort" tool in "Unix Tools".
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
101
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
102 @REFERENCES@
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
103 ]]>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
104 </help>
5314e5d6f040 Imported from capsule None
bgruening
parents:
diff changeset
105 </tool>