comparison tools/stats/grouping.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 <tool id="Grouping1" name="Group" version="2.0.0">
2 <description>data by a column and perform aggregate operation on other columns.</description>
3 <command interpreter="python">
4 grouping.py
5 $out_file1
6 $input1
7 $groupcol
8 $ignorecase
9 #for $op in $operations
10 '${op.optype}
11 ${op.opcol}
12 ${op.opround}'
13 #end for
14 </command> <!-- Invokes grouping.py with the output path, the input dataset, the group-by column and the ignore-case flag, then one single-quoted group (optype, opcol, opround) per entry of the "operations" repeat. -->
15 <inputs> <!-- Form fields; each name below is referenced as a $variable in the command section. -->
16 <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
17 <param name="groupcol" label="Group by column" type="data_column" data_ref="input1" /> <!-- column chooser bound to input1 through data_ref -->
18 <param name="ignorecase" type="boolean" truevalue="1" falsevalue="0">
19 <label>Ignore case while grouping?</label>
20 </param> <!-- ignorecase reaches the script as "1" (true) or "0" (false) -->
21 <repeat name="operations" title="Operation"> <!-- repeatable group; the command's #for loop emits one quoted argument group per entry -->
22 <param name="optype" type="select" label="Type">
23 <option value="mean">Mean</option>
24 <option value="median">Median</option>
25 <option value="mode">Mode</option>
26 <option value="max">Maximum</option>
27 <option value="min">Minimum</option>
28 <option value="sum">Sum</option>
29 <option value="length">Count</option> <!-- note: the value sent to the script is "length", not "count" -->
30 <option value="unique">Count Distinct</option> <!-- note: the value sent to the script is "unique" -->
31 <option value="cat">Concatenate</option>
32 <option value="cat_uniq">Concatenate Distinct</option>
33 <option value="random">Randomly pick</option>
34 <option value="std">Standard deviation</option>
35 </param>
36 <param name="opcol" label="On column" type="data_column" data_ref="input1" /> <!-- column the operation is applied to; also bound to input1 -->
37 <param name="opround" type="select" label="Round result to nearest integer?"> <!-- passed to the script as the literal "no" or "yes"; "no" is listed first -->
38 <option value="no">NO</option>
39 <option value="yes">YES</option>
40 </param>
41 </repeat>
42 </inputs>
43 <outputs> <!-- Single tabular result; its path is the $out_file1 argument of the command. -->
44 <data format="tabular" name="out_file1" />
45 </outputs>
46 <requirements> <!-- Declares numpy as a python-module dependency; presumably used by grouping.py, which is not visible here (confirm against the script). -->
47 <requirement type="python-module">numpy</requirement>
48 </requirements>
49 <tests>
50 <!-- Valid data: mean of column 2 of 1.bed, grouped by column 1 with case ignored, compared against groupby_out1.dat -->
51 <test>
52 <param name="input1" value="1.bed"/>
53 <param name="groupcol" value="1"/>
54 <param name="ignorecase" value="true"/>
55 <param name="optype" value="mean"/>
56 <param name="opcol" value="2"/>
57 <param name="opround" value="no"/>
58 <output name="out_file1" file="groupby_out1.dat"/>
59 </test>
60 <!-- Long case running all twelve operations in one job; disabled because the test framework does not allow it yet
61 <test>
62 <param name="input1" value="1.bed"/>
63 <param name="groupcol" value="1"/>
64 <param name="ignorecase" value="false"/>
65 <param name="operations" value='[{"opcol": "2", "__index__": 0, "optype": "mean", "opround": "no"}, {"opcol": "2", "__index__": 1, "optype": "median", "opround": "no"}, {"opcol": "6", "__index__": 2, "optype": "mode", "opround": "no"}, {"opcol": "2", "__index__": 3, "optype": "max", "opround": "no"}, {"opcol": "2", "__index__": 4, "optype": "min", "opround": "no"}, {"opcol": "2", "__index__": 5, "optype": "sum", "opround": "no"}, {"opcol": "1", "__index__": 6, "optype": "length", "opround": "no"}, {"opcol": "1", "__index__": 7, "optype": "unique", "opround": "no"}, {"opcol": "1", "__index__": 8, "optype": "cat", "opround": "no"}, {"opcol": "6", "__index__": 9, "optype": "cat_uniq", "opround": "no"}, {"opcol": "2", "__index__": 10, "optype": "random", "opround": "no"}, {"opcol": "2", "__index__": 11, "optype": "std", "opround": "no"}]'/>
66 <output name="out_file1" file="groupby_out3.tabular"/>
67 </test>
68 -->
69 <!-- Data with an invalid value in a column; disabled because the test framework does not allow testing of errors
70 <test>
71 <param name="input1" value="1.tabular"/>
72 <param name="groupcol" value="1"/>
73 <param name="ignorecase" value="true"/>
74 <param name="optype" value="mean"/>
75 <param name="opcol" value="2"/>
76 <param name="opround" value="no"/>
77 <output name="out_file1" file="groupby_out2.dat"/>
78 </test>
79 -->
80 </tests>
81 <help>
82
83 .. class:: infomark
84
85 **TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
86
87 -----
88
89 **Syntax**
90
91 This tool allows you to group the input dataset by a particular column and perform aggregate functions: Mean, Median, Mode, Maximum, Minimum, Sum, Count, Count Distinct, Concatenate, Concatenate Distinct, Randomly pick, and Standard deviation on any column(s).
92
93 The Concatenate function will take, for each group, each item in the specified column and build a comma-delimited list. Concatenate Distinct will do the same but will build a list of distinct items with no repetition.
94
95 Count and Count Distinct work like Concatenate and Concatenate Distinct, but instead of listing the items they only count them and return an integer.
96
97 - If multiple modes are present, all are reported.
98
99 -----
100
101 **Example**
102
103 - For the following input::
104
105 chr22 1000 1003 TTT
106 chr22 2000 2003 aaa
107 chr10 2200 2203 TTT
108 chr10 1200 1203 ttt
109 chr22 1600 1603 AAA
110
111 - **Grouping on column 4** while ignoring case, and performing operation **Count on column 1** will return::
112
113 AAA 2
114 TTT 3
115
116 - **Grouping on column 4** while not ignoring case, and performing operation **Count on column 1** will return::
117
118 aaa 1
119 AAA 1
120 ttt 1
121 TTT 2
122 </help>
123 </tool>