Mercurial > repos > nml > csvtk_summary
comparison summary.xml @ 0:ceb70f0dd898 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
| author | nml |
|---|---|
| date | Tue, 19 May 2020 17:23:57 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ceb70f0dd898 |
|---|---|
| 1 <tool id="csvtk_summary" name="csvtk-summary" version="@VERSION@+@GALAXY_VERSION@"> | |
| 2 <description> statistics of selected fields</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements" /> | |
| 7 <expand macro="version_cmd" /> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 | |
| 10 ## Set Up Input ## | |
| 11 ################## | |
| 12 | |
| 13 #set input_list = list() | |
| 14 #for $repeat in $field | |
| 15 #silent $input_list.append(str($repeat.column_text.in_text) + ":" + str($repeat.analysis_type)) | |
| 16 #end for | |
| 17 | |
| 18 #set input_total = ",".join($input_list) | |
| 19 | |
| 20 ################### | |
| 21 ## Start Command ## | |
| 22 ################### | |
| 23 | |
| 24 csvtk summary --num-cpus "\${GALAXY_SLOTS:-1}" | |
| 25 | |
| 26 ## Add additional flags as specified ## | |
| 27 ####################################### | |
| 28 $global_param.illegal_rows | |
| 29 $global_param.empty_rows | |
| 30 $global_param.header | |
| 31 $global_param.lazy_quotes | |
| 32 | |
| 33 ## Set Tabular input/output flag if input is tabular ## | |
| 34 ####################################################### | |
| 35 #if $in_1.is_of_type("tabular"): | |
| 36 -t -T | |
| 37 #end if | |
| 38 | |
| 39 ## Set Input ## | |
| 40 ############### | |
| 41 '$in_1' | |
| 42 | |
| 43 ## Specify fields ## | |
| 44 #################### | |
| 45 -f '$input_total' | |
| 46 | |
| 47 ## other ## | |
| 48 ########### | |
| 49 | |
| 50 #if $group_field.select_group != "none" | |
| 51 -g '$group_field.in_text' | |
| 52 #end if | |
| 53 | |
| 54 -s '$extra.separator' | |
| 55 -S '$extra.rand_int' | |
| 56 -n '$decimal_width' | |
| 57 $ignore_non_digits | |
| 58 | |
| 59 | |
| 60 ## To output ## | |
| 61 ############### | |
| 62 > summary | |
| 63 | |
| 64 ]]></command> | |
| 65 <inputs> | |
| 66 <expand macro="singular_input" /> | |
| 67 <repeat name="field" title="Select Column and Operator" min="1"> | |
| 68 <expand macro="singular_fields_input" /> | |
| 69 <param name="analysis_type" type="select" label="Analysis Type" help="Select analysis type to do on the chosen field"> | |
| 70 <option value="collapse">Collapse</option> | |
| 71 <option value="count">Count</option> | |
| 72 <option value="countn">Count of Digits (countn)</option> | |
| 73 <option value="countunique">Count Unique</option> | |
| 74 <option value="first">First Value</option> | |
| 75 <option value="last">Last Value</option> | |
| 76 <option value="max">Maximum</option> | |
| 77 <option value="mean">Mean</option> | |
| 78 <option value="median">Median</option> | |
| 79 <option value="min">Minimum</option> | |
| 80 <option value="prod">Product of the Elements</option> | |
| 81 <option value="q1">q1</option> | |
| 82 <option value="q2">q2</option> | |
| 83 <option value="q3">q3</option> | |
| 84 <option value="rand">Random Value</option> | |
| 85 <option value="entropy">Shannon Entropy</option> | |
| 86 <option value="stdev">Standard Deviation</option> | |
| 87 <option value="sum">Sum</option> | |
| 88 <option value="uniq">Unique</option> | |
| 89 <option value="variance">Variance</option> | |
| 90 </param> | |
| 91 </repeat> | |
| 92 <expand macro="groups_input" /> | |
| 93 <param name="decimal_width" type="integer" value="2" argument="-n" | |
| 94 label="Number of Decimals" | |
| 95 help="Limit float to N decimal places" | |
| 96 /> | |
| 97 <param name="ignore_non_digits" type="boolean" checked="false" argument="-i" | |
| 98 truevalue="-i" | |
| 99 falsevalue="" | |
| 100 label="Ignore non-digits" | |
| 101 help="Ignore non-digit values in columns. Ex. NA or N/A" | |
| 102 /> | |
| 103 <section name="extra" title="Specific Optional Analysis Modifiers" expanded="false"> | |
| 104 <param name="separator" type="text" value="; " argument="-s" | |
| 105 label="Collapse Separator String" | |
| 106 help="Input string of characters that will separate collapsed columns. The ' character is not allowed"> | |
| 107 <expand macro="text_sanitizer" /> | |
| 108 </param> | |
| 109 <param name="rand_int" type="integer" value="11" argument="-S" | |
| 110 label="Random Value Seed" | |
| 111 help="specify an integer" | |
| 112 /> | |
| 113 </section> | |
| 114 <expand macro="global_parameters" /> | |
| 115 </inputs> | |
| 116 <outputs> | |
| 117 <data format_source="in_1" name="summary" from_work_dir="summary" label="${in_1.name} summary of analyses" /> | |
| 118 </outputs> | |
| 119 <tests> | |
| 120 <test> | |
| 121 <param name="in_1" value="plot.csv" /> | |
| 122 <repeat name="field"> | |
| 123 <conditional name="column_text"> | |
| 124 <param name="select" value="string" /> | |
| 125 <param name="in_text" value="2" /> | |
| 126 </conditional> | |
| 127 <param name="analysis_type" value="collapse" /> | |
| 128 </repeat> | |
| 129 <repeat name="field"> | |
| 130 <conditional name="column_text"> | |
| 131 <param name="select" value="string" /> | |
| 132 <param name="in_text" value="3" /> | |
| 133 </conditional> | |
| 134 <param name="analysis_type" value="count" /> | |
| 135 </repeat> | |
| 136 <repeat name="field"> | |
| 137 <conditional name="column_text"> | |
| 138 <param name="select" value="string" /> | |
| 139 <param name="in_text" value="2" /> | |
| 140 </conditional> | |
| 141 <param name="analysis_type" value="countn" /> | |
| 142 </repeat> | |
| 143 <repeat name="field"> | |
| 144 <conditional name="column_text"> | |
| 145 <param name="select" value="string" /> | |
| 146 <param name="in_text" value="3" /> | |
| 147 </conditional> | |
| 148 <param name="analysis_type" value="countunique" /> | |
| 149 </repeat> | |
| 150 <repeat name="field"> | |
| 151 <conditional name="column_text"> | |
| 152 <param name="select" value="string" /> | |
| 153 <param name="in_text" value="2" /> | |
| 154 </conditional> | |
| 155 <param name="analysis_type" value="entropy" /> | |
| 156 </repeat> | |
| 157 <repeat name="field"> | |
| 158 <conditional name="column_text"> | |
| 159 <param name="select" value="string" /> | |
| 160 <param name="in_text" value="3" /> | |
| 161 </conditional> | |
| 162 <param name="analysis_type" value="first" /> | |
| 163 </repeat> | |
| 164 <repeat name="field"> | |
| 165 <conditional name="column_text"> | |
| 166 <param name="select" value="string" /> | |
| 167 <param name="in_text" value="2" /> | |
| 168 </conditional> | |
| 169 <param name="analysis_type" value="last" /> | |
| 170 </repeat> | |
| 171 <repeat name="field"> | |
| 172 <conditional name="column_text"> | |
| 173 <param name="select" value="string" /> | |
| 174 <param name="in_text" value="3" /> | |
| 175 </conditional> | |
| 176 <param name="analysis_type" value="max" /> | |
| 177 </repeat> | |
| 178 <repeat name="field"> | |
| 179 <conditional name="column_text"> | |
| 180 <param name="select" value="string" /> | |
| 181 <param name="in_text" value="2" /> | |
| 182 </conditional> | |
| 183 <param name="analysis_type" value="mean" /> | |
| 184 </repeat> | |
| 185 <repeat name="field"> | |
| 186 <conditional name="column_text"> | |
| 187 <param name="select" value="string" /> | |
| 188 <param name="in_text" value="3" /> | |
| 189 </conditional> | |
| 190 <param name="analysis_type" value="median" /> | |
| 191 </repeat> | |
| 192 <repeat name="field"> | |
| 193 <conditional name="column_text"> | |
| 194 <param name="select" value="string" /> | |
| 195 <param name="in_text" value="2" /> | |
| 196 </conditional> | |
| 197 <param name="analysis_type" value="min" /> | |
| 198 </repeat> | |
| 199 <repeat name="field"> | |
| 200 <conditional name="column_text"> | |
| 201 <param name="select" value="string" /> | |
| 202 <param name="in_text" value="3" /> | |
| 203 </conditional> | |
| 204 <param name="analysis_type" value="prod" /> | |
| 205 </repeat> | |
| 206 <repeat name="field"> | |
| 207 <conditional name="column_text"> | |
| 208 <param name="select" value="string" /> | |
| 209 <param name="in_text" value="2" /> | |
| 210 </conditional> | |
| 211 <param name="analysis_type" value="q1" /> | |
| 212 </repeat> | |
| 213 <repeat name="field"> | |
| 214 <conditional name="column_text"> | |
| 215 <param name="select" value="string" /> | |
| 216 <param name="in_text" value="3" /> | |
| 217 </conditional> | |
| 218 <param name="analysis_type" value="q2" /> | |
| 219 </repeat> | |
| 220 <repeat name="field"> | |
| 221 <conditional name="column_text"> | |
| 222 <param name="select" value="string" /> | |
| 223 <param name="in_text" value="2" /> | |
| 224 </conditional> | |
| 225 <param name="analysis_type" value="q3" /> | |
| 226 </repeat> | |
| 227 <repeat name="field"> | |
| 228 <conditional name="column_text"> | |
| 229 <param name="select" value="string" /> | |
| 230 <param name="in_text" value="3" /> | |
| 231 </conditional> | |
| 232 <param name="analysis_type" value="rand" /> | |
| 233 </repeat> | |
| 234 <repeat name="field"> | |
| 235 <conditional name="column_text"> | |
| 236 <param name="select" value="string" /> | |
| 237 <param name="in_text" value="2" /> | |
| 238 </conditional> | |
| 239 <param name="analysis_type" value="stdev" /> | |
| 240 </repeat> | |
| 241 <repeat name="field"> | |
| 242 <conditional name="column_text"> | |
| 243 <param name="select" value="string" /> | |
| 244 <param name="in_text" value="3" /> | |
| 245 </conditional> | |
| 246 <param name="analysis_type" value="sum" /> | |
| 247 </repeat> | |
| 248 <repeat name="field"> | |
| 249 <conditional name="column_text"> | |
| 250 <param name="select" value="string" /> | |
| 251 <param name="in_text" value="2" /> | |
| 252 </conditional> | |
| 253 <param name="analysis_type" value="uniq" /> | |
| 254 </repeat> | |
| 255 <repeat name="field"> | |
| 256 <conditional name="column_text"> | |
| 257 <param name="select" value="string" /> | |
| 258 <param name="in_text" value="3" /> | |
| 259 </conditional> | |
| 260 <param name="analysis_type" value="variance" /> | |
| 261 </repeat> | |
| 262 <conditional name="group_field"> | |
| 263 <param name="select_group" value="string" /> | |
| 264 <param name="in_text" value="1" /> | |
| 265 </conditional> | |
| 266 <output name="summary" > | |
| 267 <assert_contents> | |
| 268 <has_text text="collapse" /> | |
| 269 <has_text text="count" /> | |
| 270 <has_text text="countn" /> | |
| 271 <has_text text="countunique" /> | |
| 272 <has_text text="entropy" /> | |
| 273 <has_text text="first" /> | |
| 274 <has_text text="last" /> | |
| 275 <has_text text="max" /> | |
| 276 <has_text text="mean" /> | |
| 277 <has_text text="median" /> | |
| 278 <has_text text="min" /> | |
| 279 <has_text text="prod" /> | |
| 280 <has_text text="q1" /> | |
| 281 <has_text text="q2" /> | |
| 282 <has_text text="q3" /> | |
| 283 <has_text text="rand" /> | |
| 284 <has_text text="stdev" /> | |
| 285 <has_text text="sum" /> | |
| 286 <has_text text="uniq" /> | |
| 287 <has_text text="variance" /> | |
| 288 </assert_contents> | |
| 289 </output> | |
| 290 </test> | |
| 291 </tests> | |
| 292 <help><![CDATA[ | |
| 293 | |
| 294 Csvtk - Summary Help | |
| 295 -------------------- | |
| 296 | |
| 297 Info | |
| 298 #### | |
| 299 | |
| 300 Csvtk Summary applies a variety of analyses to the selected column(s) and displays all of the results in a single output | |
| 301 | |
| 302 .. class:: warningmark | |
| 303 | |
| 304 Single quotes are not allowed in text inputs! | |
| 305 | |
| 306 @HELP_INPUT_DATA@ | |
| 307 | |
| 308 | |
| 309 Usage | |
| 310 ##### | |
| 311 | |
| 312 To run csvtk-summary, all you need is a valid (as defined above) CSV or TSV file with any column(s) that you want to | |
| 313 run one of the analyses on. | |
| 314 | |
| 315 Analyses include: | |
| 316 | |
| 317 - Collapse | |
| 318 | |
| 319 - Count | |
| 320 | |
| 321 - Count Numbers (countn) | |
| 322 | |
| 323 - Count Unique | |
| 324 | |
| 325 - First Value Selection | |
| 326 | |
| 327 - Last Value Selection | |
| 328 | |
| 329 - Maximum | |
| 330 | |
| 331 - Mean | |
| 332 | |
| 333 - Median | |
| 334 | |
| 335 - Minimum | |
| 336 | |
| 337 - q1 | |
| 338 | |
| 339 - q2 | |
| 340 | |
| 341 - q3 | |
| 342 | |
| 343 - Random Value Selection | |
| 344 | |
| 345 - Shannon Entropy | |
| 346 | |
| 347 - Sum | |
| 348 | |
| 349 - Unique Values | |
| 350 | |
| 351 - Variance | |
| 352 | |
| 353 More information on these can be found on the `csvtk website <https://bioinf.shenwei.me/csvtk/usage/#summary>`_. | |
| 354 | |
| 355 **Example Summary Input** | |
| 356 | |
| 357 Input table: | |
| 358 | |
| 359 +-------+--------+ | |
| 360 | Group | Length | | |
| 361 +=======+========+ | |
| 362 | A | 1500 | | |
| 363 +-------+--------+ | |
| 364 | B | 1000 | | |
| 365 +-------+--------+ | |
| 366 | B | 1500 | | |
| 367 +-------+--------+ | |
| 368 | B | 2000 | | |
| 369 +-------+--------+ | |
| 370 | |
| 371 Suppose you wanted to group the values based on column 1 of the input table and then find out the mean length and maximum length for each group. | |
| 372 You would input this into csvtk-summary by creating 2 input repeats where the first one selects "column 2" and an analysis of "mean" and the | |
| 373 second one selects "column 2" with an analysis of "maximum". | |
| 374 | |
| 375 Running this would generate the following output: | |
| 376 | |
| 377 +-------+-------------+------------+ | |
| 378 | Group | Length:mean | Length:max | | |
| 379 +=======+=============+============+ | |
| 380 | A | 1500 | 1500 | | |
| 381 +-------+-------------+------------+ | |
| 382 | B | 1500 | 2000 | | |
| 383 +-------+-------------+------------+ | |
| 384 | |
| 385 -------- | |
| 386 | |
| 387 | |
| 388 @HELP_COLUMNS@ | |
| 389 | |
| 390 | |
| 391 @HELP_END_STATEMENT@ | |
| 392 | |
| 393 | |
| 394 ]]></help> | |
| 395 <expand macro="citations" /> | |
| 396 </tool> |
