comparison w4mclassfilter.xml @ 14:87ec0d3c2266 draft

"planemo upload for repository https://github.com/HegemanLab/w4mclassfilter_galaxy_wrapper/tree/master commit a9664e9a04e49d436ebbb643ba1755397ab759dc"
author eschen42
date Fri, 03 Jan 2020 11:07:39 -0500
parents c18040b6e8b9
children f9cb2244fd01
comparison
equal deleted inserted replaced
13:c18040b6e8b9 14:87ec0d3c2266
1 <tool id="w4mclassfilter" name="W4m Data Subset" version="0.98.14"> 1 <tool id="w4mclassfilter" name="W4m Data Subset" version="0.98.18">
2 <description>Filter W4m data by values or metadata</description> 2 <description>Filter W4M data by values or metadata</description>
3 <!-- Here is the hyphenation standard that I *try* to apply consistently in my documentation: http://www.sandranoonan.com/dont-let-hyphenation-drive-crazy/ --> 3 <!-- Here is the hyphenation standard that I *try* to apply consistently in my documentation:
4 https://web.archive.org/web/20161014025757/http://www.sandranoonan.com/dont-let-hyphenation-drive-crazy/
5 -->
4 <requirements> 6 <requirements>
5 <requirement type="package" version="3.6.1">r-base</requirement> 7 <requirement type="package" version="3.6.2">r-base</requirement>
6 <requirement type="package" version="1.1_5">r-batch</requirement> 8 <requirement type="package" version="1.1_5">r-batch</requirement>
7 <requirement type="package" version="0.98.14">w4mclassfilter</requirement> 9 <requirement type="package" version="0.98.18">w4mclassfilter</requirement>
8 </requirements> 10 </requirements>
9 <command detect_errors="aggressive"><![CDATA[ 11 <command detect_errors="aggressive"><![CDATA[
10 unset R_HOME; 12 unset R_HOME;
11 Rscript $__tool_directory__/w4mclassfilter_wrapper.R 13 if [ '$centering' == 'medoid' -a '$imputation' == 'none' ]; then
12 dataMatrix_in '$dataMatrix_in' 14 (echo 'medoid' centering may not be chosen with imputation 'none' 1>&2);
13 sampleMetadata_in '$sampleMetadata_in' 15 [ ! 1 ];
14 variableMetadata_in '$variableMetadata_in' 16 else
15 sampleclassNames '$sampleclassNames' 17 Rscript
16 inclusive '$inclusive' 18 $__tool_directory__/w4mclassfilter_wrapper.R
17 wildcards '$wildcards' 19 dataMatrix_in '$dataMatrix_in'
18 classnameColumn '$classnameColumn' 20 sampleMetadata_in '$sampleMetadata_in'
19 samplenameColumn 'sampleMetadata' 21 variableMetadata_in '$variableMetadata_in'
20 variable_range_filter '$variableRangeFilter' 22 sampleclassNames '$sampleclassNames'
21 transformation '$transformation' 23 inclusive '$inclusive'
22 imputation '$imputation' 24 wildcards '$wildcards'
23 dataMatrix_out '$dataMatrix_out' 25 classnameColumn '$classnameColumn'
24 sampleMetadata_out '$sampleMetadata_out' 26 samplenameColumn 'sampleMetadata'
25 variableMetadata_out '$variableMetadata_out' 27 variable_range_filter '$variableRangeFilter'
26 order_vrbl '$order_vrbl' 28 transformation '$transformation'
27 order_smpl '$order_smpl' 29 imputation '$imputation'
30 dataMatrix_out '$dataMatrix_out'
31 sampleMetadata_out '$sampleMetadata_out'
32 variableMetadata_out '$variableMetadata_out'
33 order_vrbl '$order_vrbl'
34 order_smpl '$order_smpl'
35 centering '$centering';
36 fi
28 ]]></command> 37 ]]></command>
29 <inputs> 38 <inputs>
30 <param name="dataMatrix_in" format="tabular" label="Data matrix file" type="data" 39 <param name="dataMatrix_in" format="tabular" label="Data matrix" type="data"
31 help="data matrix, with sample names in first row and feature names in first column" /> 40 help="Choose data-matrix file (tab-separated values with sample names in first row and feature names in first column)." />
32 <param name="sampleMetadata_in" format="tabular" label="Sample metadata file" type="data" 41 <param name="sampleMetadata_in" format="tabular" label="Sample metadata" type="data"
33 help="sample metadata, with one row per sample" /> 42 help="Choose sample-metadata file (tab-separated values with one row per sample, sample name in first column)." />
34 <param name="variableMetadata_in" format="tabular" label="Variable metadata file" type="data" 43 <param name="variableMetadata_in" format="tabular" label="Variable metadata" type="data"
35 help="variable metadata, with one row per feature" /> 44 help="Choose variable-metadata file (tab-separated values with one row per feature, feature name in first column)." />
36 <param name="classnameColumn" label="Column that names the sample-class" type="text" value = "class" 45 <param name="classnameColumn" label="Column containing the sample-class names (or treatment names)" type="text" value = "class"
37 help="name of the column in sample metadata that has the values to be tested against the 'Names of sample-classes' input parameter"> 46 help="Name the column in 'Sample metadata' that has the values to be referenced by 'Sample-class names' and 'Compute centers for classes'. [default: 'class']">
38 <sanitizer> 47 <sanitizer>
39 <valid initial="string.letters"> 48 <valid initial="string.letters">
40 <add preset="string.digits"/> 49 <add preset="string.digits"/>
41 <add value="&#46;" /> <!-- dot, period --> 50 <add value="&#46;" /> <!-- dot, period -->
42 <add value="&#95;" /> <!-- underscore --> 51 <add value="&#95;" /> <!-- underscore -->
43 </valid> 52 </valid>
44 </sanitizer> 53 </sanitizer>
45 </param> 54 </param>
46 <param name="sampleclassNames" label="Names of sample-classes" type="text" value = "" 55 <param name="sampleclassNames" label="Sample-class names (or patterns)" type="text" value = ""
47 help="comma-separated names (or regular expressions to match names) of sample-classes to filter in or out (Leave empty to match no names.)"> 56 help="List of names (or patterns to match names) of sample classes to include or exclude. List should be comma-separated with no stray space characters. (Leave this empty to match no names.) [default: empty]">
48 <sanitizer> 57 <sanitizer>
49 <valid initial="string.letters"> 58 <valid initial="string.letters">
50 <add preset="string.digits"/> 59 <add preset="string.digits"/>
51 <add value="&#123;" /> <!-- l-cube, left-curly-bracket --> 60 <add value="&#123;" /> <!-- l-curb, left-curly-bracket -->
52 <add value="&#124;" /> <!-- pipe --> 61 <add value="&#124;" /> <!-- pipe -->
53 <add value="&#125;" /> <!-- r-cube, right-curly-bracket --> 62 <add value="&#125;" /> <!-- r-curb, right-curly-bracket -->
54 <add value="&#36;" /> <!-- dollar, dollar-sign --> 63 <add value="&#36;" /> <!-- dollar, dollar-sign -->
55 <add value="&#40;" /> <!-- left-paren --> 64 <add value="&#40;" /> <!-- left-paren -->
56 <add value="&#41;" /> <!-- right-paren --> 65 <add value="&#41;" /> <!-- right-paren -->
57 <add value="&#42;" /> <!-- splat, asterisk --> 66 <add value="&#42;" /> <!-- splat, asterisk -->
58 <add value="&#43;" /> <!-- plus --> 67 <add value="&#43;" /> <!-- plus -->
60 <add value="&#44;" /> <!-- comma --> 69 <add value="&#44;" /> <!-- comma -->
61 <add value="&#46;" /> <!-- dot, period --> 70 <add value="&#46;" /> <!-- dot, period -->
62 <add value="&#58;" /> <!-- colon --> 71 <add value="&#58;" /> <!-- colon -->
63 <add value="&#59;" /> <!-- semi, semicolon --> 72 <add value="&#59;" /> <!-- semi, semicolon -->
64 <add value="&#63;" /> <!-- what, question mark --> 73 <add value="&#63;" /> <!-- what, question mark -->
65 <add value="&#91;" /> <!-- l-squib, left-squre-bracket --> 74 <add value="&#91;" /> <!-- l-squib, left-square-bracket -->
66 <add value="&#92;" /> <!-- whack, backslash --> 75 <add value="&#92;" /> <!-- whack, backslash -->
67 <add value="&#93;" /> <!-- r-squib, right-squre-bracket --> 76 <add value="&#93;" /> <!-- r-squib, right-square-bracket -->
68 <add value="&#94;" /> <!-- hat, caret --> 77 <add value="&#94;" /> <!-- hat, caret -->
69 <add value="&#95;" /> <!-- underscore --> 78 <add value="&#95;" /> <!-- underscore -->
70 </valid> 79 </valid>
71 </sanitizer> 80 </sanitizer>
72 </param> 81 </param>
73 <param name="wildcards" label="Use 'wild cards' or 'regular expressions'" type="select" 82 <param name="inclusive" label="Exclude/include named (or matched) sample classes" type="select" help="Indicate meaning of preceding list: either to identify classes to exclude from output or to identify classes to include in output. [default: 'filter-out']">
74 help="'wild-cards' - use '*' and '?' to match class names; &#160;&#160; 'regular-expressions' - use regular expressions to match class names"> 83 <option value="TRUE">filter-in: &#160;&#160; Include only the named sample classes.</option>
75 <option value="TRUE" selected="true">wild-cards</option> 84 <option value="FALSE" selected="true">filter-out: &#160;&#160; Exclude only the named sample classes.</option>
76 <option value="FALSE">regular-expressions</option>
77 </param> 85 </param>
78 <param name="inclusive" label="Exclude/include named classes" type="select" 86 <param name="wildcards" label="Use 'wild card patterns' or 'regular expression patterns' to match sample-class names" type="select"
79 help="'filter-out' - exclude only the named sample-classes; &#160;&#160; 'filter-in' - include only the named sample-classes"> 87 help="See '&lt;i&gt;Wild-card patterns to match class names&lt;/i&gt;' and '&lt;i&gt;Regular-expression patterns to match sample-class names&lt;/i&gt;' sections below. [default: 'wild-card patterns']">
80 <option value="TRUE">filter-in</option> 88 <option value="TRUE" selected="true">wild-card patterns: &#160;&#160; Use '*' and '?' to match sample-class names.</option>
81 <option value="FALSE" selected="true">filter-out</option> 89 <option value="FALSE">regular-expression patterns: &#160;&#160; Use regular expressions to match sample-class names.</option>
82 </param> 90 </param>
83 <param name="variableRangeFilter" label="Variable-range filters" type="text" value = "" 91 <param name="variableRangeFilter" label="Variable-range filters" type="text" value = ""
84 help="comma-separated filters, each specified as 'variableMetadataColumnName:min:max' (leave empty for no filtering, as described in help below.)"> 92 help="List of filters, each specifying the range of permitted values in a column of 'Variable metadata' (specified as 'column:min:max'), as described in '&lt;i&gt;Variable-range filters&lt;/i&gt;' section below. List should be comma-separated with no stray space characters. (Leave this empty for no filtering.) [default: empty]">
85 <sanitizer> 93 <sanitizer>
86 <valid initial="string.letters"> 94 <valid initial="string.letters">
87 <add preset="string.digits"/> 95 <add preset="string.digits"/>
88 <add value="&#44;" /> <!-- comma --> 96 <add value="&#44;" /> <!-- comma -->
89 <add value="&#46;" /> <!-- dot, period --> 97 <add value="&#46;" /> <!-- dot, period -->
90 <add value="&#58;" /> <!-- colon --> 98 <add value="&#58;" /> <!-- colon -->
91 <add value="&#95;" /> <!-- underscore --> 99 <add value="&#95;" /> <!-- underscore -->
92 </valid> 100 </valid>
93 </sanitizer> 101 </sanitizer>
94 </param> 102 </param>
95 <param name="transformation" label="Data-transformation" type="select" 103 <param name="transformation" label="Data transformation" type="select"
96 help="'none' - do not transform data; &#160;&#160; 'log2' - log base 2 of data; &#160;&#160; 'log10' - log base 10 of data; &#160;&#160; in all cases, negative and missing values are imputed to zero"> 104 help="Choose transformation. In all cases, negative intensities become missing values. See '&lt;i&gt;Data transformation and imputation&lt;/i&gt;' section below. [default: 'none']">
97 <option value="none" selected="true">none</option> 105 <option value="none" selected="true">none: &#160;&#160; Do not transform data.</option>
98 <option value="log2">log2</option> 106 <option value="log2">log2: &#160;&#160; Perform log base 2 transformation of data.</option>
99 <option value="log10">log10</option> 107 <option value="log10">log10: &#160;&#160; Perform log base 10 transformation of data.</option>
100 </param> 108 </param>
101 <param name="imputation" label="Imputation of missing values" type="select" 109 <param name="imputation" label="Imputation of missing values" type="select"
102 help="'zero' - replace missing values with zero; &#160;&#160; 'center' - replace missing values with feature-median; &#160;&#160; 'none' - perform no imputation"> 110 help="Choose imputation for missing values. See '&lt;i&gt;Data transformation and imputation&lt;/i&gt;' section below. [default: 'zero']">
103 <option value="zero" selected="true">zero</option> 111 <option value="zero" selected="true">zero: &#160;&#160; Replace missing values with zero.</option>
104 <option value="center">center</option> 112 <option value="center">center: &#160;&#160; Replace missing values with feature-median.</option>
105 <option value="none">none</option> 113 <option value="none">none: &#160;&#160; Perform no imputation. Note that 'compute centers' cannot be set to 'medoid'.</option>
106 </param> 114 </param>
107 <param name="order_smpl" label="Column that specifies order for samples" type="text" value = "sampleMetadata" 115 <param name="order_smpl" label="Columns that specify order for samples" type="text" value = "sampleMetadata"
108 help="name of the column in sample metadata that is used to sort samples"> 116 help="List of sample-metadata column names for sorting samples. List should be comma-separated with no stray space characters. (This is ignored when 'Compute centers for classes' is set to either 'centroid' or 'median'.) [default: 'sampleMetadata']">
109 <sanitizer> 117 <sanitizer>
110 <valid initial="string.letters"> 118 <valid initial="string.letters">
111 <add preset="string.digits"/> 119 <add preset="string.digits"/>
112 <add value="&#46;" /> <!-- dot, period --> 120 <add value="&#46;" /> <!-- dot, period -->
113 <add value="&#95;" /> <!-- underscore --> 121 <add value="&#95;" /> <!-- underscore -->
122 <add value="&#44;" /> <!-- comma -->
114 </valid> 123 </valid>
115 </sanitizer> 124 </sanitizer>
116 </param> 125 </param>
117 <param name="order_vrbl" label="Column that specifies order for features" type="text" value = "variableMetadata" 126 <param name="order_vrbl" label="Columns that specify order for features" type="text" value = "variableMetadata"
118 help="name of the column in variable metadata that is used to sort features"> 127 help="List of feature-metadata column names for sorting features. List should be comma-separated with no stray space characters. [default: 'variableMetadata']">
119 <sanitizer> 128 <sanitizer>
120 <valid initial="string.letters"> 129 <valid initial="string.letters">
121 <add preset="string.digits"/> 130 <add preset="string.digits"/>
122 <add value="&#46;" /> <!-- dot, period --> 131 <add value="&#46;" /> <!-- dot, period -->
123 <add value="&#95;" /> <!-- underscore --> 132 <add value="&#95;" /> <!-- underscore -->
133 <add value="&#44;" /> <!-- comma -->
124 </valid> 134 </valid>
125 </sanitizer> 135 </sanitizer>
136 </param>
137 <param name="centering" label="Compute centers for classes (e.g., treatments)" type="select" help="[default: 'none']">
138 <option value="none" selected="true">none: &#160;&#160; Do not compute centers for classes/treatments.</option>
139 <option value="centroid">centroid: &#160;&#160; For each class, compute the mean for each feature.</option>
140 <option value="median">median: &#160;&#160; For each class, compute the median for each feature.</option>
141 <option value="medoid">medoid: &#160;&#160; For each class, select only the most central member. Note that 'Imputation of missing values' cannot be 'none'.</option>
126 </param> 142 </param>
127 </inputs> 143 </inputs>
128 <outputs> 144 <outputs>
129 <data name="dataMatrix_out" format="tabular" label="${dataMatrix_in.name}.subset" ></data> 145 <data name="dataMatrix_out" format="tabular" label="${dataMatrix_in.name}.subset" ></data>
130 <data name="sampleMetadata_out" format="tabular" label="${sampleMetadata_in.name}.subset" ></data> 146 <data name="sampleMetadata_out" format="tabular" label="${sampleMetadata_in.name}.subset" ></data>
192 <not_has_text text="HMDB59717" /> 208 <not_has_text text="HMDB59717" />
193 </assert_contents> 209 </assert_contents>
194 </output> 210 </output>
195 </test> 211 </test>
196 <!-- test 2 --> 212 <!-- test 2 -->
197 <test> 213 <test>
198 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/> 214 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
199 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/> 215 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
200 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/> 216 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
201 <!-- test that hyphens in regular expressions work --> 217 <!-- test that hyphens in regular expressions work -->
202 <param name="sampleclassNames" value="HU_[0-9][0-9][0-9]"/> 218 <param name="sampleclassNames" value="HU_[0-9][0-9][0-9]"/>
203 <param name="inclusive" value="TRUE"/> 219 <param name="inclusive" value="TRUE"/>
204 <param name="wildcards" value="FALSE"/> 220 <param name="wildcards" value="FALSE"/>
205 <param name="classnameColumn" value="sampleMetadata"/> 221 <param name="classnameColumn" value="sampleMetadata"/>
206 <!-- test that variableRangeFilter works with transformation --> 222 <!-- test that variableRangeFilter works with transformation -->
207 <param name="variableRangeFilter" value="FEATMAX:6.30103:,mz:200:,rt::800"/> 223 <param name="variableRangeFilter" value="FEATMAX:6.30103:,mz:200:,rt::800"/>
208 <param name="transformation" value="log10"/> 224 <param name="transformation" value="log10"/>
209 <param name="imputation" value="zero"/> 225 <param name="imputation" value="zero"/>
210 <output name="dataMatrix_out" md5="5644d2ea01d072ee1d0c40e29e9d0089"> 226 <output name="dataMatrix_out" md5="5644d2ea01d072ee1d0c40e29e9d0089">
211 <assert_contents> 227 <assert_contents>
212 <has_text text="5.8733671" /> 228 <has_text text="5.8733671" />
213 </assert_contents> 229 </assert_contents>
214 </output> 230 </output>
215 <output name="sampleMetadata_out"> 231 <output name="sampleMetadata_out">
216 <assert_contents> 232 <assert_contents>
217 <has_text text="HU_017" /> 233 <has_text text="HU_017" />
516 <param name="classnameColumn" value="gender"/> 532 <param name="classnameColumn" value="gender"/>
517 <param name="sampleclassNames" value="*"/> 533 <param name="sampleclassNames" value="*"/>
518 <param name="wildcards" value="TRUE"/> 534 <param name="wildcards" value="TRUE"/>
519 <param name="inclusive" value="TRUE"/> 535 <param name="inclusive" value="TRUE"/>
520 <param name="imputation" value="none"/> 536 <param name="imputation" value="none"/>
521 <output name="dataMatrix_out" md5="cc9ab8bdb70b68b43b19b7327d285166"> 537 <output name="dataMatrix_out" md5="6200dfa77d09c56e434f80b1a23b3393">
522 <assert_contents> 538 <assert_contents>
523 <not_has_text text="HU_204" /> 539 <not_has_text text="HU_204" />
524 <has_text text="NA" /> 540 <has_text text="NA" />
525 <has_text text="HU_028" /> 541 <has_text text="HU_028" />
526 </assert_contents> 542 </assert_contents>
540 <param name="classnameColumn" value="gender"/> 556 <param name="classnameColumn" value="gender"/>
541 <param name="sampleclassNames" value="*"/> 557 <param name="sampleclassNames" value="*"/>
542 <param name="wildcards" value="TRUE"/> 558 <param name="wildcards" value="TRUE"/>
543 <param name="inclusive" value="TRUE"/> 559 <param name="inclusive" value="TRUE"/>
544 <param name="imputation" value="center"/> 560 <param name="imputation" value="center"/>
545 <output name="dataMatrix_out" md5="75a4802bb8887709e4d4dec8c2c3d3cf"> 561 <output name="dataMatrix_out" md5="a404278c5c9ffd5bdadf346c4f8a0184">
546 <assert_contents> 562 <assert_contents>
547 <not_has_text text="HU_204" /> 563 <not_has_text text="HU_204" />
548 <not_has_text text="NA" /> 564 <not_has_text text="NA" />
549 <has_text text="HU_028" /> 565 <has_text text="HU_028" />
550 </assert_contents> 566 </assert_contents>
554 <not_has_text text="HU_204" /> 570 <not_has_text text="HU_204" />
555 <has_text text="HU_028" /> 571 <has_text text="HU_028" />
556 </assert_contents> 572 </assert_contents>
557 </output> 573 </output>
558 </test> 574 </test>
575 <!-- test 12 - select medoid for class -->
576 <test>
577 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
578 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
579 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
580 <param name="classnameColumn" value="gender"/>
581 <param name="sampleclassNames" value=""/>
582 <param name="wildcards" value="TRUE"/>
583 <param name="inclusive" value="FALSE"/>
584 <param name="imputation" value="zero"/>
585 <param name="order_vrbl" value="rt"/>
586 <param name="order_smpl" value="gender"/>
587 <param name="centering" value="medoid"/>
588 <output name="dataMatrix_out" md5="c91bbfbf30004fa24b05a67ec479bfb1">
589 <assert_contents>
590 <not_has_text text="1013302" />
591 <has_text text="4763576" />
592 <has_text text="2003278" />
593 <has_text text="26222916" />
594 </assert_contents>
595 </output>
596 <output name="sampleMetadata_out">
597 <assert_contents>
598 <not_has_text text="HU_099" />
599 <not_has_text text="HU_185" />
600 <has_text text="HU_110" />
601 <has_text text="HU_078" />
602 </assert_contents>
603 </output>
604 </test>
559 </tests> 605 </tests>
560 <help><![CDATA[ 606 <help><![CDATA[
561 607
562 608
563 **Author** Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu) 609 **Author** Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu)
565 -------------------------------------------------------------------------- 611 --------------------------------------------------------------------------
566 612
567 613
568 **R package** 614 **R package**
569 615
570 The *w4mclassfilter* package (which is used by the W4m Data Subset tool) is available from the Hegeman lab github repository (https://github.com/HegemanLab/w4mclassfilter/releases). 616 The *w4mclassfilter* package (which is used by the W4M Data Subset tool) is available from the Hegeman lab GitHub repository (https://github.com/HegemanLab/w4mclassfilter/releases).
571 617
572 ----------------------------------------------------------------------------------------------------------------------------------------- 618 -----------------------------------------------------------------------------------------------------------------------------------------
573 619
574 620
575 **Tool updates** 621 **Tool updates**
576 622
577 See https://github.com/HegemanLab/w4mclassfilter_galaxy_wrapper#news 623 See https://github.com/HegemanLab/w4mclassfilter_galaxy_wrapper#news
578 624
579 --------------------------------------------------- 625 ---------------------------------------------------
580 626
581 =========================================================== 627 ======================================================
582 "W4m Data Subset" - Filter Workflow4Metabolomics data files 628 "W4M Data Subset" - Filter Workflow4Metabolomics data
583 =========================================================== 629 ======================================================
584 630
585 ---------- 631 ----------
586 Motivation 632 Motivation
587 ---------- 633 ----------
588 634
589 GC-MS and LC-MS experiments seek to resolve as features chemicals that have distinct chromatographic retention-time ("rt") and (after ionization) mass-to-charge ratio ("m/z" or "mz"). 635 LC-MS metabolomics experiments seek to resolve "features", i.e., species that have distinct chromatographic retention time ("rt") and (after ionization) mass-to-charge ratio ("*m/z*" or "mz").
590 (If the MS protocol includes fragmentation, several features may result for each chemical.) 636 (If a chemical is fragmented or may have a variety of adducts, several features will result.)
591 Data for a sample are collected as MS intensities, each of which is associated with a position on a 2D plane with dimensions of rt and m/z. 637 Data for a sample are collected as mass-spectral intensities, each of which is associated with a position on a 2D plane with dimensions of rt and *m/z*.
592 Ideally, features would be sufficiently reproducible among sample-runs to distinguish features that are common among samples from those that differ. 638 Ideally, features would be sufficiently reproducible among sample-runs to distinguish features that are similar among samples from those that differ.
593 639
594 The chromatographic retention-time for a chemical can vary from one chromatography run to the next. 640 For liquid chromatography, the retention time for a species can vary considerably from one chromatography run to the next.
595 Workflow4Metabolomics (W4m, [Giacomoni *et al.*, 2014, Guitton *et al.* 2017]) is a "flavor" of Galaxy that uses the XCMS preprocessing tools for "retention-time correction" to align features among samples. 641 The Workflow4Metabolomics suite of Galaxy tools (W4M, [Giacomoni *et al.*, 2014, Guitton *et al.* 2017])
642 uses the XCMS preprocessing tools [Smith *et al.*, 2006]
643 for "retention-time correction" to align features among samples.
596 Features may be better aligned if pooled samples and blanks are included. 644 Features may be better aligned if pooled samples and blanks are included.
597 645
598 Multivariate statistical techniques may be used to discover clusters of similar samples (Th]]>&#233;<![CDATA[venot *et al.*, 2015). 646 Multivariate statistical tools may be used to discover clusters of similar samples [Th]]>&#233;<![CDATA[venot *et al.*, 2015].
599 However, once retention-time alignment of features has been achieved among samples in GC-MS and LC-MS datasets: 647 However, once retention-time alignment of features has been achieved among samples in LC-MS datasets:
600 648
601 - The presence of pools and blanks may confound identification and separation of clusters. 649 - The presence of pools and blanks may confound identification and separation of sample clusters.
602 - Multivariate statistical algorithms may be impacted by missing values or dimensions that have zero variance. 650 - Multivariate statistical algorithms may be impacted by missing values or dimensions that have zero variance.
603 651
604 ----------- 652 -----------
605 Description 653 Description
606 ----------- 654 -----------
607 655
608 The **W4m Data Subset** tool **selects subsets of samples, features, or data values** for further analysis. 656 The **W4M Data Subset** tool **selects subsets of samples, features, or data values** and **conditions the data** for further analysis.
609 657
610 - The tool takes as input the data matrix, sample metadata, and variable metadata datasets produced by W4m's XCMS [Smith *et al.*, 2006] and CAMERA [Kuhl *et al.*, 2012] tools. 658 - The tool takes as input the *dataMatrix*, *sampleMetadata*, and *variableMetadata* datasets produced by W4M's XCMS and CAMERA [Kuhl *et al.*, 2012] tools.
611 - The tool produces the same trio of output datasets, modified as follows. 659 - The tool produces the same trio of output datasets, modified as described below.
612 660
613 This tool can perform several operations to reduce the number of samples or features to be analyzed (although **this should be done only in a statistically sound manner** consistent with the nature of the experiment): 661 This tool can perform several operations to reduce the number of samples or features to be analyzed (although *this should be done only in a statistically sound manner* consistent with the nature of the experiment):
614 662
615 - Samples may be eliminated by filtering on a designated “sample class” column in sampleMetadata. 663 - *Sample filtering:* Samples may be selected by designating a "sample class" column in *sampleMetadata* and specifying criteria to include or exclude samples based on the contents of this column.
616 - Features may be eliminated by specifying minimum or maximum value (or both) allowable in columns of variableMetadata. 664 - *Feature filtering:* Features may be selected by specifying minimum or maximum value (or both) allowable in columns of *variableMetadata*.
617 - Features may be eliminated by “range of row-maximum for each feature”, i.e., by specifying minimum or maximum intensity (or both) allowable in each row of the dataMatrix (i.e., for the feature across all samples). 665 - *Intensity filtering:* To exclude minimal features from consideration, a lower bound may be specified for the maximum intensity for a feature across all samples (i.e., for a row in *dataMatrix*).
618 666
619 This tool also performs several operations to address several data issues that may impede downstream statistical analysis: 667 This tool also conditions data for statistical analysis:
620 668
621 - Samples that are missing from either sampleMetadata or dataMatrix are eliminated. 669 - Samples that are missing from either *sampleMetadata* or *dataMatrix* are eliminated.
622 - Features that are missing from either variableMetadata or dataMatrix are eliminated. 670 - Features that are missing from either *variableMetadata* or *dataMatrix* are eliminated.
623 - Features and samples that have zero variance are eliminated. 671 - Features and samples that have zero variance are eliminated.
624 - Samples and features have consistent order in `variableMetadata`, `sampleMetadata`, and `dataMatrix`. 672 - Samples and features are ordered consistently in *variableMetadata*, *sampleMetadata*, and *dataMatrix*.
625 (The column for sorting `variableMetadata` or `sampleMetadata` may be specified.) 673 (The columns for sorting *variableMetadata* or *sampleMetadata* may be specified.)
626 - The names of the first columns of variableMetadata and sampleMetadata are set respectively to "variableMetadata" and "sampleMetadata". 674 - The names of the first columns of *variableMetadata* and *sampleMetadata* are set respectively to "variableMetadata" and "sampleMetadata".
627 - If desired, the values in the dataMatrix may be log-transformed. 675 - If desired, the values in *dataMatrix* may be log-transformed.
628 - If desired, each missing value in dataMatrix is replaced with zero or the median value observed for the corresponding feature. 676 - Negative intensities become missing values (before missing-value replacement is performed).
677 - If desired, each missing value in *dataMatrix* may be replaced with zero or the median value observed for the corresponding feature.
678 - If desired, a "center" for each treatment can be computed in lieu of the samples for that treatment.
629 679
630 This tool may be applied several times sequentially, which may be useful for: 680 This tool may be applied several times sequentially, which may be useful for:
631 681
632 - analyzing subsets of samples for progressively smaller sets of treatment-levels, or 682 - analyzing subsets of samples for progressively smaller sets of treatment levels, or
633 - choosing subsets of samples based on criteria in several columns of the sampleMetadata table. 683 - choosing subsets of samples or features based on criteria in columns of *sampleMetadata* or *variableMetadata*, respectively.
634 684
635 ----------------- 685 -----------------
636 Workflow Position 686 Workflow Position
637 ----------------- 687 -----------------
638 688
643 693
644 ----------- 694 -----------
645 Input files 695 Input files
646 ----------- 696 -----------
647 697
648 +---------------------------+------------+ 698 +------------------------+---------------------------------------+------------+
649 | File | Format | 699 | File | Contents | Format |
650 +===========================+============+ 700 +========================+=======================================+============+
651 | Data matrix | tabular | 701 | Data matrix | per-feature, per-sample intensities | tabular |
652 +---------------------------+------------+ 702 +------------------------+---------------------------------------+------------+
653 | Sample metadata | tabular | 703 | Sample metadata | metadata for samples | tabular |
654 +---------------------------+------------+ 704 +------------------------+---------------------------------------+------------+
655 | Variable metadata | tabular | 705 | Variable metadata | metadata for features | tabular |
656 +---------------------------+------------+ 706 +------------------------+---------------------------------------+------------+
657 707
658 708
659 ---------- 709 ----------
660 Parameters 710 Parameters
661 ---------- 711 ----------
662 712
663 Data matrix file 713 Data matrix
664 | feature x sample **dataMatrix** (tabular separated values) file of the numeric data matrix, with period-character ('.') as decimal, and 'NA' for missing values; the table must not contain metadata apart from the required row and column names; the row and column names must be identical (with regard to both content or order) to the respective rownames of the sample metadata file and variable metadata file 714 | feature x sample **dataMatrix** (tab-separated values) file of the numeric data matrix, with period-character ('.') as decimal, and 'NA' for missing values.
665 | 715 | The file must not contain metadata apart from the required row and column names.
666 716 |
667 Sample metadata file 717
668 | sample x metadata **sampleMetadata** (tabular separated values) file of the numeric and/or character sample metadata, with . as decimal and NA for missing values 718 Sample metadata
669 | 719 | sample x metadata **sampleMetadata** (tab-separated values) file of the numeric and/or character sample metadata, with period-character ('.') as decimal, and 'NA' for missing values.
670 720 |
671 Variable metadata file 721
672 | variable x metadata **variableMetadata** (tabular separated values) file of the numeric and/or character variable metadata, with . as decimal and NA for missing values 722 Variable metadata
673 | 723 | variable x metadata **variableMetadata** (tab-separated values) file of the numeric and/or character variable metadata, with period-character ('.') as decimal, and 'NA' for missing values.
674 724 |
675 Column that names the sample-class (default = '``class``') 725
676 | name of the column in **sampleMetadata** that has the values to be tested against the '``Names of sample-classes``' input parameter; only letters, digits, periods, and underscores are permitted. 726 Column containing the sample-class names (default = '``class``')
677 | 727 | name of the column in **sampleMetadata** that has the values to be tested against the '``Sample-class names``' input parameter or to be referenced by the '``Compute centers for classes``' input parameter.
678 728 | Only letters, digits, periods, and underscores are permitted.
679 Names of sample-classes (default = no names) 729 |
680 | comma-separated names (or regular expressions to match names) of sample-classes to include or exclude 730
681 | 731 Sample-class names (default = no names)
682 732 | names (or regular expressions to match names) of sample-classes to include or exclude
683 'Wild cards' or 'regular expressions' (default = '``wild-cards``') 733 | (Separate names with commas, without any extra space characters.)
684 | '``wild-cards``' - use wild cards to match names of sample-classes (see the 'Wild card patterns to match class-names' section below) 734 |
685 | '``regular-expressions``' - use regular expressions to match the named sample-classes (see the 'Regular expression patterns to match class-names' section below) 735
686 | 736 Exclude/include named (or matched) classes (default = '``filter-out``')
687
688 Exclude/include named classes (default = '``filter-out``')
689 | '``filter-in``' - include only the named sample-classes 737 | '``filter-in``' - include only the named sample-classes
690 | '``filter-out``' - exclude only the named sample-classes 738 | '``filter-out``' - exclude only the named sample-classes
691 | 739 |
692 740
741 Use 'wild card patterns' or 'regular expression patterns' (default = '``wild-card patterns``')
742 | '``wild-card patterns``' - use wild cards to match names of sample-classes (see the *'Wild-card patterns to match class names'* section below.)
743 | '``regular-expression patterns``' - use regular expressions to match the named sample-classes (see the *'Regular-expression patterns to match class names'* section below.)
744 |
745
693 Variable-range filters (default = no filters) 746 Variable-range filters (default = no filters)
694 | comma-separated names of variable-range filters (see the 'Variable-range filters' section below) 747 | variable-range filters (see the *'Variable-range filters'* section below)
695 | 748 | (Separate filter expressions with commas, without any extra space characters.)
696 749 |
697 Data-transformation (default = '``none``') 750
751 Data transformation (default = '``none``')
698 | '``none``' - Do not transform data matrix values. 752 | '``none``' - Do not transform data matrix values.
699 | '``log2``' - Take the log base 2 of the values in the data matrix. 753 | '``log2``' - Take the log base 2 of the values in the data matrix.
700 | '``log10``' - Take the log base 10 of the values in the data matrix. 754 | '``log10``' - Take the log base 10 of the values in the data matrix.
701 | 755 |
702 756 | Note that negative intensities become missing values regardless of the choice made here.
703 Data-imputation (default = '``zero``') 757 |
758
759 Imputation of missing values (default = '``zero``')
704 | '``none``' - Do not impute data matrix values. 760 | '``none``' - Do not impute data matrix values.
705 | '``zero``' - Negative and missing values are imputed to zero. 761 | '``zero``' - Negative and missing values are imputed to zero.
706 | '``center``' - For each feature, negative and missing values are imputed to the median of other values. 762 | '``center``' - For each feature, negative and missing values are imputed to the median of other values.
707 | 763 |
708 764 | Note well: For the '``none``' option, '``Compute centers for classes``' cannot be set to '``medoid``'.
709 Column that specifies order for samples (default = 'sampleMetadata') 765 |
710 | name of the column in **sampleMetadata** that is used to sort samples; only letters, digits, periods, and underscores are permitted. 766
711 | 767 Columns that specify order for samples (default = '``sampleMetadata``')
712 768 | names of the columns in **sampleMetadata** that are used to sort samples; only letters, digits, periods, and underscores are permitted.
713 Column that specifies order for features (default = 'variableMetadata') 769 | (Separate column names with commas, without any extra space characters.)
714 | name of the column in **variableMetadata** that is used to sort features; only letters, digits, periods, and underscores are permitted. 770 |
771
772 Columns that specify order for features (default = '``variableMetadata``')
773 | names of the columns in **variableMetadata** that are used to sort features; only letters, digits, periods, and underscores are permitted.
774 | (Separate column names with commas, without any extra space characters.)
775 |
776
777 Compute centers for classes, e.g., treatments (default = '``none``')
778 | '``none``' - Return all samples; do not compute centers for classes/treatments.
779 | '``centroid``' - For each treatment, return only the centroid (the treatment-center computed as the mean intensity for each feature).
780 | '``median``' - For each treatment, return only the treatment-center computed as the median intensity for each feature.
781 | '``medoid``' - For each treatment, return only the medoid (the sample most similar to the other samples for that treatment).
782 |
783 | Note well: For the '``medoid``' option, '``Imputation of missing values``' cannot be set to '``none``'.
715 | 784 |
716 785
717 ------------ 786 ------------
718 Output files 787 Output files
719 ------------ 788 ------------
720 789
721 sampleMetadata 790 sampleMetadata
722 | (tabular separated values) file identical to the **sampleMetadata** file given as an input argument, excepting lacking rows for samples that have been filtered out (by the sample-class filter, or because of zero variance, or because they were missing in the input data matrix) 791 | (tab-separated values) file.
792 | If centering is '``none``' or '``medoid``', this will be identical to the **sampleMetadata** file given as an input argument, except that it lacks rows for samples that have been filtered out (by the sample-class filter, or because of zero variance, or because they were missing in the input data matrix).
793 | If centering is '``centroid``' or '``median``', most columns will be replaced with the treatment name and the number of samples for that treatment.
723 | 794 |
724 795
725 variableMetadata 796 variableMetadata
726 | (tabular separated values) file identical to the **variableMetadata** file given as an input argument, excepting lacking rows for variables (xC-MS features) that have been filtered out (by the variable-range filter, or because of zero variance, or because they were missing in the input data matrix) 797 | (tab-separated values) file identical to the **variableMetadata** file given as an input argument, excepting lacking rows for variables (LC-MS features) that have been filtered out (by the variable-range filter, or because of zero variance, or because they were missing in the input data matrix)
727 | 798 |
728 799
729 dataMatrix 800 dataMatrix
730 | (tabular separated values) file identical to the **dataMatrix** file given as an input argument, excepting lacking rows and columns for variables and samples that have been filtered out, respectively 801 | (tab-separated values) file identical to the **dataMatrix** file given as an input argument, excepting lacking rows and columns for variables and samples that have been filtered out, respectively
731 | 802 |
732 803
733 804
734 ----------------------------------------- 805 -----------------------------------------
735 'Wild card' patterns to match class-names 806 Wild-card patterns to match class names
736 ----------------------------------------- 807 -----------------------------------------
737 808
738 W4m Data Subset supports use of R "wild card" patterns to select class-names. 809 W4M Data Subset supports use of "wild card" patterns to select class-names.
739 810
740 - use '``?``' to match a single character 811 - use '``?``' to match a single character
741 - use '``*``' to match zero or more characters 812 - use '``*``' to match zero or more characters
742 - the entire pattern must match the sample name 813 - the entire pattern must match the sample name
743 814
746 - '``??.samp*``' matches '``my.sample``' but not '``my.own.sample``' 817 - '``??.samp*``' matches '``my.sample``' but not '``my.own.sample``'
747 - '``*.sample``' matches '``my.sample``' and '``my.own.sample``' 818 - '``*.sample``' matches '``my.sample``' and '``my.own.sample``'
748 - '``*.sampl``' matches neither '``my.sample``' nor '``my.own.sample``' 819 - '``*.sampl``' matches neither '``my.sample``' nor '``my.own.sample``'
749 820
750 -------------------------------------------------- 821 --------------------------------------------------
751 'Regular expression' patterns to match class-names 822 Regular-expression patterns to match class names
752 -------------------------------------------------- 823 --------------------------------------------------
753 824
754 W4m Data Subset supports use of R "regular expression" patterns to select class-names. 825 W4M Data Subset supports use of R "extended regular expression" patterns to select class-names.
755 826
756 R uses POSIX 1003.2 standard regular expressions, which allow precise pattern-matching and are exhaustively defined at: 827 R uses extended regular expressions, which allow precise pattern-matching and are exhaustively defined at
757 http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html 828 https://stat.ethz.ch/R-manual/R-devel/library/base/html/regex.html
758 829
759 However, only a few basic building blocks of regular expressions need to be mastered for most cases: 830 However, only a few basic building blocks of regular expressions need to be mastered for most cases:
760 831
761 - '``^``' matches the beginning of a class-name 832 - '``^``' matches the beginning of a class-name
762 - '``$``' matches the end of a class-name 833 - '``$``' matches the end of a class-name
778 - The regular expression '``^[a-z][a-z]12$``' will match the same sample-classes as '``front12,marq12``' 849 - The regular expression '``^[a-z][a-z]12$``' will match the same sample-classes as '``front12,marq12``'
779 - The regular expression '``^[a-z][a-z][0-9]$``' will match the same sample-classes as '``front3,front6,front9,marq3,marq6,marq9``' 850 - The regular expression '``^[a-z][a-z][0-9]$``' will match the same sample-classes as '``front3,front6,front9,marq3,marq6,marq9``'
780 851
781 Second Example: Consider these regular expression patterns as possible matches to a sample-class name '``AB0123``': 852 Second Example: Consider these regular expression patterns as possible matches to a sample-class name '``AB0123``':
782 853
783 - '``^[A-Z][A-Z][0-9][0-9]*$``' - MATCHES '``**^AB0123$**``' 854 - '``^[A-Z][A-Z][0-9][0-9]*$``' MATCHES '``**^AB0123$**``'
784 - '``^[A-Z][A-Z]*[0-9][0-9]*$``' - MATCHES '``**^AB0123$**``' 855 - '``^[A-Z][A-Z]*[0-9][0-9]*$``' MATCHES '``**^AB0123$**``'
785 - '``^[A-Z][0-9]*``' - MATCHES '``**^A** B0123$``' - first character is a letter, '``*``' can specify zero characters, and end of line did not need to be matched. 856 - '``^[A-Z][0-9]*``' MATCHES '``**^A** B0123$``' - first character is a letter, '``*``' can specify zero characters, and end of line did not need to be matched.
786 - '``^[A-Z][A-Z][0-9]``' - MATCHES '``**^AB0** 123$``' - first two characters are letters aind the third is a digit. 857 - '``^[A-Z][A-Z][0-9]``' MATCHES '``**^AB0** 123$``' - first two characters are letters and the third is a digit.
787 - '``^[A-Z][A-Z]*[0-9][0-9]$``' - NO MATCH - the name does not end with the pattern '``[A-Z][0-9][0-9]$``', i.e., it ends with four digits, not two. 858 - '``^[A-Z][A-Z]*[0-9][0-9]$``' DOES NOT MATCH - the name does not end with the pattern '``[A-Z][0-9][0-9]$``', i.e., it ends with four digits, not two.
788 - '``^[A-Z][0-9]*$``' - NO MATCH - the pattern specifies that second character and all those that follow, if present, must be digits. 859 - '``^[A-Z][0-9]*$``' DOES NOT MATCH - the pattern specifies that second character and all those that follow, if present, must be digits.
789 860
790 ---------------------- 861 ----------------------
791 Variable-range filters 862 Variable-range filters
792 ---------------------- 863 ----------------------
793 864
794 An array of range-specification strings may be supplied in the `variableRangeFilter` 865 An array of range-specification strings may be supplied in the '``Variable-range filters``'
795 argument. If supplied, only features having numerical values in the specified column 866 argument. If supplied, only features having numerical values in the specified column
796 of `variableMetadata` that fall within the specified ranges will be retained 867 of **variableMetadata** that fall within the specified ranges will be retained
797 in the output. Each range is a string of three colon-separated values (e.g., "mz:200:800") in the 868 in the output. Each range is a string of three colon-separated values (e.g., '``mz:200:800``') in the
798 following order: 869 following order:
799 870
800 - the **name of a column of `variableMetadata`** which must have numerical data (only letters, digits, periods, and underscores are permitted in the name itself), e.g., 'mz'; 871 - the **name of a column** of **variableMetadata** which must have numerical data (only letters, digits, periods, and underscores are permitted in the name itself), e.g., '``mz``';
801 - the **minimum allowed value** in that column for the feature to be retained, e.g., '200'; 872 - the **minimum allowed value** in that column for the feature to be retained, e.g., '``200``';
802 - the **maximum allowed value**, e.g., '800'. 873 - the **maximum allowed value**, e.g., '``800``'.
803 874
804 Note for the range specification strings: 875 Note for the range specification strings:
805 876
806 - **If the "maximum" is less than the "minimum", then the range is exclusive** (e.g., "mz:800:200" means retain only features whose mz is NOT in the range 200-800) 877 - **If the "maximum" is less than the "minimum", then the range is exclusive** (e.g., '``mz:800:200``' means retain only features whose mz is NOT in the range 200-800)
807 - **If the name supplied in the first field is 'FEATMAX',** then the string is defining the minimum (and possibly, though less useful, maximum) intensity for each feature in the dataMatrix. For example, "FEATMAX:1e6:" would specify that any feature would be excluded if no sample had an intensity for that feature greater than 1000000. 878 - **If the name supplied in the first field** is '``FEATMAX``', then the string is defining the **threshold for the maximum intensity** for each feature in the dataMatrix.
808 879
809 - Note, however, that when the "maximum" is greater than the "minimum" for the FEATMAX range specification, then the specification is ignored. 880 - For example, '``FEATMAX:1e6:``' would specify that any feature would be excluded if no sample had an intensity for that feature greater than 1,000,000.
881 - Although a maximum may be specified, it seems unlikely that this would be useful. Note that when the "maximum" is less than the "minimum" for the FEATMAX range specification, then the specification is ignored.
810 882
811 ---------------------------------- 883 ----------------------------------
812 Data transformation and imputation 884 Data transformation and imputation
813 ---------------------------------- 885 ----------------------------------
814 886
815 Data may optionally be log2- or log10-transformed. 887 Data may optionally be log2- or log10-transformed.
816 888
817 Negative intensity values are always substituted with zeros. 889 Negative intensities are always substituted with missing values before imputation, even when no transformation is chosen.
818 890
819 Missing intensity data values may optionally be imputed. Missing values may be substituted with zeros (as may be appropriate for univariate analysis) or with the median for the feature (as may be appropriate for multivariate analysis). (Note that the median feature-intensity is computed for the samples *before* variable-range filters are applied.) 891 Missing intensity data values may optionally be imputed. Missing values may be substituted:
892
893 - with zeros (as may be appropriate for univariate analysis)
894 - with the median for the feature (as may be appropriate for multivariate analysis).
895
896 - Note that the median feature-intensity is computed for the samples *before* variable-range filters are applied.
897
898 -----------------------------------------
899 Optional Computation of Treatment Centers
900 -----------------------------------------
901
902 A "center" for each treatment may be computed in lieu of all the samples for each treatment.
903
904 - '``none``' - Return all samples; do not compute centers.
905 - '``centroid``' - For each treatment, return only the centroid (the treatment-center computed as the mean intensity for each feature).
906 - '``median``' - For each treatment, return only the treatment-center computed as the median intensity for each feature.
907 - '``medoid``' - For each treatment, return only the medoid (the sample most similar to the other samples for that treatment). This choice requires that the '``Imputation of missing values``' argument not be set to '``none``'.
908
909 The medoid is the sample having the smallest sum of its distances from other samples in the treatment:
910
911 - Because principal components are uncorrelated, distances are computed in the space defined by the principal-component scores to minimize the distortion of computed distances by correlated features.
912 - Because principal components are used to compute distances, no missing values are permitted, which is why the '``Imputation of missing values``' argument must not be set to '``none``'.
913 - The distances are used to identify the medoid using code adapted from https://web.archive.org/web/20191231012914/https://www.biostars.org/p/11987/#11989
820 914
821 ----------------------------------------------------------------------------- 915 -----------------------------------------------------------------------------
822 916
823 ---------------- 917 ----------------
824 WORKING EXAMPLES 918 WORKING EXAMPLES
837 +------------------------------------------------------------------------------------------------------------------------------------------------------+ 931 +------------------------------------------------------------------------------------------------------------------------------------------------------+
838 | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/input_variableMetadata.tsv | 932 | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/input_variableMetadata.tsv |
839 +------------------------------------------------------------------------------------------------------------------------------------------------------+ 933 +------------------------------------------------------------------------------------------------------------------------------------------------------+
840 934
841 ------------------------------- 935 -------------------------------
842 Running Without Range-Filtering 936 Example without Range-Filtering
843 ------------------------------- 937 -------------------------------
844 938
845 This example retains only samples whose 'gender' attribute is 'M'. 939 This example retains only samples whose '``gender``' attribute is '``M``'.
846 940
847 **Input parameters** 941 **Input parameters**
848 942
849 +---------------------------------------------+-------------------------------+ 943 +---------------------------------------------+-------------------------------+
850 | Input Parameter | Value | 944 | Input Parameter | Value |
851 +=============================================+===============================+ 945 +=============================================+===============================+
852 | Column that names the sample-class | gender | 946 | Column that names the sample class | gender |
853 +---------------------------------------------+-------------------------------+ 947 +---------------------------------------------+-------------------------------+
854 | Names of sample-classes | M | 948 | Sample-class names | M |
949 +---------------------------------------------+-------------------------------+
950 | Exclude/include named classes | filter-in |
855 +---------------------------------------------+-------------------------------+ 951 +---------------------------------------------+-------------------------------+
856 | Use 'wild-cards' or 'regular expressions' | wild-cards | 952 | Use 'wild-cards' or 'regular expressions' | wild-cards |
857 +---------------------------------------------+-------------------------------+ 953 +---------------------------------------------+-------------------------------+
858 | Exclude/include named classes | filter-in |
859 +---------------------------------------------+-------------------------------+
860 | Variable range-filters | (Leave this field empty.) | 954 | Variable range-filters | (Leave this field empty.) |
861 +---------------------------------------------+-------------------------------+ 955 +---------------------------------------------+-------------------------------+
862 | Data transforamtion | none | 956 | Data transformation | none |
863 +---------------------------------------------+-------------------------------+ 957 +---------------------------------------------+-------------------------------+
864 | Missing-value imputation | center | 958 | Missing-value imputation | center |
865 +---------------------------------------------+-------------------------------+ 959 +---------------------------------------------+-------------------------------+
866 | Sample-sort column | sampleMetadata | 960 | Sample-sort column | sampleMetadata |
867 +---------------------------------------------+-------------------------------+ 961 +---------------------------------------------+-------------------------------+
868 | Feature-sort column | variableMetadata | 962 | Feature-sort column | variableMetadata |
963 +---------------------------------------------+-------------------------------+
964 | Compute centers for classes | none |
869 +---------------------------------------------+-------------------------------+ 965 +---------------------------------------------+-------------------------------+
870 966
871 **Expected outputs** 967 **Expected outputs**
872 968
873 +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+ 969 +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+
879 +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+ 975 +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+
880 | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/expected_variableMetadata.tsv | 976 | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/expected_variableMetadata.tsv |
881 +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+ 977 +-------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+
882 978
883 ---------------------------- 979 ----------------------------
884 Running With Range-Filtering 980 Example with Range-Filtering
885 ---------------------------- 981 ----------------------------
886 982
887 This example retains only features whose mz is greater than 200, whose rt is less than 800, and whose maximum intensity across all samples is 2,000,000. 983 This example retains only features whose ``mz`` is greater than 200, whose ``rt`` is less than 800, and whose maximum intensity across all samples is at least 2,000,000.
888 This example retains all samples (except those having zero variance for all feature), although it would be possible to filter on samples as well. 984 This example retains all samples (except those having zero variance for all features), although it would be possible to filter on samples as well.
889 985
890 **Input parameters** 986 **Input parameters**
891 987
892 +---------------------------------------------+-----------------------------------+ 988 +---------------------------------------------+-----------------------------------+
893 | Input Parameter | Value | 989 | Input Parameter | Value |
894 +=============================================+===================================+ 990 +=============================================+===================================+
895 | Column that names the sample-class | sampleMetadata | 991 | Column that names the sample class | sampleMetadata |
896 +---------------------------------------------+-----------------------------------+ 992 +---------------------------------------------+-----------------------------------+
897 | Names of sample-classes | HU_13[48] | 993 | Sample-class names | HU_13[48] |
994 +---------------------------------------------+-----------------------------------+
995 | Exclude/include named classes | filter-out |
898 +---------------------------------------------+-----------------------------------+ 996 +---------------------------------------------+-----------------------------------+
899 | Use 'wild-cards' or 'regular expressions' | regular-expressions | 997 | Use 'wild-cards' or 'regular expressions' | regular-expressions |
900 +---------------------------------------------+-----------------------------------+ 998 +---------------------------------------------+-----------------------------------+
901 | Exclude/include named classes | filter-out |
902 +---------------------------------------------+-----------------------------------+
903 | Variable range-filters | FEATMAX:20.93157:,mz:200:,rt::800 | 999 | Variable range-filters | FEATMAX:20.93157:,mz:200:,rt::800 |
904 +---------------------------------------------+-----------------------------------+ 1000 +---------------------------------------------+-----------------------------------+
905 | Data transforamtion | log2 | 1001 | Data transformation | log2 |
906 +---------------------------------------------+-----------------------------------+ 1002 +---------------------------------------------+-----------------------------------+
907 | Missing-value imputation | zero | 1003 | Missing-value imputation | zero |
908 +---------------------------------------------+-----------------------------------+ 1004 +---------------------------------------------+-----------------------------------+
909 | Sample-sort column | sampleMetadata | 1005 | Sample-sort column | sampleMetadata |
910 +---------------------------------------------+-----------------------------------+ 1006 +---------------------------------------------+-----------------------------------+
911 | Feature-sort column | variableMetadata | 1007 | Feature-sort column | variableMetadata |
1008 +---------------------------------------------+-----------------------------------+
1009 | Compute centers for classes | none |
912 +---------------------------------------------+-----------------------------------+ 1010 +---------------------------------------------+-----------------------------------+
913 1011
914 **Expected outputs** 1012 **Expected outputs**
915 1013
916 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ 1014 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+
920 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ 1018 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+
921 | Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/rangefilter_sampleMetadata.tsv | 1019 | Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/rangefilter_sampleMetadata.tsv |
922 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ 1020 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+
923 | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/rangefilter_variableMetadata.tsv | 1021 | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/tools/w4mclassfilter/test-data/rangefilter_variableMetadata.tsv |
924 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+ 1022 +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------+
1023
1024 --------------------------------
1025 Example with Treatment-Centering
1026 --------------------------------
1027
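1028 This example retains only the medoid sample for each gender class, i.e., the one sample in each class that is most similar to the other samples of that class. Missing-value imputation is set to 'zero' here because 'medoid' centering may not be chosen with imputation 'none'.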
1029
1030 **Input parameters**
1031
1032 +---------------------------------------------+-----------------------------------+
1033 | Input Parameter | Value |
1034 +=============================================+===================================+
1035 | Column that names the sample class | gender |
1036 +---------------------------------------------+-----------------------------------+
1037 | Sample-class names | (Leave this field empty.) |
1038 +---------------------------------------------+-----------------------------------+
1039 | Exclude/include named classes | filter-out |
1040 +---------------------------------------------+-----------------------------------+
1041 | Use 'wild-cards' or 'regular expressions' | wild-cards |
1042 +---------------------------------------------+-----------------------------------+
1043 | Variable range-filters | (Leave this field empty.) |
1044 +---------------------------------------------+-----------------------------------+
1045 | Data transformation | none |
1046 +---------------------------------------------+-----------------------------------+
1047 | Missing-value imputation | zero |
1048 +---------------------------------------------+-----------------------------------+
1049 | Sample-sort column | gender |
1050 +---------------------------------------------+-----------------------------------+
1051 | Feature-sort column | rt |
1052 +---------------------------------------------+-----------------------------------+
1053 | Compute centers for classes | medoid |
1054 +---------------------------------------------+-----------------------------------+
1055
1056 **Expected outputs**
1057
1058 +-------------------+----------------------------------------------------------------------------------------------------------+
1059 | Expected Output | Download from URL |
1060 +===================+==========================================================================================================+
1061 | Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/exp_cent_medoid_dm.tsv |
1062 +-------------------+----------------------------------------------------------------------------------------------------------+
1063 | Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/exp_cent_medoid_sm.tsv |
1064 +-------------------+----------------------------------------------------------------------------------------------------------+
1065 | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter/master/tests/testthat/exp_cent_medoid_vm.tsv |
1066 +-------------------+----------------------------------------------------------------------------------------------------------+
925 ]]></help> 1067 ]]></help>
926 <citations> 1068 <citations>
927 <!-- Giacomoni_2014 W4m 2.5 --> 1069 <!-- Giacomoni_2014 W4M 2.5 -->
928 <citation type="doi">10.1093/bioinformatics/btu813</citation> 1070 <citation type="doi">10.1093/bioinformatics/btu813</citation>
929 <!-- Guitton_2017 W4m 3.0 --> 1071 <!-- Guitton_2017 W4M 3.0 -->
930 <citation type="doi">10.1016/j.biocel.2017.07.002</citation> 1072 <citation type="doi">10.1016/j.biocel.2017.07.002</citation>
931 <!-- Kuhl_2012 CAMERA --> 1073 <!-- Kuhl_2012 CAMERA -->
932 <citation type="doi">10.1021/ac202450g</citation> 1074 <citation type="doi">10.1021/ac202450g</citation>
933 <!-- Smith_2006 XCMS --> 1075 <!-- Smith_2006 XCMS -->
934 <citation type="doi">10.1021/ac051437y</citation> 1076 <citation type="doi">10.1021/ac051437y</citation>