ramclustr: macros.xml comparison

comparison macros.xml @ 0:36104baf75da draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/ramclustr commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"

author	recetox
date	Tue, 22 Mar 2022 16:09:16 +0000
parents
children	9a0d83c1b4b3

comparison

equal deleted inserted replaced

--1:000000000000
+:36104baf75da
+<macros>
+<!-- Value substituted wherever @TOOL_VERSION@ is referenced; presumably the wrapped RAMClustR release (TODO confirm). -->
+<token name="@TOOL_VERSION@">1.2.2</token>
+<!-- Macro "creator": authorship metadata listing four people and one organization. -->
+<xml name="creator">
+<creator>
+<person identifier="0000-0001-6744-996X"
+givenName="Helge" familyName="Hecht"
+url="https://github.com/hechth"/>
+<person identifier="0000-0003-2056-8018"
+givenName="Maksym" familyName="Skoryk"
+url="https://github.com/maximskorik"/>
+<person identifier="0000-0003-0841-2707"
+givenName="Matej" familyName="Troják"
+url="https://github.com/xtrojak"/>
+<person identifier="0000-0002-9318-1781"
+givenName="Martin" familyName="Čech"
+url="https://github.com/martenson"/>
+<organization name="RECETOX MUNI"
+url="https://www.recetox.muni.cz/"
+email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"/>
+</creator>
+</xml>
+<!-- Macro "parameters_csv": CSV inputs, an MS feature table plus an optional idMSMS table. -->
+<xml name="parameters_csv">
+<section expanded="true" name="ms_csv" title="Input MS Data as CSV">
+<param name="ms" type="data" format="csv" label="Input CSV"
+help="Features as columns, rows as samples. Column header in format mz_rt."/>
+<param name="idmsms" type="data" format="csv" optional="true" label="idMSMS"
+help="Optional idMSMS / MSe csv data. Same dimension and names as in input CSV are required."/>
+</section>
+</xml>
+<!-- Macro "parameters_xcms": XCMS input dataset and the switch for carrying over phenotype data. -->
+<xml name="parameters_xcms">
+<section expanded="true" name="xcms" title="Input MS Data as XCMS">
+<param type="data" format="rdata.xcms.fillpeaks" name="input_xcms" label="Input XCMS"
+help="Grouped feature data for clustering."/>
+<param type="boolean" name="usePheno" label="Preserve phenotype"
+checked="true" truevalue="TRUE" falsevalue="FALSE"
+help="Transfer phenotype data from XCMS object to Spec abundance file."/>
+</section>
+</xml>
+<!-- Macro "parameters_required": Sigma r, the correlation method and the maximum RT difference. -->
+<xml name="parameters_required">
+<param name="sr" type="float" value="0.5" label="Sigma r"
+help="Correlational similarity between features."/>
+<param name="cor_method" type="select" display="radio" label="Correlation method"
+help="Choose correlational method to be used - see [1] for details.">
+<option selected="true" value="pearson">pearson</option>
+<option value="everything">everything</option>
+<option value="spearman">spearman</option>
+<option value="kendall">kendall</option>
+</param>
+<param name="maxt" type="float" value="60" label="Maximum RT difference"
+help="Maximum difference to calculate RT similarity - values beyond this are assigned zero similarity."/>
+</xml>
+<!-- Macro "main_parameters": option sections for clustering, normalisation, performance,
+MSP output and extras. -->
+<xml name="main_parameters">
+<!-- Hierarchical clustering: linkage method and tree-cutting options. -->
+<section name="clustering" title="Clustering" expanded="true">
+<param label="Clustering linkage method" name="linkage" type="select" display="radio"
+help="Choose hierarchical clustering linkage method - see [2] for details.">
+<option value="average" selected="true">average</option>
+<option value="ward.D">ward.D</option>
+<option value="ward.D2">ward.D2</option>
+<option value="single">single</option>
+<option value="complete">complete</option>
+<option value="mcquitty">mcquitty</option>
+<option value="median">median</option>
+<option value="centroid">centroid</option>
+</param>
+<param label="Minimal cluster size" name="minModuleSize" type="integer" value="2"
+help="Minimal size (number of features) of a cluster."/>
+<param label="Maximal tree height" name="hmax" type="float" value="0.3"
+help="Cut the Hierarchical Cluster Analysis tree at this height, see [3] for details."/>
+<param label="Use deepSplit" name="deepSplit" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
+help="Check to produce more smaller clusters, uncheck for fewer bigger clusters, see [3] for details."/>
+</section>
+<!-- Intensity normalisation; the metadata and QC range inputs are only offered for "batch.qc". -->
+<section name="normalisation" title="Normalisation" expanded="true">
+<conditional name="normalisation_method">
+<param label="Normalisation method" name="normalize" type="select" display="radio"
+help="Choose method for normalization of feature intensities.">
+<option value="none" selected="true">none</option>
+<option value="TIC">TIC</option>
+<option value="quantile">quantile</option>
+<option value="batch.qc">batch.qc</option>
+</param>
+<when value="batch.qc">
+<param label="Metadata details" name="batch_order_qc" type="data" format="csv" optional="true"
+help="CSV with sample names (or indices, currently not handled) on rows and columns with:
+batch number ('batch'), position in sequence ('order'), and whether it is a QC sample or not
+('qc' with true/false OR 'sampleType' with 'sample/qc/blank')."/>
+<param label="QC injection range" name="qc_inj_range" type="integer" value="20"
+help="How many injections around each injection are to be scanned for presence of QC samples?
+A good rule of thumb is between 1 and 3 times the typical
+injection span between QC injections. i.e. if you inject QC every 7 samples, set this to
+between 7 and 21. Smaller values provide more local precision but make normalization sensitive
+to individual poor outliers (though these are first removed using the boxplot function outlier
+detection), while wider values provide less local precision in normalization but better
+stability to individual peak areas."/>
+</when>
+</conditional>
+</section>
+<!-- Block-wise processing settings. -->
+<section name="performance" title="Performance">
+<param label="Blocksize" name="blocksize" type="integer" value="2000"
+help="Number of features processed in one block."/>
+<param label="Blocksize factor" name="mult" type="integer" value="5"
+help="Factor to scale blocksize to influence processing speed."/>
+</section>
+<!-- MSP export: merged versus per-spectrum files and m/z print precision. -->
+<section name="msp_output_details" title="MSP output">
+<param label="Merge MSP Files" name="merge_msp" type="boolean" truevalue="TRUE" falsevalue="FALSE"
+checked="true" help="Merge all MSP in one file or export one MSP per spectrum."/>
+<param label="m/z decimal places" name="mzdec" type="integer" value="6"
+help="Number of decimal places used in printing m/z values."/>
+<!--
+Currently not forwarded because the MSP is exported always manually afterwards
+<param label="mspout" name="mspout" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" help="write msp formatted spectra to file?" />
+-->
+</section>
+<!-- Remaining switches and the optional experimental design table. -->
+<section name="extras" title="Extras">
+<param label="RT only low n" name="rt_only_low_n" type="boolean" truevalue="TRUE" falsevalue="FALSE"
+checked="true"
+help="At low injection numbers, correlational relationships of peak intensities may be unreliable.
+By default, RAMClustR will simply ignore the correlational Sigma r value and cluster on retention time alone.
+If you wish to use correlation with n less than 5, set this value to FALSE."/>
+<param label="Replace zeros" name="replace_zeros" type="boolean" truevalue="TRUE" falsevalue="FALSE"
+checked="true"
+help="NA, NaN, and Inf values are replaced with zero, and zero values are sometimes returned from
+peak picking. When TRUE, zero values will be replaced with a small amount of noise, with noise level
+set based on the detected signal intensities for that feature."/>
+<param label="Experimental design metadata" name="ExpDes" type="data" format="csv" optional="true"
+help="Definition of experimental design in CSV format." />
+</section>
+</xml>
+<!-- Macro "output_msp": the merge_msp flag picks between a list collection of MSP files found
+under "spectra" and a single merged MSP dataset. -->
+<xml name="output_msp">
+<collection name="mass_spectra_collection" type="list"
+label="Mass spectra from ${tool.name} on ${on_string}">
+<discover_datasets directory="spectra" pattern="__name_and_ext__" ext="msp" recurse="true"/>
+<filter>not msp_output_details['merge_msp']</filter>
+</collection>
+<data name="mass_spectra_merged" format="msp"
+label="Mass spectra from ${tool.name} on ${on_string}">
+<filter>msp_output_details['merge_msp']</filter>
+</data>
+</xml>
+<xml name="citations">
+<citations>
+<!-- RAMClust publication (Broeckling et al., Analytical Chemistry, 2014) given as a BibTeX entry. -->
+<citation type="bibtex">
+@article{Broeckling2014e,
+abstract = {Metabolomic data are frequently acquired using chromatographically coupled mass spectrometry
+(MS) platforms. For such datasets, the first step in data analysis relies on feature detection, where a
+feature is defined by a mass and retention time. While a feature typically is derived from a single
+compound, a spectrum of mass signals is more a more-accurate representation of the mass spectrometric
+signal for a given metabolite. Here, we report a novel feature grouping method that operates in an
+unsupervised manner to group signals from MS data into spectra without relying on predictability of the
+in-source phenomenon. We additionally address a fundamental bottleneck in metabolomics, annotation of MS
+level signals, by incorporating indiscriminant MS/MS (idMS/MS) data implicitly: feature detection is
+performed on both MS and idMS/MS data, and feature-feature relationships are determined simultaneously
+from the MS and idMS/MS data. This approach facilitates identification of metabolites using in-source MS
+and/or idMS/MS spectra from a single experiment, reduces quantitative analytical variation compared to
+single-feature measures, and decreases false positive annotations of unpredictable phenomenon as novel
+compounds. This tool is released as a freely available R package, called RAMClustR, and is sufficiently
+versatile to group features from any chromatographic-spectrometric platform or feature-finding software.
+{\textcopyright} 2014 American Chemical Society.},
+author = {Broeckling, C. D. and Afsar, F. A. and Neumann, S. and Ben-Hur, A. and Prenni, J. E.},
+doi = {10.1021/ac501530d},
+issn = {15206882},
+journal = {Analytical Chemistry},
+number = {14},
+pages = {6812--6817},
+pmid = {24927477},
+title = {{RAMClust: A novel feature clustering method enables spectral-matching-based annotation for
+metabolomics data}},
+volume = {86},
+year = {2014}
+}
+</citation>
+</citations>
+</xml>
+<token name="@HELP@">
+<![CDATA[
+Documentation
+For documentation on the tool see https://github.com/cbroeckl/RAMClustR/blob/master/vignettes/RAMClustR.Rmd
+Upstream Tools
++------------------------------+-------------------------------+----------------------+---------------------+
+| Name                         | Output File                   | Format               | Parameter           |
++==============================+===============================+======================+=====================+
+| xcms                         | xset.fillPeaks.RData          | rdata.xcms.fillpeaks | xcmsObj             |
++------------------------------+-------------------------------+----------------------+---------------------+
+| RAMClustR define experiment  | Table with experiment details | csv                  | Experimental design |
++------------------------------+-------------------------------+----------------------+---------------------+
+The tool takes an **xcmsSet** object as input and extracts all relevant information.
++-------+------------------------+--------+------------+
+| Name  | Output File            | Format | Parameter  |
++=======+========================+========+============+
+| ???   | Feature Table with MS1 | csv    | ms         |
++-------+------------------------+--------+------------+
+| ???   | Feature Table with MS2 | csv    | idmsms     |
++-------+------------------------+--------+------------+
+Alternatively, the tool takes a **csv** table as input which has to fulfill the following requirements:
+(1) no more than one sample (or file) name column and one feature name row;
+(2) feature names that contain the mass and retention times, separated by a constant delimiter; and
+(3) features in columns and samples in rows.
++----------------------+-------------------+-------------------+--------------------+--------------------+
+| sample               |    100.88_262.464 |    100.01_423.699 |    100.003_128.313 |   100.0057_154.686 |
++======================+===================+===================+====================+====================+
+| 10_qc_16x_dil_milliq |    0              |    195953.6376    |     0              |   0                |
++----------------------+-------------------+-------------------+--------------------+--------------------+
+| 11_qc_8x_dil_milliq  |    0              |    117742.1828    |    4247300.664     |   0                |
++----------------------+-------------------+-------------------+--------------------+--------------------+
+| 12_qc_32x_dil_milliq |    4470859.38     |    0              |    2206092.112     |   0                |
++----------------------+-------------------+-------------------+--------------------+--------------------+
+| 15_qc_16x_dil_milliq |    0              |    0              |    2767477.481     |   0                |
++----------------------+-------------------+-------------------+--------------------+--------------------+
+Downstream Tools
+The output is an msp file or a collection of msp files, with an additional Spec Abundance file.
++---------+--------------+----------------------+
+| Name    | Output File  | Format               |
++=========+==============+======================+
+| matchMS | Mass Spectra | collection (tgz/msp) |
++---------+--------------+----------------------+
+@GENERAL_HELP@
+]]>
+</token>
+<token name="@GENERAL_HELP@">
+Background
+Metabolomics
+Metabolomics is frequently performed using chromatographically coupled mass spectrometry, with gas
+chromatography, liquid chromatography, and capillary electrophoresis being the most frequently utilized
+methods of separation. The coupling of chromatography to mass spectrometry is enabled with an
+appropriate ionization source - electron impact (EI) for gas phase separations and electrospray
+ionization (ESI) for liquid phase separations. XCMS is a commonly used tool to detect all the signals
+from a metabolomics dataset, generating aligned features, where a feature is represented by a mass and
+retention time. Each feature is presumed to derive from a single compound. However, each compound is
+represented by several features. With any ionization method, isotopic peaks will be observed reflective
+of the elemental composition of the analyte. In EI, fragmentation is a byproduct of ionization, and has
+driven the generation of large mass spectral libraries. In ESI, in-source fragmentation frequently
+occurs, the magnitude of which is compound dependent, with more labile compounds being more prone to
+in-source fragmentation. ESI can also produce multiple adduct forms (protonated, potassiated, sodiated,
+ammoniated...), and can produce multimers (i.e. [2M+H]+, [3M+K]+, etc) and multiple charged species
+([M+2H]++). This can become further complicated by considering combinations of these phenomena. For
+example [2M+3H]+++ (triply charged dimer) or an in-source fragment of a dimer.
+RAMClustR approach
+RAMClustR was designed to group features derived from the same compound using an approach which is
+**1.** unsupervised, **2.** platform agnostic, and **3.** devoid of curated rules, as the depth of
+understanding of these processes is insufficient to enable accurate curation/prediction of all phenomena
+that may occur. We achieve this by making two assumptions. The first is that two features derived
+from the same compound will have (approximately) the same retention time. The second is that two
+features derived from the same compound will have (approximately) the same quantitative trend across
+all samples in the xcms sample set. From these assumptions, we can calculate a retention time
+similarity score and a correlational similarity score for each feature pair. A high similarity score
+for both retention time and correlation indicates a strong probability that two features derive from
+the same compound. Since both conditions must be met, the product of the two similarity scores provides
+the best approximation of the total similarity score - i.e. a feature pair with retention time similarity
+of 1 and correlational similarity of 0 is unlikely to derive from one compound - 1 x 0 = 0, the final
+similarity score is zero, indicating the two features represent two different compounds. Similarly, a
+feature pair with retention time similarity of 0 and correlational similarity of 1 is unlikely to derive
+from one compound - 0 x 1 = 0. Alternatively - a feature pair with retention time similarity of 1 and
+correlational similarity of 1 is likely to derive from one compound - 1 x 1 = 1.
+The RAMClustR algorithm is built on creating similarity scores for all pairs of features, submitting
+this score matrix for hierarchical clustering, and then cutting the resulting dendrogram into neat
+chunks using the dynamicTreeCut package - where each 'chunk' of the dendrogram results in a group of
+features likely to be derived from a single compound. Importantly, this is achieved without looking for
+specific phenomena (e.g. sodiation), meaning that grouping can be performed on any dataset, whether it
+is positive or negative ionization mode, EI or ESI, LC-MS GC-MS or CE-MS, in-source fragment or complex
+adduction event, and predictable or unpredictable signals.
+</token>
+<token name="@HELP_experiment@">
+<![CDATA[
+Create an Experimental Design specification for RAMClustR experiment.
+Downstream Tools
++-----------+-----------------------+--------+
+| Name      | Output File           | Format |
++===========+=======================+========+
+| RAMClustR | Experiment definition | csv    |
++-----------+-----------------------+--------+
+]]>
+</token>
+</macros>

Mercurial > repos > recetox > ramclustr

comparison macros.xml @ 0:36104baf75da draft