Mercurial > repos > prog > lcmsmatching

diff lcmsmatching.xml @ 6:f86fec07f392 draft default tip
planemo upload commit c397cd8a93953798d733fd62653f7098caac30ce
author: prog
date: Fri, 22 Feb 2019 16:04:22 -0500
parents: fb9c0409d85c
--- a/lcmsmatching.xml	Wed Apr 19 10:00:05 2017 -0400
+++ b/lcmsmatching.xml	Fri Feb 22 16:04:22 2019 -0500
@@ -1,43 +1,38 @@
-<tool id="lcmsmatching" name="LC/MS matching" version="3.3.1" profile="16.01">
+<!-- vi: se fdm=marker : -->
+<tool id="lcmsmatching" name="LCMS matching" version="4.0.2" profile="18.05">
 
-	<description>Annotation of MS peaks using matching on a spectra database.</description>
+	<description>Annotation of LCMS peaks using matching on an in-house spectra database or on the PeakForest spectra database.</description>
 
+	<!-- Requirements {{{1 -->
+	<!-- **************************************************************** -->
 	<requirements>
-		<!--<requirement type="package" version="3.3.3">r</requirement>-->
-		<requirement type="package" version="7.0">readline</requirement> <!-- Try readline 7.0 -->
-		<requirement type="package" version="1.20.0">r-getopt</requirement>
-		<requirement type="package" version="1.0.0">r-stringr</requirement>
-		<requirement type="package" version="1.8.3">r-plyr</requirement>
-		<requirement type="package" version="3.98">r-xml</requirement>
-		<requirement type="package" version="1.0_6">r-bitops</requirement>
-		<requirement type="package" version="1.95">r-rcurl</requirement>
-		<requirement type="package" version="1.1">r-jsonlite</requirement>
+		<requirement type="package" version="1.2.2">r-biodb</requirement>
+		<requirement type="package" version="1.20.2">r-getopt</requirement>
+		<requirement type="package" version="0.2_15">r-codetools</requirement> <!-- R_VERSION="0.2-15" IMPORTANT Do not remove, used by travis_install_deps.sh script. --> <!-- codetools package is needed because of the following error when running Galaxy on Travis-CI in planemo tests: "code for methods in class “HtmlWriter” was not checked for suspicious field assignments (recommended package ‘codetools’ not available?)". -->
+		
 	</requirements>
 
-	<code file="list-chrom-cols.py"/>
-	<code file="list-file-cols.py"/>
-	<code file="list-ms-mode-values.py"/>
-
-	<!--=======
-	= COMMAND =
-	========-->
+	<!-- Command {{{1 -->
+	<!-- **************************************************************** -->
 
 	<command>
 		<![CDATA[
 		## @@@BEGIN_CHEETAH@@@
-		$__tool_directory__/search-mz
+		$__tool_directory__/lcmsmatching
+
+		--log-to-stdout
 
 		## Input file
 		-i "$mzrtinput"
-		--input-col-names "mz=$inputmzfield,rt=$inputrtfield"
+		--input-col-names "$inputfields"
 		--rtunit "$inputrtunit"
 
 		## Database
 		#if $db.dbtype == "inhouse"
 			-d file
-			--db-fields "mztheo=$db.dbmzreffield,chromcolrt=$db.dbchromcolrtfield,compoundid=$db.dbspectrumidfield,chromcol=$db.dbchromcolfield,msmode=$db.dbmsmodefield,peakattr=$db.dbpeakattrfield,pubchemcompid=$db.dbpubchemcompidfield,chebiid=$db.dbchebiidfield,hmdbid=$db.dbhmdbidfield,keggid=$db.dbkeggidfield"
-			--db-ms-modes "pos=$db.dbmsposmode,neg=$db.dbmsnegmode"
-			--db-rt-unit $db.dbrtunit
+			--db-fields "$db.dbfields"
+			--db-ms-modes "$db.dbmsmodes"
+			--db-rt-unit "$db.dbrtunit"
 		#end if
 		#if $db.dbtype == "peakforest"
 			-d peakforest
@@ -46,7 +41,7 @@
 			--url "$db.dburl"
 
 		## M/Z matching
-		-m $mzmode -p $mzprec -s $mzshift
+		-m $mzmode -p $mzprec -s $mzshift -u $mztolunit
 
 		## Precursor matching
 		#if $prec.match == "true"
@@ -72,14 +67,13 @@
 		## @@@END_CHEETAH@@@
 	]]></command>
 
-	<!--======
-	= INPUTS =
-	=======-->
+	<!-- Inputs {{{1 -->
+	<!-- **************************************************************** -->
 
 	<inputs>
 
-		<!-- DATABASE -->
-
+		<!-- Database {{{2 -->
+		<!-- **************************************************************** -->
 		<conditional name="db">
 
 			<param name="dbtype" label="Database" type="select" refresh_on_change="true">
@@ -87,25 +81,17 @@
 				<option value="peakforest">Peakforest</option>
 			</param>
 
+			<!-- In-house database parameters {{{3 -->
+			<!-- **************************************************************** -->
 			<when value="inhouse">
 				<!-- Database file -->
 				<param name="dburl" label="Database file" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. Retention time values must be in seconds."/>
 
 				<!-- File database field names -->
-				<param name="dbspectrumidfield" type="select" label="Database file Spectrum ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'spectrumid,accession,compoundid,molid')" help="Select the Spectrum ID column of the database file."/>
-				<param name="dbmzreffield" type="select" label="Database file Reference MZ column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'mztheo,mzexp,mz')" help="Select the Reference MZ column of the database file."/>
-				<param name="dbchromcolfield" type="select" label="Database file Chromatographic Column Name column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chromcol,col')" help="Select the Chromatographic Column Name column of the database file." refresh_on_change="true"/>
-				<param name="dbchromcolrtfield" type="select" label="Database file Chromatographic Column Retention Time column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chromcolrt,colrt,rt')" help="Select the Chromatographic Column Retention Time column of the database file."/>
-				<param name="dbmsmodefield" type="select" label="Database file MS Mode column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'msmode,mode')" help="Select the MS Mode column of the database file." refresh_on_change="true"/>
-				<param name="dbpeakattrfield" type="select" label="Database file Peak Attribution column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'peakattr,attr')" help="Select the Peak Attribution column of the database file."/>
-				<param name="dbpubchemcompidfield" type="select" label="Database file PubChem Compound ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'pubchemcompid,pubchemid,pubchemcomp,pubchem')" help="Select the PubChem Compound ID column of the database file."/>
-				<param name="dbchebiidfield" type="select" label="Database file ChEBI ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'chebiid,chebi')" help="Select the ChEBI ID column of the database file."/>
-				<param name="dbhmdbidfield" type="select" label="Database file HMDB Metabolite ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'hmdbid,hmdb')" help="Select the HMDB Metabolite ID column of the database file."/>
-				<param name="dbkeggidfield" type="select" label="Database file KEGG Compound ID column name" dynamic_options="get_file_cols(file = db['dburl'], preferred = 'keggid,kegg')" help="Select the KEGG Compound ID column of the database file."/>
-
+				<param name="dbfields" label="Column names" type="text" size="256" value="mztheo=mztheo,chromcolrt=chromcolrt,compoundid=compoundid,chromcol=chromcol,msmode=msmode,peakattr=peakattr,pubchemcompid=pubchemcompid,chebiid=chebiid,hmdbid=hmdbid,keggid=keggid" help="The list of column names of your in-house database file, as a comma-separated list of key/value pairs."/>
+					
 				<!-- File database MS modes -->
-				<param name="dbmsposmode" label="File database MS Positive mode" type="select" dynamic_options="get_ms_mode_value(file = db['dburl'], col = db['dbmsmodefield'], preferred = 'POS,pos,+')" help="Select the value used to identify the positive MS mode."/>
-				<param name="dbmsnegmode" label="File database MS Negative mode" type="select" dynamic_options="get_ms_mode_value(file = db['dburl'], col = db['dbmsmodefield'], preferred = 'NEG,neg,-')" help="Select the value used to identify the negitive MS mode."/>
+				<param name="dbmsmodes" label="MS modes" help="Values used for the file database MS modes, as a comma-separated list of key/value pairs." type="text" size="64" value="pos=pos,neg=neg"/>
 
 				<!-- File database RT unit -->
 				<param name="dbrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help="">
@@ -116,8 +102,10 @@
 				<param name="dbtoken" type="text" size="32" value="" hidden="true"/>
 			</when>
 
+			<!-- PeakForest database parameters {{{3 -->
+			<!-- **************************************************************** -->
 			<when value="peakforest">
-				<param name="dburl" type="text" size="128" value="https://peakforest-alpha.inra.fr/rest" refresh_on_change="true"/>
+				<param name="dburl" type="text" size="128" value="https://metabohub.peakforest.org/rest/" refresh_on_change="true"/>
 
 				<param name="dbtoken" label="Peakforest security token" type="text" size="32" value="" refresh_on_change="true" help="If you do not have yet a Peakforest token, go to Peakforest website and request one from your account."/>
 
@@ -125,44 +113,51 @@
 			</when>
 		</conditional>
 
-		<!-- INPUT -->
+		<!-- Input file {{{2 -->
+		<!-- **************************************************************** -->
 
-			<!-- Input file -->
-			<param name="mzrtinput" label="Input file - MZ(/RT) values" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. RT values must be in seconds."/>
+		<!-- Input file -->
+		<param name="mzrtinput" label="Input file - MZ(/RT) values" type="data" format="tabular,tsv" refresh_on_change="true" help="Decimal: '.', missing: NA, mode: character and numerical, sep: tabular. RT values must be in seconds."/>
+
+		<!-- Input file field names -->
+		<param name="inputfields" type="text" label="Input column names" size="64" help="Input file column names, as a comma-separated list of key/value pairs." value="mz=mz,rt=rt"/>
 
-			<!-- Input field field names -->
-			<param name="inputmzfield" type="select" label="Input file MZ column name" dynamic_options="get_file_cols(file = mzrtinput, preferred = 'mzmed,mz')" help="Select the MZ column of the input file."/>
-			<param name="inputrtfield" type="select" label="Input file RT column name" dynamic_options="get_file_cols(file = mzrtinput, preferred = 'rtmed,rt')" help="Select the RT column of the input file."/>
+		<!-- Input file RT unit -->
+		<param name="inputrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help="">
+			<option value="sec">Seconds</option>
+			<option value="min">Minutes</option>
+		</param>
 
-			<!-- Input file RT unit -->
-			<param name="inputrtunit" label="Retention time unit" type="select" display="radio" multiple="false" help="">
-				<option value="sec">Seconds</option>
-				<option value="min">Minutes</option>
-			</param>
-
-		<!-- M/Z MATCHING -->
+		<!-- M/Z matching {{{2 -->
+		<!-- **************************************************************** -->
 
-			<!-- Mode -->
-			<param name="mzmode" label="MS mode" type="select" display="radio" multiple="false" help="">
-				<option value="pos">Positive</option>
-				<option value="neg">Negative</option>
-			</param>
+		<!-- Mode -->
+		<param name="mzmode" label="MS mode" type="select" display="radio" multiple="false" help="">
+			<option value="pos">Positive</option>
+			<option value="neg">Negative</option>
+		</param>
 
-			<!-- MZ matching parameters -->
-			<param name="mzprec" label="M/Z precision (in ppm)" type="float" help="" value="5"/>
-			<param name="mzshift" label="M/Z shift (in ppm)" type="float" help="" value="0"/>
+		<!-- MZ matching parameters -->
+		<param name="mzprec" label="M/Z precision" type="float" help="" value="5"/>
+		<param name="mzshift" label="M/Z shift" type="float" help="" value="0"/>
+		<param name="mztolunit" label="M/Z tolerance unit" type="select" display="radio" multiple="false" help="">
+			<option value="ppm">PPM</option>
+			<option value="plain">Plain</option>
+		</param>
 
-		<!-- RETENTION TIME PARAMETERS -->
+		<!-- RT matching {{{2 -->
+		<!-- **************************************************************** -->
 
-			<!-- List of chromatographic columns -->
-			<param name="chromcols" type="select" label="Chromatographic columns" multiple="true" dynamic_options="get_chrom_cols(dbtype = db['dbtype'], dburl = db['dburl'], dbtoken = db['dbtoken'], col_field = db['dbchromcolfield'])" help="Select here the set of chromatographic columns against which the retention time matching will be run."/>
+		<!-- List of chromatographic columns -->
+		<param name="chromcols" type="text" label="Chromatographic columns" size="2048" value=""/>
 
-			<!-- Tolerances -->
-			<param name="tolx" label="RTX retention time tolerance, parameter x (in seconds)" type="float" help="" value="5"/>
-			<param name="toly" label="RTY retention time tolerance, parameter y" type="float" help="" value="0.8"/>
-			<param name="tolz" label="RTZ retention time tolerance, used when precursor matching is enabled." type="float" help="" value="5"/>
+		<!-- Tolerances -->
+		<param name="tolx" label="RTX" help="The retention time tolerance X parameter (in seconds)." type="float" value="5"/>
+		<param name="toly" label="RTY" help="The retention time tolerance Y parameter (no unit)." type="float" value="0.8"/>
+		<param name="tolz" label="RTZ" help="The retention time tolerance used when precursor matching is enabled." type="float" value="5"/>
 
-		<!-- PRECURSOR MATCH -->
+		<!-- Precursor matching {{{2 -->
+		<!-- **************************************************************** -->
 		<conditional name="prec">
 
 			<param name="match" label="Precursor match" type="select">
@@ -198,9 +193,11 @@
 			</when>
 		</conditional>
 
-		<!-- OUTPUT -->
+		<!-- Output format {{{2 -->
+		<!-- **************************************************************** -->
+
 		<!-- Molecule IDs separator character -->
-		<param name="molidssep" label="Molecule IDs separator character" type="text" size="3" value="|" help="">
+		<param name="molidssep" label="Multiple matches separator character" type="text" size="3" value="|" help="">
 			<sanitizer>
 				<valid initial="string.printable">
 					<remove value='"'/>
@@ -213,60 +210,77 @@
 
 	</inputs>
 
-	<!--=======
-	= OUTPUTS =
-	========-->
+	<!-- Outputs {{{1 -->
+	<!-- **************************************************************** -->
 
 	<outputs>
 
-		<!-- Output file -->
 		<data name="mainoutput"  label="lcmsmatch_${mzrtinput.name}" format="tabular"/>
 		<data name="peaksoutput" label="lcmsmatch_${mzrtinput.name}_peaks" format="tabular"/>
 		<data name="htmloutput"  label="lcmsmatch_${mzrtinput.name}.html" format="html"/>
 
 	</outputs>
 
-	<!--=====
-	= TESTS =
-	======-->
+	<!-- Tests {{{1 -->
+	<!-- **************************************************************** -->
 
 	<tests>
 
-		<!-- File database test -->
+		<!-- Test 1, MZ only {{{2 -->
+		<!-- **************************************************************** -->
+		<test>
+			<param name="dbtype" value="inhouse"/>
+			<param name="dburl" value="filedb.tsv"/>
+			<param name="mzrtinput" value="mz-input-small.tsv"/>
+			<param name="inputfields" value="mz=mz"/>
+			<param name="mzmode" value="pos"/>
+			<output name="mainoutput" file="test_1_main_output.tsv"/>
+			<output name="peaksoutput" file="test_1_peaks_output.tsv"/>
+			<output name="htmloutput" file="test_1_peaks_output.html"/>
+		</test>
+
+		<!-- Test 2, MZ & RT {{{2 -->
+		<!-- **************************************************************** -->
 		<test>
 			<param name="dbtype" value="inhouse"/>
 			<param name="dburl" value="filedb.tsv"/>
-			<param name="dbfields" value=""/>
-			<param name="dbmsmodes" value=""/>
-			<param name="mzrtinput" value="mz-input-small.tsv"/>
-			<param name="inputmzfield" value="mzmed"/>
-			<param name="inputrtfield" value="rtmed"/>
+			<param name="mzrtinput" value="mzrt-input-small.tsv"/>
+			<param name="inputfields" value="mz=mz,rt=rt"/>
 			<param name="mzmode" value="pos"/>
-			<output name="mainoutput" file="filedb-small-mz-match-output.tsv"/>
-			<output name="peaksoutput" file="filedb-small-mz-match-peaks-output.tsv"/>
-			<output name="htmloutput" file="filedb-small-mz-match-html-output.html"/>
+			<param name="dbrtunit" value="min"/>
+			<param name="chromcols" value="col12"/>
+			<param name="tolx" value="5"/>
+			<param name="toly" value="0.8"/>
+			<output name="mainoutput" file="test_2_main_output.tsv"/>
+			<output name="peaksoutput" file="test_2_peaks_output.tsv"/>
+			<output name="htmloutput" file="test_2_peaks_output.html"/>
 		</test>
 
-		<!-- File database test -->
-<!--
+		<!-- Test 3, MZ & RT with precursor match {{{2 -->
+		<!-- **************************************************************** -->
 		<test>
-			<param name="dbtype" value="peakforest"/>
-			<param name="dbtoken" value="@PEAKFOREST_TOKEN@"/>
-			<param name="mzrtinput" value="mz-input-small.tsv"/>
-			<param name="inputfields" value=""/>
+			<param name="dbtype" value="inhouse"/>
+			<param name="dburl" value="filedb.tsv"/>
+			<param name="mzrtinput" value="mzrt-input-small.tsv"/>
+			<param name="inputfields" value="mz=mz,rt=rt"/>
 			<param name="mzmode" value="pos"/>
-			<output name="mainoutput">
-				<assert_contents>
-					<has_text text="mz"/>
-				</assert_contents>
-			</output>
+			<param name="dbrtunit" value="min"/>
+			<param name="chromcols" value="col12"/>
+			<param name="tolx" value="5"/>
+			<param name="toly" value="0.8"/>
+			<param name="match" value="true"/>
+			<param name="neg" value="[(M-H)]-,[M-H]-"/>
+			<param name="pos" value="[(M+H)]+,[M+H]+"/>
+			<param name="tolz" value="60"/>
+			<output name="mainoutput" file="test_3_main_output.tsv"/>
+			<output name="peaksoutput" file="test_3_peaks_output.tsv"/>
+			<output name="htmloutput" file="test_3_peaks_output.html"/>
 		</test>
--->
+
 	</tests>
 
-	<!--====
-	= HELP =
-	=====-->
+	<!-- Help {{{1 -->
+	<!-- **************************************************************** -->
 
 	<help>
 <!-- @@@BEGIN_RST@@@ -->
@@ -296,13 +310,14 @@
 Single file database
 ====================
 
-The database used is provided as a single file, in tabular format, through the *Database file* field. This file must contain a list of MS peaks, with possibly retention times.
+In this case, the database used is provided as a single file by the user, in tabular format, through the *Database file* field. This file must contain a list of MS peaks, with possibly retention times.
 Peaks are "duplicated" as many times as necessary. For instance, if 3 retention times are available for a compound with 10 peaks in positive mode, then there will be 30 lines for this compound in positive mode.
 
-The file must contain a header with the column names. The names are free, but must be provided through the different fields named *Database file ... column name*.
-Then you must provide the values used to identify the MS modes (positive and negative).
+The file must contain a header with the column names. The names are free, but must be provided through the *Column names* field as a comma-separated list of key/value pairs. See the default value for an example. Of course, it is much easier if your database file uses the column names that appear in the default value of the *Column names* field. The column names shown in the default value are only the ones used by the algorithm. You can provide any additional columns in your database file; they will be copied to the output.
 
-A last information about the single file database is the unit of the retention times, either in seconds or in minutes.
+Then you must provide the values used to identify the MS modes (positive and negative), using field *MS modes*.
+
+The last piece of information needed for the single file database is the unit of its retention times, either seconds or minutes. Use the *Retention time unit* field to provide this information.
 
 Example of database file (totally fake, no meaning):
 
@@ -332,13 +347,18 @@
 | A10   | "POS" | 145.097154 | "P92Z6W413 O2"     | "[(M+H)-(H2)]+"         | "hcoltt"  | 0.8   | "J114L6M62O2" | 146.10553 | "Blablaine"  |
 +-------+-------+------------+--------------------+-------------------------+-----------+-------+---------------+-----------+--------------+
 
+The corresponding value of the *Column names* field for this database file would be:
+**mztheo=mz,chromcolrt=rt,compoundid=molid,chromcol=col,msmode=mode,peakattr=attribution**.
+
+And the value of the *MS modes* field would be: **pos=POS,neg=NEG**.
+
 MZ/RT input file
 ================
 
-The input to provide is a file, in a tabular format (or TSV: Tab Seperated Values), containing the list of M/Z values, with possibly also RT values.
+The input to provide is a dataset in tabular format (TSV: Tab-Separated Values), containing the list of M/Z values and, optionally, RT values. The dataset is chosen through the field *Input file - MZ(/RT) values*.
 
-The column names for the M/Z and RT values must be provided through the fields *Input file MZ column name* and *Input file RT column name*.
-As a consequence, the file must contain a header line.
+The column names for the M/Z and RT values must be provided through the field *Input column names*, as a comma separated list of key/value pairs.
+The file/dataset must contain a header line with the same names specified in the field *Input column names*.
 
 The unit of the retention time has to be provided with the field *Retention time unit*.
 
@@ -364,27 +384,29 @@
 M/Z matching
 ------------
 
-In the simplest form of the algorithm only the *m/z* values are matched against the database peaks. This happens if both *Retention time match* and *Precursor match* are off.
+In the simplest form of the algorithm only the *M/Z* values are matched against the database peaks. This happens if both *Retention time match* and *Precursor match* are off.
 
 The first parameter is the MS mode, specified through the *MS mode* parameter.
 
-The parameters *M/Z precision* and *M/Z shift* are used by the algorithm in the following formula in order to match an *m/z* value:
+The parameters *M/Z precision* and *M/Z shift* are used by the algorithm in the following formula in order to match an *M/Z* value:
+
+	mz - shift - precision &lt; mzref &lt; mz - shift + precision
 
-	mz (1 + (- shift - precision) / 10^6) &lt; mzref &lt; mz (1 + (- shift - precision) / 10^6)
+Where *mzref* is the M/Z of reference from the database peak that is tested. If this double inequality is true, then the *M/Z* value is matched with this peak.
 
-Where *mzref* is the M/Z of reference from the database peak that is tested. If this double inequality is true, then the *m/z* value is matched with this peak.
+The parameters *shift* and *precision* can be input in either PPM values of M/Z or in plain values. Use the field *M/Z tolerance unit* to set the unit.
 
 --------------------
 Retention time match
 --------------------
 
-If at least one column is selected inside the *Chromatographic columns* parameter section, then retention time is also matched, in addition to the *m/z* value, according to the following formula:
+If at least one column is specified in the *Chromatographic columns* field, then retention time is also matched, in addition to the *M/Z* value, according to the following formula:
 
 	rt - x - rt^y &lt; colrt &lt; rt + x + rt^y
 
 Where *x* is the value of the parameter *RTX* and *y* the value of the parameter *RTY*.
 
-If for a reference compound the database does not contain retention time for at least one of the specified columns, then only the *m/z* value is matched against the peaks of the reference compound. This means that in the results you can find compounds that do no match the provided retention time value.
+If, for a reference compound, the database does not contain a retention time for at least one of the specified columns, then only the *M/Z* value is matched against the peaks of the reference compound. This means that in the results you can find compounds that do not match the provided retention time value.
 
 The *RTZ* parameter is used in the *Precursor match* algorithm (see below).
 
@@ -394,7 +416,7 @@
 
 If the "Precursor match" option is enabled inside the parameters section, then a more sophisticated version of the algorithm, which is executed in two steps, is used.
 
-This algorithm takes two more parameters, one for each MS mode. These are the lists of precursors. Since the matching is run for one MS mode only, only one of the two parameters is used. Inside the single file database, all the peaks whose **attr** column value is equal to one of the precursor listed in *List of negative precursors* or *List of positive precursors*, depending on the mode, are considered as precursor peaks.
+This algorithm takes two more parameters, one for each MS mode. These are the lists of precursors. Since the matching is run for one MS mode only, only one of the two parameters is used. Inside the single file database, all the peaks whose **peakattr** column value is equal to one of the precursors listed in *List of negative precursors* or *List of positive precursors*, depending on the mode, are considered as precursor peaks.
 
 M/Z matching using precursor matching
 =====================================
@@ -414,7 +436,7 @@
 Output settings
 ---------------
 
-The *Molecule IDs separator character* is used to customize the character used to separate the molecule IDs of the **molid** column inside the *main* output file.
+The *Multiple matches separator character* is used to customize the character used to separate the multiple values inside each row in the *main* output dataset. The *main* output contains as many rows as the MZ/RT input dataset; thus, when the algorithm finds more than one match for one MZ/RT value, it concatenates the matches using this separator character.
 
 Output files
 ============
@@ -424,18 +446,25 @@
 +-------------+--------------------------------------+--------------------------------------------------------+
 |   Outputs   |              File name               |                      Description                       |
 +-------------+--------------------------------------+--------------------------------------------------------+
-| Main output | lcmsmatching_{input_file_name}       | Contains the list of compounds that have been matched. |
+| Main output | lcmsmatch_{input_file_name}          | Contains the same data as the input dataset, with      |
+|             |                                      | match result included on each row. If more than one    |
+|             |                                      | match is found for a row, the different values of the  |
+|             |                                      | match are concatenated using the provided separator    |
+|             |                                      | character.                                             |
 +-------------+--------------------------------------+--------------------------------------------------------+
-| Peak list   | lcmsmatching_peaks_{input_file_name} | Contains all matched database peaks.                   |
+| Peak list   | lcmsmatch_{input_file_name}_peaks    | Contains the same data as the input dataset, with      |
+|             |                                      | match result included on each row. If more than one    |
+|             |                                      | match is found for a row, then the row is duplicated.  |
+|             |                                      | Hence there is either no match for a row, or one       |
+|             |                                      | single match.                                          |
 +-------------+--------------------------------------+--------------------------------------------------------+
-| HTML output | lcmsmatching_{input_file_name}.html  | Contains the two tables on one page.                   |
+| HTML output | lcmsmatch_{input_file_name}.html     | Contains the same table as *Peak list* but in HTML     |
+|             |                                      | format and with links to external databases if columns |
+|             |                                      | for PubChem Compound, ChEBI, HMDB Metabolites or KEGG  |
+|             |                                      | Compounds are provided.                                |
 +-------------+--------------------------------------+--------------------------------------------------------+
 
-The **main** output is identical to the input file, to which is added an *msmatching* column. This column contains a list of IDs of the compounds that have been matched for this couple of (m/z, rt) values.
-
-The **peak list** output contains all database peaks that have been matched, for each (m/z, rt) input couple. Thus for each (m/z, rt) couple, there will be zero, one or more matched peaks output. The columns output are *mz*, *rt*, *id*, *mztheo*, *col*, *colrt*, *attribution* and *composition*, where *id* is the compound ID, *mztheo* is the theoretical mass of the fragment, *col* is the matched column and *colrt* is the retention time measured on the column for the reference compound.
-
-The **HTML** output contains the peak table with links toward HMDB, KEGG, ChEBI and PubChem public databases, when IDs are available.
+The match results are output as new columns appended to the columns provided inside the MZ/RT input dataset, and prefixed with "lcmsmatching.".
 
 =====
 About
@@ -455,15 +484,30 @@
 .. class:: infomark
 
 **Please cite**
-	R Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org
+	R Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org.
+
+==============
+Changelog/News
+==============
+
+**Version 4.0.0 - 02/01/2019**
+
+- NEW: Use of R biodb library. Connection to databases and matching have been moved to biodb library, which is maintained separately at http://github.com/pkrog/biodb.
 
 <!-- @@@END_RST@@@ -->
 	</help>
 
-	<!--=========
-	= CITATIONS =
-	==========-->
+	<!-- Citations {{{1 -->
+	<!-- **************************************************************** -->
 
-	<citations/>
+	<citations>
+		<citation type="bibtex">@unpublished{FGiacomoni2017,
+			title  = {PeakForest [Internet], a spectral data portal for Metabolomics community - storing, curating and annotation services for metabolic profiles of biological matrix.},
+			author = {Franck Giacomoni, Nils Paulhe},
+			institution = {INRA / MetaboHUB},
+			year = {2017},
+			note = {Unpublished paper, available from: https://peakforest.org/.}
+			}</citation>
+	</citations>
 
 </tool>
author	prog
date	Fri, 22 Feb 2019 16:04:22 -0500
parents	fb9c0409d85c
children