changeset 2:ddb9d330ecc0 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/mfassignr commit c6e502d8af84750003e4ba001c61817acedd1896
author recetox
date Fri, 13 Sep 2024 10:09:02 +0000
parents 7e5ccc8b6f6e
children 1104682fb7cc
files help.xml macros.xml mfassignr_recallist.xml test-data/findrecalseries/selected_series.tabular test-data/recallist/recal_series.tabular
diffstat 5 files changed, 62 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/help.xml	Fri Aug 16 08:27:27 2024 +0000
+++ b/help.xml	Fri Sep 13 10:09:02 2024 +0000
@@ -32,9 +32,10 @@
 (3) Use IsoFiltR() to identify potential 13C and 34S isotope masses.
 (4) Using the S/N threshold, and the two data frames output from IsoFiltR(), run MFAssignCHO() to assign MF with C, H, and O to assess the mass accuracy.
 (5) Use RecalList() to generate a list of the potential recalibrant series.
-(6) After choosing recalibrant series, use Recal() to recalibrate the mass lists.
-(7) Assign MF to the recalibrated mass list using MFAssign().
-(8) Check the output plots from MFAssign() to evaluate the quality of the assignments.
+(6) Choose the most suitable recalibrant series using FindRecalSeries().
+(7) After choosing recalibrant series, use Recal() to recalibrate the mass lists.
+(8) Assign MF to the recalibrated mass list using MFAssign().
+(9) Check the output plots from MFAssign() to evaluate the quality of the assignments.
 
 For detailed documentation on the individual steps please see the individual tool wrappers.
 </token>
@@ -49,8 +50,8 @@
 
 Output:     
 
-- noise estimate - (this noise level can then be multiplied by the user chosen value (3, 6, 10) in order     to set the signal to noise cut for formula assignment.)
-- KMD plot - bounds of the noise estimation area are highlighted in red
+- noise estimate - this noise level can then be multiplied by the user chosen value (3, 6, 10) in order to set the signal to noise cut for formula assignment.
+- KMD plot - bounds of the noise estimation area are highlighted in red.
 </token>
 
 <token name="@HISTNOISE_HELP@">
@@ -64,7 +65,7 @@
 Output:
 
 - noise estimate - this noise level can then be multiplied by the user chosen value in order to set the signal to noise cut for formula assignment
-- Histogram - shows where the cut is being applied123
+- Histogram - shows where the cut is being applied
 
 </token>
 
@@ -118,7 +119,7 @@
 MFAssignR - RecalList
 =============================
 
-This tool is the fifth step of the MFAssignR workflow (MFAssignCHO -> RecalList -> Recal)
+This tool is the fifth step of the MFAssignR workflow (MFAssignCHO -> RecalList -> FindRecalSeries)
 
 The RecalList() function identifies the homologous series that could be used for recalibration. Its input is the output of the MFAssign() or MFAssignCHO() function. It returns a dataframe that contains the CH2 homologous series that contain more than 3 members.
 
@@ -127,11 +128,34 @@
 - Dataframe that contains the CH2 homologous series that contain more than 3 members.
 </token>
 
+<token name="@FINDRECALSERIES_HELP@">
+MFAssignR - FindRecalSeries
+=============================
+
+This tool is the sixth step of the MFAssignR workflow (RecalList -> FindRecalSeries -> Recal)
+
+This function takes as input the CH2 homologous recalibration series provided by the RecalList function and tries to find the most suitable combination of series for recalibration based on the following criteria:
+
+(1) Series should cover the full mass spectral range,
+(2) Series should be optimally long and combined have a “Tall Peak” at least every 100 m/z,
+(3) Abundance score: the higher, the better,
+(4) Peak score: the closer to 0, the better,
+(5) Peak Distance: the closer to 1, the better,
+(6) Series Score: the closer to this value, the better.
+
+Combinations of 5 series are assembled, scores are computed for the other metrics (for Peak proximity and Peak
+distance, an inverted score is computed), and these scores are summed. Finally, either the series from the best-scoring combination or the top 10 unique series with the highest scores are returned.
+
+Output:     
+
+- Dataframe of n or 10 most suitable recalibrant series.
+</token>
+
 <token name="@RECAL_HELP@">
 MFAssignR - Recal
 =============================
 
-This tool is the sixth step of the MFAssignR workflow (RecalList -> Recal -> MFAssign)
+This tool is the seventh step of the MFAssignR workflow (FindRecalSeries -> Recal -> MFAssign)
 
 The Recal() function recalibrates the 'Mono' and 'Iso' outputs from the IsoFiltR() function and prepares a dataframe containing the chosen recalibrants. It also outputs a plot for the qualitative assessment of recalibrants. The input to the function is the output from MFAssign() or MFAssignCHO().
 
--- a/macros.xml	Fri Aug 16 08:27:27 2024 +0000
+++ b/macros.xml	Fri Sep 13 10:09:02 2024 +0000
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.0.3</token>
+    <token name="@TOOL_VERSION@">1.1.1</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">r-mfassignr</requirement>
@@ -96,6 +96,26 @@
                help= "Upper limit of molecular mass to be assigned."/>
     </xml>
 
+    <xml name="findrecalseries_param">
+        <!-- Parameter set named for FindRecalSeries: RecalList-derived input, m/z bounds, combination size, thresholds, and output mode. -->
+        <param name="input_file" type="data" format="tabular" label="Input data"
+               help="Recalibration series, output from RecalList"/>
+        <param name="global_min" type="float" label="Global min"
+               help="A lower bound of the instrument m/z range."/>
+        <param name="global_max" type="float" label="Global max"
+               help="An upper bound of the instrument m/z range."/>
+        <param name="number_of_combinations" type="integer" label="Number of combinations" value="5"
+               help="Combinations of how many series should be computed."/>
+        <param name="abundance_score_threshold" type="float" label="Abundance score threshold" value="100"
+               help="A threshold for filtering on the abundance score. Series with higher values are better."/>
+        <param name="peak_distance_threshold" type="float" label="Peak distance threshold" value="2"
+               help="A threshold for the peak distance parameter. The closer this value is to 1, the better."/>
+        <param name="coverage_threshold" type="integer" label="Coverage threshold (%)" value="90"
+               help="How many % of the m/z range should be covered."/>
+        <param name="fill_series" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Return top 10 unique series"
+               help="If TRUE, top 10 unique series will be returned, otherwise only the series from the best combination will be returned."/>
+    </xml>
+
     <xml name="recal_param">
         <param name="input_file" type="data" format="tabular" label="Input data (Output from MFAssign)"
                help= "Input data frame, the output from MFAssign or MFAssignCHO"/>
@@ -104,7 +124,7 @@
         <param name="peaks" type="data" format="tabular" label="Peaks dataframe (Mono from IsoFiltR)"
                help= "Peaks data frame, the Mono output from IsoFiltR"/>              
         <param name="isopeaks" type="data" format="tabular" label="Isopeaks dataframe (Iso from IsoFiltR)" 
-               optional="true" help= "Isopeaks data frame, the Mono output from IsoFiltR"/>
+               help= "Isopeaks data frame, the Iso output from IsoFiltR"/>
         <expand macro="ionmode_param" />
         <expand macro="noise_threshold_params" />
         <param name="mzRange" type="float" label="Mass windows used for the segmented recalibration" value="30"
--- a/mfassignr_recallist.xml	Fri Aug 16 08:27:27 2024 +0000
+++ b/mfassignr_recallist.xml	Fri Sep 13 10:09:02 2024 +0000
@@ -33,7 +33,7 @@
         <data name="recal_series" format="tabular" label="Recalibration series by ${tool.name} on ${on_string}"/>
     </outputs>
     <tests>
-        <test>
+         <test>
             <param name="input_file" value="mfassigncho/unambig.tabular" />
             <output name="recal_series" file="recallist/recal_series.tabular" lines_diff="100"/>
         </test>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/findrecalseries/selected_series.tabular	Fri Sep 13 10:09:02 2024 +0000
@@ -0,0 +1,6 @@
+"Series"	"total_abundance"	"total_series_length"	"peak_proximity"	"peak_distance_proximity"	"series_id"	"sum_score"
+"O_H_7"	437.136255030871	504.562	129.612788237483	2723.59808058946	"O_H_7 O2_H_6 O2_H_11"	3794.90912385781
+"O2_H_6"	437.136255030871	504.562	129.612788237483	2723.59808058946	"O_H_7 O2_H_6 O2_H_11"	3794.90912385781
+"O2_H_11"	437.136255030871	504.562	129.612788237483	2723.59808058946	"O_H_7 O2_H_6 O2_H_11"	3794.90912385781
+"O4_H_11"	943.304144088114	392.438	134.36084248065	1826.47532994759	"O2_H_6 O2_H_11 O4_H_11"	3296.57831651636
+"O3_H_12"	330.037060987448	364.407	135.12153276257	1826.47538570915	"O2_H_6 O3_H_12 O2_H_11"	2656.04097945917
--- a/test-data/recallist/recal_series.tabular	Fri Aug 16 08:27:27 2024 +0000
+++ b/test-data/recallist/recal_series.tabular	Fri Sep 13 10:09:02 2024 +0000
@@ -1,4 +1,4 @@
-"Series"	"Number Observed"	"Series Index"	"Mass Range"	"Tall Peak"	"Abundance Score"	"Peak Score"	"Peak Distance"	"Series Score"
+"Series"	"Number.Observed"	"Series.Index"	"Mass.Range"	"Tall.Peak"	"Abundance.Score"	"Peak.Score"	"Peak.Distance"	"Series.Score"
 "_H_4"	8	54	"121.101-177.164"	121.101133005817	-40.744548219864	0.513008460106113	2.0022239614575	0.6255625
 "_H_5"	7	60	"119.085-203.179"	119.085478343665	0	0.00189700255432302	2.002230609633	1.00095918367347
 "_H_6"	9	49	"117.07-173.133"	145.101380561463	-56.0021549559649	0.13569292865165	2.00220483719043	0.556055555555556