comparison ipapy2_gibbs_sampler.xml @ 0:b2253cf7db76 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/ipapy2 commit 64b61ff2823b4f54868c0ab7a4c0dc49eaf2979a
author recetox
date Fri, 16 May 2025 08:02:01 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b2253cf7db76
1 <tool id="ipapy2_gibbs_sampler" name="ipaPy2 gibbs sampler" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
2 <description>combine multiple information sources in a Gibbs sampler to improve annotation accuracy</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6
7 <expand macro="requirements"/>
8
<command detect_errors="exit_code"><![CDATA[
    ## Resolve the selected integrating mode once; every mode-dependent
    ## argument below is driven by this single value.
    #set $mode = str($integrating_mode.integrating_mode)

    python3 '${__tool_directory__}/ipapy2_gibbs_sampler.py'
        --input_dataset_mapped_isotope_patterns '${mapped_isotope_patterns}' '${mapped_isotope_patterns.ext}'
        --input_dataset_annotations '${annotations}' '${annotations.ext}'
        --integrating_mode '${mode}'

        ## The adducts weight is passed for every mode except the purely biochemical one.
        #if $mode != "biochemical"
            --delta_add '${integrating_mode.delta_add}'
        #end if

        ## The biochemical connections table and its weight are passed for
        ## every mode except the purely adducts one.
        #if $mode != "adducts"
            --input_dataset_bio '${integrating_mode.Bio}' '${integrating_mode.Bio.ext}'
            --delta_bio '${integrating_mode.delta_bio}'
        #end if

        --noits '${noits}'
        --burn '${burn}'
        --all_out '${all_out}'

        ## --zs and --zs_out always receive two values (path and extension);
        ## a pair of empty strings is passed when the dataset is not set.
        #if $zs
            --zs '${zs}' '${zs.ext}'
        #else
            --zs '' ''
        #end if
        #if $zs_out
            --zs_out '${zs_out}' '${zs_out.ext}'
        #else
            --zs_out '' ''
        #end if

        --output_dataset '${annotations_out}' '${annotations_out.ext}'
]]></command>
40
<inputs>
    <expand macro="gibbs"/>

    <!--
        Selects which connection types are integrated. Each branch exposes only
        the parameters that the command template passes for that mode.
        The weights carry an exclusive lower bound: their help text states the
        value must be positive, whereas min="0" on its own would still accept 0.
    -->
    <conditional name="integrating_mode">
        <param name="integrating_mode" type="select" label="integrating mode" help="select the integrating mode">
            <option value="adducts">adducts</option>
            <option value="biochemical">biochemical</option>
            <option value="both">adducts and biochemical</option>
        </param>
        <when value="adducts">
            <param name="delta_add" type="float" value="1" min="0" label="adducts weight"
                help="parameter used when computing the conditional priors. The parameter must be positive.
                The smaller the parameter the more weight the adducts connections have on the posterior probabilities. Default 1.">
                <validator type="in_range" min="0" exclude_min="true" message="The adducts weight must be greater than 0."/>
            </param>
        </when>
        <when value="biochemical">
            <param name="Bio" type="data" format="csv,tsv,tabular,parquet" label="biochemical connections"
                help="dataframe (2 columns), reporting all the possible connections between compounds. It uses the unique ids from the database.
                It could be the output of Compute_Bio() or Compute_Bio_Parallel()." />
            <param name="delta_bio" type="float" value="1" min="0" label="biochemical weight"
                help="parameter used when computing the conditional priors. The parameter must be positive.
                The smaller the parameter the more weight the biochemical connections have on the posterior probabilities. Default 1.">
                <validator type="in_range" min="0" exclude_min="true" message="The biochemical weight must be greater than 0."/>
            </param>
        </when>
        <when value="both">
            <param name="delta_add" type="float" value="1" min="0" label="adducts weight"
                help="parameter used when computing the conditional priors. The parameter must be positive.
                The smaller the parameter the more weight the adducts connections have on the posterior probabilities. Default 1.">
                <validator type="in_range" min="0" exclude_min="true" message="The adducts weight must be greater than 0."/>
            </param>
            <param name="Bio" type="data" format="csv,tsv,tabular,parquet" label="biochemical connections"
                help="dataframe (2 columns), reporting all the possible connections between compounds. It uses the unique ids from the database.
                It could be the output of Compute_Bio() or Compute_Bio_Parallel()." />
            <param name="delta_bio" type="float" value="1" min="0" label="biochemical weight"
                help="parameter used when computing the conditional priors. The parameter must be positive.
                The smaller the parameter the more weight the biochemical connections have on the posterior probabilities. Default 1.">
                <validator type="in_range" min="0" exclude_min="true" message="The biochemical weight must be greater than 0."/>
            </param>
        </when>
    </conditional>
</inputs>
76
<outputs>
    <!-- Main result; its datatype follows the mapped isotope patterns input. -->
    <data label="${tool.name} annotations on ${on_string}" name="annotations_out" format_source="mapped_isotope_patterns"/>
    <!--
        Optional output, created only when all_out is enabled.
        The filter refers to all_out directly because the command template
        reads it as a top-level parameter (${all_out}); no "options" container
        is visible in this file, so options['all_out'] could not be resolved.
        NOTE(review): all_out is presumably a boolean defined by the "gibbs"
        macro; confirm against macros.xml.
    -->
    <data label="${tool.name} zs on ${on_string}" name="zs_out" format="txt">
        <filter>all_out</filter>
    </data>
</outputs>

<tests>
    <test expect_num_outputs="2">
        <param name="mapped_isotope_patterns" value="mapped_isotope_patterns.parquet"/>
        <param name="annotations" value="clean_annotations.csv"/>
        <!-- Enable all_out explicitly so that zs_out passes its filter and two outputs are produced. -->
        <param name="all_out" value="true"/>
        <!-- Not the best way to test, but the results are stochastic and hence difficult to test. -->
        <output name="annotations_out">
            <assert_contents>
                <has_size value="9185" delta="100" />
            </assert_contents>
        </output>
    </test>
</tests>
96
97 <help><![CDATA[
98
99 .. _ipapy2_gibbs_sampler:
100
101 ==========================
102 ipaPy2 Gibbs Sampler Tool
103 ==========================
104
105 **Tool Description**
106
107 This tool implements a Gibbs sampler that integrates multiple sources of information—biochemical connections and adducts connections—to improve the accuracy of metabolite annotation. By iteratively sampling from the posterior distribution, the tool refines annotation probabilities based on both network and chemical relationships.
108
109 How it works
110 ------------
111
112 - The Gibbs sampler updates annotation probabilities by considering:
113 - **Adducts connections**: Relationships between features that can be explained by known adduct transformations.
114 - **Biochemical connections**: Relationships between compounds based on known biochemical pathways or reactions.
115 - The user can select to use only adducts, only biochemical connections, or both.
116 - The influence of each connection type is controlled by the `adducts weight` and `biochemical weight` parameters: smaller values increase the influence of the respective connection type on the posterior probabilities.
117 - The process is stochastic, so results may vary between runs.
118
119 Inputs
120 ------
121
122 1. **Mapped isotope patterns**
123 Dataset containing mapped isotope patterns (e.g., output from the ipaPy2 map isotope patterns tool).
124
125 2. **Annotations**
126 Initial annotation table to be refined by the Gibbs sampler.
127
128 3. **Integrating mode**
129 - **adducts**: Use only adducts connections.
130 - **biochemical**: Use only biochemical connections (requires a biochemical connections table).
131 - **both**: Use both adducts and biochemical connections (also requires a biochemical connections table).
132
133 4. **Adducts weight (`delta_add`)**
134 Controls the influence of adducts connections (smaller = more influence).
135
136 5. **Biochemical connections**
137 Table (2 columns) reporting all possible biochemical connections between compounds (required when the integrating mode is **biochemical** or **both**).
138
139 6. **Biochemical weight (`delta_bio`)**
140 Controls the influence of biochemical connections (smaller = more influence).
141
142 7. **Other parameters**
143 - **noits**: Number of Gibbs sampler iterations.
144 - **burn**: Number of burn-in iterations.
145 - **all_out**: Output all intermediate results.
146 - **zs**: Optional input for initial state.
147 - **zs_out**: Optional output for sampled states.
148
149 Outputs
150 -------
151
152 - **annotations_out**
153 Refined annotation table with updated posterior probabilities.
154
155 - **zs_out**
156 (Optional) File containing sampled states from the Gibbs sampler (if `all_out` is enabled).
157
158 Example
159 -------
160
161 Suppose you have mapped isotope patterns and an initial annotation table. You can run the Gibbs sampler as follows:
162
163 .. code-block::
164
165 mapped_isotope_patterns.parquet
166 clean_annotations.csv
167
168 Choose the integrating mode (e.g., both), set the weights, and run the tool. The output will be a refined annotation table.
169
170 Notes
171 -----
172
173 - The results are stochastic; repeated runs may yield slightly different outputs.
174 - For best results, ensure all input files are correctly formatted and contain the required columns.
175 - The biochemical connections table should use unique IDs consistent with your annotation table.
176
177 References
178 ----------
179
180 - For more details on the Gibbs sampling algorithm and its application in metabolomics, refer to the ipaPy2 documentation or associated publications.
181
182 ]]></help>
183
184 <expand macro="citations"/>
185 </tool>