Mercurial > repos > iuc > mcl
comparison mcl.xml @ 0:649b9cb20668 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/mcl commit 6fcfaa3d5fefc854ec7398c2848e8db669593b71
author | iuc |
---|---|
date | Mon, 13 Jun 2022 17:34:32 +0000 |
parents | |
children | e092787c0a29 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:649b9cb20668 |
---|---|
1 <tool id="mcl" name="MCL" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05"> | |
2 <description>Markov Cluster Algorithm for graphs</description> | |
3 <macros> <!-- Tokens substituted into the tool version string and the package requirement. --> | |
4 <token name="@TOOL_VERSION@">14.137</token> <!-- version of the mcl package to install --> | |
5 <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision appended after "+galaxy" in the tool version --> | |
6 </macros> | |
7 <xrefs> | |
8 <xref type="bio.tools">mcl</xref> | |
9 </xrefs> | |
10 <requirements> | |
11 <requirement type="package" version="@TOOL_VERSION@">mcl</requirement> | |
12 </requirements> | |
13 <version_command>mcl --version</version_command> | |
14 <command detect_errors="aggressive"> | |
15 <![CDATA[ | |
16 ## Builds the mcl call. Optional numeric inputs are tested with str(...) != '' so an explicit 0 is still passed; only an empty field omits the flag. | |
17 mcl '$input' | |
18 -I '$inflation' | |
19 $input_type_select.input_type | |
20 -V all -te \${GALAXY_SLOTS:-1} | |
21 $sum_loops | |
22 $discard_loops | |
23 #if $verbosity | |
24 #for $v in $verbosity | |
25 -v $v | |
26 #end for | |
27 #end if | |
28 #if str($reweight) != '' | |
29 -c $reweight | |
30 #end if | |
31 #if $transform | |
32 #if $input_type_select.input_type == "--abc" or $input_type_select.input_type == "--sif" | |
33 -abc-tf '$transform' | |
34 #else if $input_type_select.input_type == "" | |
35 -tf '$transform' | |
36 #end if | |
37 #end if | |
38 #if $input_type_select.input_type == "--sif" | |
39 $input_type_select.expect_values | |
40 #else if $input_type_select.input_type == "" and $input_type_select.use_tab | |
41 -use-tab '$input_type_select.use_tab' | |
42 #end if | |
43 #if str($pruning_options.cutoff) != '' | |
44 -P $pruning_options.cutoff | |
45 #end if | |
46 #if str($pruning_options.selection_number) != '' | |
47 -S $pruning_options.selection_number | |
48 #end if | |
49 #if str($pruning_options.recover_number) != '' | |
50 -R $pruning_options.recover_number | |
51 #end if | |
52 #if str($pruning_options.recover_percentage) != '' | |
53 -pct $pruning_options.recover_percentage | |
54 #end if | |
55 -o '$output' | |
56 | |
57 ]]> | |
58 </command> | |
59 <inputs> | |
60 <conditional name="input_type_select"> <!-- Input format; the selected option value is placed on the mcl command line as-is. --> | |
61 <param name="input_type" label="Input type" type="select"> | |
62 <option selected="true" value="--abc">Labeled</option> | |
63 <option value="--sif">SIF</option> | |
64 <option value="">Matrix</option> <!-- empty value: no format flag is emitted --> | |
65 </param> | |
66 <when value=""> | |
67 <param name="use_tab" argument="-use-tab" type="data" format="tabular" optional="true" label="tabular label input" help="use tab file to convert the output to labels"/> | |
68 </when> | |
69 <when value="--sif"> | |
70 <param name="expect_values" argument="--expect-values" type="boolean" truevalue="--expect-values" falsevalue="" checked="false" optional="true" label="expect label:weight format" help="accept extended SIF (label:weight fields)"/> | |
71 </when> | |
72 <when value="--abc"/> | |
73 </conditional> | |
74 <param type="data" format="txt,tabular,sif" name="input" label="Input" optional="false" /> <!-- graph file handed to mcl as its first argument --> | |
75 <param name="inflation" argument="-I" type="float" value="2.0" label="Inflation" min="1" max="6" help="This value is the main handle for affecting cluster granularity. It is usually chosen somewhere in the range [1.2-5.0]. -I 5.0 will tend to result in fine-grained clusterings, and -I 1.2 will tend to result in very coarse grained clusterings. Your mileage will vary depending on the characteristics of your data. That is why it is a good idea to test the quality and coherency of your clusterings using clm dist and clm info. This will most likely reveal that certain values of -I are simply not right for your data." /> | |
76 <param name="transform" type="text" area="true" label="Transform input matrix values" help="See https://micans.org/mcl/man/mcxio.html#trans for the transformation syntax" > <!-- sent as -abc-tf for Labeled/SIF input and as -tf for Matrix input --> | |
77 <sanitizer> <!-- start from Galaxy's default set of allowed characters and additionally allow '#' --> | |
78 <valid initial="default"> | |
79 <add value="#" /> <!-- NOTE(review): presumably needed by '#'-prefixed transform operators; confirm against the mcxio manual --> | |
80 </valid> | |
81 </sanitizer> | |
82 </param> | |
83 <param argument="--discard-loops" type="boolean" checked="true" truevalue="--discard-loops=y" falsevalue="--discard-loops=n" label="Discard loops in input" help="Remove any loops that are present in the input. Bear in mind that loops will still be modified in all cases where the loop weight is not maximal among the list of edge weights for a given node." /> <!-- no explicit name; the command refers to it as $discard_loops --> | |
84 <param name="reweight" argument="-c" optional="true" type="float" value="1" label="Reweight loops" help="As the final step of loop computation (i.e. after initialization and shadowing) all loop weights are multiplied by the provided value." /> | |
85 <param type="boolean" checked="false" truevalue="--sum-loops" falsevalue="" argument="--sum-loops" label="Set loops to sum of other arcs weights" /> <!-- no explicit name; the command refers to it as $sum_loops --> | |
86 <param name="verbosity" type="select" multiple="true" optional="true" label="Verbosity"> <!-- each selected value is emitted as its own "-v value" pair --> | |
87 <option value="pruning">pruning</option> | |
88 <option value="explain">explain</option> | |
89 <option value="cls">cls</option> | |
90 </param> | |
91 <section title="Pruning options" name="pruning_options"> <!-- Resource-limiting knobs; each one maps to a single mcl flag (-P, -S, -R, -pct) and the flag is omitted when its field is left empty. --> | |
92 <param name="cutoff" type="integer" value="4000" label="cutoff" argument="-P" optional="true" help="inverse cutoff pruning value. Entries smaller than cutoff are removed"/> | |
93 <param name="selection_number" type="integer" value="500" label="selection number" argument="-S" optional="true" help="pruning selection value. maximum number of entries (if applicable)"/> | |
94 <param name="recover_number" type="integer" value="600" label="recover number" argument="-R" optional="true" help="Pruning recover number. Recover the largest discarded entries during pruning, if number of entries less than -R"/> | |
95 <param name="recover_percentage" type="integer" value="90" label="recover percentage" argument="-pct" min="0" max="100" optional="true" help="Pruning recover percentage. Recover the largest discarded entries during pruning, if sum of remaining entries is less than -pct/100"/> | |
96 </section> | |
97 </inputs> | |
98 <outputs> | |
99 <data name="output" format="tabular"/> <!-- the file mcl writes through -o '$output' --> | |
100 </outputs> | |
101 <tests> <!-- The first three tests feed the same graph in the three input formats and expect the same mcl-simple.out. --> | |
102 <test expect_num_outputs="1"> <!-- Labeled (ABC) input with a transform --> | |
103 <param name="input" value="mcl-simple.tabular" ftype="tabular" /> | |
104 <conditional name="input_type_select"> | |
105 <param name="input_type" value="--abc" /> | |
106 </conditional> | |
107 <param name="transform" value="gq(0.7),add(-0.7)" /> | |
108 <param name="inflation" value="3"/> | |
109 <output name="output" file="mcl-simple.out" ftype="tabular" /> | |
110 </test> | |
111 <test expect_num_outputs="1"> <!-- Matrix input with a tab file for label output --> | |
112 <param name="input" value="mcl-simple.mci" ftype="txt" /> | |
113 <conditional name="input_type_select"> | |
114 <param name="input_type" value="" /> | |
115 <param name="use_tab" value="mcl-simple.mci.tab" ftype="tabular" /> | |
116 </conditional> | |
117 <param name="transform" value="gq(0.7),add(-0.7)" /> | |
118 <param name="inflation" value="3"/> | |
119 <output name="output" file="mcl-simple.out" ftype="tabular" /> | |
120 </test> | |
121 <test expect_num_outputs="1"> <!-- SIF input with label:weight fields --> | |
122 <param name="input" value="mcl-simple.sif" ftype="sif" /> | |
123 <conditional name="input_type_select"> | |
124 <param name="input_type" value="--sif" /> | |
125 <param name="expect_values" value="true" /> | |
126 </conditional> | |
127 <param name="transform" value="gq(0.7),add(-0.7)" /> | |
128 <param name="inflation" value="3"/> | |
129 <output name="output" file="mcl-simple.out" ftype="tabular" /> | |
130 </test> | |
131 <test expect_num_outputs="1"> <!-- Labeled input exercising the pruning and loop options --> | |
132 <param name="input" value="mcl-simple.tabular" ftype="tabular" /> | |
133 <conditional name="input_type_select"> | |
134 <param name="input_type" value="--abc" /> | |
135 </conditional> | |
136 <param name="cutoff" value="1"/> | |
137 <param name="recover_number" value="1"/> | |
138 <param name="selection_number" value="1"/> | |
139 <param name="discard_loops" value="false"/> | |
140 <param name="reweight" value="0.5"/> | |
141 <param name="sum_loops" value="true"/> | |
142 <output name="output" file="mcl-simple-pruning.out" ftype="tabular" /> | |
143 </test> | |
144 <test expect_num_outputs="1"> <!-- Matrix input without a tab file --> | |
145 <param name="input" value="mcl-simple.mci" ftype="txt" /> | |
146 <conditional name="input_type_select"> | |
147 <param name="input_type" value="" /> | |
148 </conditional> | |
149 <output name="output" file="mcl-simple.mci-no-tab.out" ftype="tabular" lines_diff="2"/> | |
150 </test> | |
151 </tests> | |
152 <help> | |
153 <![CDATA[ | |
154 **What it does** | |
155 | |
156 The `Markov Cluster Algorithm`_, aka the MCL algorithm. | |
157 | |
158 The MCL algorithm is short for the Markov Cluster Algorithm, a fast and scalable unsupervised cluster algorithm for graphs (also known as networks) based on simulation of (stochastic) flow in graphs. It has found usage in bioinformatics and other disciplines. | |
159 | |
160 The MCL algorithm finds cluster structure in graphs by a mathematical bootstrapping procedure. The process deterministically computes (the probabilities of) random walks through the graph, and uses two operators transforming one set of probabilities into another. It does so using the language of stochastic matrices (also called Markov matrices) which capture the mathematical concept of random walks on a graph. | |
161 | |
162 The MCL algorithm simulates random walks within a graph by alternation of two operators called expansion and inflation. Expansion coincides with taking the power of a stochastic matrix using the normal matrix product (i.e. matrix squaring). Inflation corresponds with taking the Hadamard power of a matrix (taking powers entrywise), followed by a scaling step, such that the resulting matrix is stochastic again, i.e. the matrix elements (on each column) correspond to probability values. | |
163 | |
164 The basic interface to the algorithm is very simple - you need only one option (the -I flag) to get to the heart of it. The number of clusters cannot be specified. It is implicitly controlled using the inflation parameter. Inflation affects the granularity or resolution of the clustering outcome, with low values (1.3, 1.4) leading to fewer and larger clusters and high values (5, 6) leading to more and smaller clusters; the default value of 2 is a good starting point. For large graphs you should also be aware of the pruning options for regulating resources. | |
165 | |
166 Network construction and reduction techniques should not be considered as part of a clustering algorithm. Nevertheless particular techniques may benefit particular methods or applications. In mcl many transformations are accessible through the *transform* option. It can be used for edge weight transformations and selection, as well as transformations that act on a graph as a whole. It is for example possible to remove edges with weight below 0.7 by issuing -tf 'gq(0.7)', where the quotes are necessary to prevent the shell from interpreting the parentheses. The option accepts more complicated sequences, such as -tf 'gq(0.7),add(-0.7)'. This causes all remaining edge weights to be shifted to the range [0-0.3], assuming that the input contains correlations. Many more transformations are supported, as documented in mcxio_. | |
167 | |
168 .. _Markov Cluster Algorithm: https://micans.org/mcl/man/mcl.html | |
169 .. _mcxio: https://micans.org/mcl/man/mcxio.html | |
170 | |
171 **Input** | |
172 | |
173 MCL supports a number of different input formats. The recommended way is to use a labeled input (ABC-format). The input is then a file or stream in which each line encodes an edge in terms of two labels (the 'A' and the 'B') and a numerical value (the 'C'), all separated by white space. MCL also supports SIF format and exposes a native matrix representation, which is useful whenever other programs of the mcl-suite are used in tandem:: | |
174 | |
175 Labeled | |
176 This simple format expects two or three fields separated by white space on each line. The first and sec- | |
177 ond fields are interpreted as labels specifying source and destination node respectively. The third fie- | |
178 ld, if present, specifies the weight of the arc connecting the two nodes. | |
179 | |
180 SIF | |
181 This option tells mcl to expect SIF (Simple Interaction File) format. This format is line based. The fi- | |
182 rst two fields specify the source node (as a label) and the relationship type. An arbitrary number of f- | |
183 ields may follow, each containing a label identifying a destination node. The second field is simply ig- | |
184 nored by mcl. As an extension to the SIF format weights may optionally follow the labels, separated from | |
185 them with a colon character. It is in this case necessary to use the --expect-values option. | |
186 | |
187 --expect-values(expect label:weight format) | |
188 accept extended SIF format (label:weight fields) | |
189 | |
190 Matrix | |
191 MCL internal matrix representation. See https://micans.org/mcl/man/mcxio.html#gspec for detailed informa- | |
192 tion. Use -use-tab to write label output using dictionary file | |
193 | |
194 -use-tab <fname> (use mapping to write) | |
195 -use-tab is only useful when matrix input is used. It will use the tab file to convert the output to lab- | |
196 els; it does not fail on indices missing from the tab file, but will bind these to generated dummy labels. | |
197 | |
198 **Options**:: | |
199 | |
200 -I <num> (inflation) | |
201 Sets the main inflation value to <num>. This value is the main handle for affecting cluster granularity. | |
202 It is usually chosen somewhere in the range [1.2-5.0]. -I 5.0 will tend to result in fine-grained clust- | |
203 erings, and -I 1.2 will tend to result in very coarse grained clusterings. Your mileage will vary depen- | |
204 ding on the characteristics of your data. | |
205 | |
206 -tf <tf-spec> (transform) | |
207 transforms the values of the input matrix according to <tf-spec> | |
208 | |
209 -c <num> (reweight loops) | |
210 increase loop-weights <num>-fold | |
211 --sum-loops (set loops to sum of other arcs weights) | |
212 with the -c <num> option, as the final step of loop computation (i.e. after initialization and shadowing) | |
213 all loop weights are multiplied by <num>, if supplied. | |
214 | |
215 --discard-loops=<y/n> (discard loops in input) | |
216 By default mcl will remove any loops that are present in the input. Use --discard-loops=n to turn this | |
217 off. Bear in mind that loops will still be modified in all cases where the loop weight is not maximal a- | |
218 mong the list of edge weights for a given node. | |
219 | |
220 **Pruning options** | |
221 | |
222 After computing a new (column stochastic) matrix vector during expansion (which is matrix multiplication c.q. squaring), the vector is successively exposed to different pruning strategies. The intent of pruning is that many small entries are removed while retaining much of the stochastic mass of the original vector. After pruning, vectors are rescaled to be stochastic again. MCL iterands are theoretically known to be sparse in a weighted sense, and this manoeuvre effectively perturbs the MCL process a little in order to obtain matrices that are genuinely sparse, thus keeping the computation tractable. | |
223 | |
224 mcl proceeds as follows. First, entries that are smaller than cutoff are removed, resulting in a vector with at most 1/cutoff entries. The cutoff can be supplied as the inverse value (1/cutoff) by **-P**. | |
225 | |
226 Second, if the remaining stochastic mass (i.e. the sum of all remaining entries) is less than <pct>/100 (specified by the **-pct** flag) and the number of remaining entries is less than <r> (as specified by the **-R** flag), mcl will try to regain ground by recovering the largest discarded entries. The total number of entries is not allowed to grow larger than <r>. If recovery was not necessary, mcl tries to prune the vector further down to at most s entries (if applicable), as specified by the **-S** flag. If this results in a vector that satisfies the recovery condition then recovery is attempted, exactly as described above. The latter will not occur of course if <r> <= <s>.:: | |
227 | |
228 -P <int> (1/cutoff) | |
229 (inverted) rigid pruning threshold | |
230 | |
231 -S <int> (selection number) | |
232 select down to <int> entries if needed | |
233 | |
234 -R <int> (recover number) | |
235 recover to maximally <int> entries if needed | |
236 | |
237 -pct <pct> (recover percentage) | |
238 try recovery if mass is less than <pct> | |
239 | |
240 **Output** | |
241 | |
242 The output is then a file where each line is a cluster of tab-separated labels. | |
243 | |
244 | |
245 ]]> | |
246 </help> | |
247 <citations> <!-- The BibTeX entry is van Dongen's PhD thesis, which has no DOI. --> | |
248 <citation type="doi">10.5281/zenodo.3364789</citation> | |
249 <citation type="bibtex"> | |
250 <![CDATA[ | |
251 @phdthesis{vandongen2000graph, | |
252 title={Graph Clustering by Flow Simulation}, | |
253 author={van Dongen, Stijn}, | |
254 school={University of Utrecht}, | |
255 year={2000}, | |
256 address={Utrecht, The Netherlands} | |
257 } | |
258 ]]> | |
259 </citation> | |
260 <citation type="doi">10.1093/nar/30.7.1575</citation> | |
261 </citations> | |
262 </tool> |