comparison mcl.xml @ 0:649b9cb20668 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/mcl commit 6fcfaa3d5fefc854ec7398c2848e8db669593b71
author iuc
date Mon, 13 Jun 2022 17:34:32 +0000
parents
children e092787c0a29
comparison
equal deleted inserted replaced
-1:000000000000 0:649b9cb20668
1 <tool id="mcl" name="MCL" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
2 <description>Markov Cluster Algorithm for graphs</description>
3 <macros> <!-- Tokens expanded wherever @NAME@ appears: the tool version attribute and the package requirement version. -->
4 <token name="@TOOL_VERSION@">14.137</token>
5 <token name="@VERSION_SUFFIX@">0</token>
6 </macros>
7 <xrefs>
8 <xref type="bio.tools">mcl</xref>
9 </xrefs>
10 <requirements>
11 <requirement type="package" version="@TOOL_VERSION@">mcl</requirement>
12 </requirements>
13 <version_command>mcl --version</version_command>
14 <command detect_errors="aggressive">
15 <![CDATA[
16 ## Optional numeric options are tested with str(...) != '' so that an explicit 0 is still passed to mcl; dataset paths are single-quoted.
17 mcl '$input'
18 -I '$inflation'
19 $input_type_select.input_type
20 -V all -te \${GALAXY_SLOTS:-1}
21 $sum_loops
22 $discard_loops
23 #if $verbosity
24 #for $v in $verbosity
25 -v $v
26 #end for
27 #end if
28 #if str($reweight) != ''
29 -c $reweight
30 #end if
31 #if $transform
32 #if $input_type_select.input_type == "--abc" or $input_type_select.input_type == "--sif"
33 -abc-tf '$transform'
34 #else if $input_type_select.input_type == ""
35 -tf '$transform'
36 #end if
37 #end if
38 #if $input_type_select.input_type == "--sif"
39 $input_type_select.expect_values
40 #else if $input_type_select.input_type == "" and $input_type_select.use_tab
41 -use-tab '$input_type_select.use_tab'
42 #end if
43 #if str($pruning_options.cutoff) != ''
44 -P $pruning_options.cutoff
45 #end if
46 #if str($pruning_options.selection_number) != ''
47 -S $pruning_options.selection_number
48 #end if
49 #if str($pruning_options.recover_number) != ''
50 -R $pruning_options.recover_number
51 #end if
52 #if str($pruning_options.recover_percentage) != ''
53 -pct $pruning_options.recover_percentage
54 #end if
55 -o '$output'
56
57 ]]>
58 </command>
59 <inputs> <!-- User-facing parameters. Params that declare only `argument` (e.g. discard-loops, sum-loops) are referenced in the command template as $discard_loops and $sum_loops. -->
60 <conditional name="input_type_select">
61 <param name="input_type" type="select" label="Input type">
62 <option value="--abc" selected="true">Labeled</option>
63 <option value="--sif">SIF</option>
64 <option value="">Matrix</option>
65 </param>
66 <when value="">
67 <param type="data" format="tabular" name="use_tab" argument="-use-tab" label="tabular label input" optional="true" help="use tab file to convert the output to labels"/>
68 </when>
69 <when value="--sif">
70 <param type="boolean" name="expect_values" checked="false" truevalue="--expect-values" falsevalue="" argument="--expect-values" label="expect label:weight format" optional="true" help="accept extended SIF (label:weight fields)" />
71 </when>
72 <when value="--abc" />
73 </conditional>
74 <param type="data" format="txt,tabular,sif" name="input" label="Input" optional="false" />
75 <param name="inflation" argument="-I" type="float" value="2.0" label="Inflation" min="1" max="6" help="This value is the main handle for affecting cluster granularity. It is usually chosen somewhere in the range [1.2-5.0]. -I 5.0 will tend to result in fine-grained clusterings, and -I 1.2 will tend to result in very coarse grained clusterings. Your mileage will vary depending on the characteristics of your data. That is why it is a good idea to test the quality and coherency of your clusterings using clm dist and clm info. This will most likely reveal that certain values of -I are simply not right for your data." />
76 <param name="transform" type="text" area="true" label="Transform input matrix values" help="See https://micans.org/mcl/man/mcxio.html#trans for the transformation syntax" >
77 <sanitizer>
78 <valid initial="default">
79 <add value="#" />
80 </valid>
81 </sanitizer>
82 </param>
83 <param argument="--discard-loops" type="boolean" checked="true" truevalue="--discard-loops=y" falsevalue="--discard-loops=n" label="Discard loops in input" help="Remove any loops that are present in the input. Bear in mind that loops will still be modified in all cases where the loop weight is not maximal among the list of edge weights for a given node." />
84 <param name="reweight" argument="-c" optional="true" type="float" value="1" label="Reweight loops" help="As the final step of loop computation (i.e. after initialization and shadowing) all loop weights are multiplied by the provided value." />
85 <param type="boolean" checked="false" truevalue="--sum-loops" falsevalue="" argument="--sum-loops" label="Set loops to sum of other arcs weights" />
86 <param name="verbosity" type="select" multiple="true" optional="true" label="Verbosity">
87 <option value="pruning">pruning</option>
88 <option value="explain">explain</option>
89 <option value="cls">cls</option>
90 </param>
91 <section title="Pruning options" name="pruning_options">
92 <param name="cutoff" type="integer" value="4000" label="cutoff" argument="-P" optional="true" help="inverse cutoff pruning value. Entries smaller than cutoff are removed"></param>
93 <param name="selection_number" type="integer" value="500" label="selection number" argument="-S" optional="true" help="pruning selection value. maximum number of entries (if applicable)"></param>
94 <param name="recover_number" type="integer" value="600" label="recover number" argument="-R" optional="true" help="Pruning recover number. Recover the largest discarded entries during pruning, if the number of entries is less than -R"></param>
95 <param name="recover_percentage" type="integer" value="90" label="recover percentage" argument="-pct" min="0" max="100" optional="true" help="Pruning recover percentage. Recover the largest discarded entries during pruning, if sum of remaining entries is less than -pct/100"></param>
96 </section>
97 </inputs>
98 <outputs> <!-- Single tabular dataset; the command template writes it via -o '$output'. -->
99 <data name="output" format="tabular"/>
100 </outputs>
101 <tests> <!-- One test per input type, one for pruning/loop options, one for matrix input without a tab file. -->
102 <test expect_num_outputs="1"> <!-- Labeled (ABC) input with a transform. -->
103 <param name="input" value="mcl-simple.tabular" ftype="tabular" />
104 <conditional name="input_type_select">
105 <param name="input_type" value="--abc" />
106 </conditional>
107 <param name="transform" value="gq(0.7),add(-0.7)" />
108 <param name="inflation" value="3"/>
109 <output name="output" file="mcl-simple.out" ftype="tabular" />
110 </test>
111 <test expect_num_outputs="1"> <!-- Matrix input, labels restored from a tab file. -->
112 <param name="input" value="mcl-simple.mci" ftype="txt" />
113 <conditional name="input_type_select">
114 <param name="input_type" value="" />
115 <param name="use_tab" value="mcl-simple.mci.tab" ftype="tabular" />
116 </conditional>
117 <param name="transform" value="gq(0.7),add(-0.7)" />
118 <param name="inflation" value="3"/>
119 <output name="output" file="mcl-simple.out" ftype="tabular" />
120 </test>
121 <test expect_num_outputs="1"> <!-- SIF input with label:weight fields. -->
122 <param name="input" value="mcl-simple.sif" ftype="sif" />
123 <conditional name="input_type_select">
124 <param name="input_type" value="--sif" />
125 <param name="expect_values" value="true" />
126 </conditional>
127 <param name="transform" value="gq(0.7),add(-0.7)" />
128 <param name="inflation" value="3"/>
129 <output name="output" file="mcl-simple.out" ftype="tabular" />
130 </test>
131 <test expect_num_outputs="1"> <!-- Pruning and loop-handling options. -->
132 <param name="input" value="mcl-simple.tabular" ftype="tabular" />
133 <conditional name="input_type_select">
134 <param name="input_type" value="--abc" />
135 </conditional>
136 <param name="cutoff" value="1"/>
137 <param name="recover_number" value="1"/>
138 <param name="selection_number" value="1"/>
139 <param name="discard_loops" value="false"/>
140 <param name="reweight" value="0.5"/>
141 <param name="sum_loops" value="true"/>
142 <output name="output" file="mcl-simple-pruning.out" ftype="tabular" />
143 </test>
144 <test expect_num_outputs="1"> <!-- Matrix input without a tab file; lines_diff="2" tolerates two differing lines. -->
145 <param name="input" value="mcl-simple.mci" ftype="txt" />
146 <conditional name="input_type_select">
147 <param name="input_type" value="" />
148 </conditional>
149 <output name="output" file="mcl-simple.mci-no-tab.out" ftype="tabular" lines_diff="2"/>
150 </test>
151 </tests>
152 <help>
153 <![CDATA[
154 **What it does**
155
156 The `Markov Cluster Algorithm`_, aka the MCL algorithm.
157
158 The MCL algorithm is short for the Markov Cluster Algorithm, a fast and scalable unsupervised cluster algorithm for graphs (also known as networks) based on simulation of (stochastic) flow in graphs. It has found usage in bioinformatics and other disciplines.
159
160 The MCL algorithm finds cluster structure in graphs by a mathematical bootstrapping procedure. The process deterministically computes (the probabilities of) random walks through the graph, and uses two operators transforming one set of probabilities into another. It does so using the language of stochastic matrices (also called Markov matrices) which capture the mathematical concept of random walks on a graph.
161
162 The MCL algorithm simulates random walks within a graph by alternation of two operators called expansion and inflation. Expansion coincides with taking the power of a stochastic matrix using the normal matrix product (i.e. matrix squaring). Inflation corresponds with taking the Hadamard power of a matrix (taking powers entrywise), followed by a scaling step, such that the resulting matrix is stochastic again, i.e. the matrix elements (on each column) correspond to probability values.
163
164 The basic interface to the algorithm is very simple - you need only one option (the -I flag) to get to the heart of it. The number of clusters cannot be specified. It is implicitly controlled using the inflation parameter. Inflation affects the granularity or resolution of the clustering outcome, with low values (1.3, 1.4) leading to fewer and larger clusters and high values (5, 6) leading to more and smaller clusters; the default value of 2 is a good starting point. For large graphs you should also be aware of the pruning options for regulating resources.
165
166 Network construction and reduction techniques should not be considered as part of a clustering algorithm. Nevertheless particular techniques may benefit particular methods or applications. In mcl many transformations are accessible through the *transform* option. It can be used for edge weight transformations and selection, as well as transformations that act on a graph as a whole. It is for example possible to remove edges with weight below 0.7 by issuing -tf 'gq(0.7)', where the quotes are necessary to prevent the shell from interpreting the parentheses. The option accepts more complicated sequences, such as -tf 'gq(0.7),add(-0.7)'. This causes all remaining edge weights to be shifted to the range [0-0.3], assuming that the input contains correlations. Many more transformations are supported, as documented in mcxio_.
167
168 .. _Markov Cluster Algorithm: https://micans.org/mcl/man/mcl.html
169 .. _mcxio: https://micans.org/mcl/man/mcxio.html
170
171 **Input**
172
173 MCL supports a number of different input formats. The recommended way is to use a labeled input (ABC-format). The input is then a file or stream in which each line encodes an edge in terms of two labels (the 'A' and the 'B') and a numerical value (the 'C'), all separated by white space. MCL also supports SIF format and exposes a native matrix representation, which is useful whenever other programs of the mcl-suite are used in tandem::
174
175 Labeled
176 This simple format expects two or three fields separated by white space on each line. The first and sec-
177 ond fields are interpreted as labels specifying source and destination node respectively. The third fie-
178 ld, if present, specifies the weight of the arc connecting the two nodes.
179
180 SIF
181 This option tells mcl to expect SIF (Simple Interaction File) format. This format is line based. The fi-
182 rst two fields specify the source node (as a label) and the relationship type. An arbitrary number of f-
183 ields may follow, each containing a label identifying a destination node. The second field is simply ig-
184 nored by mcl. As an extension to the SIF format weights may optionally follow the labels, separated from
185 them with a colon character. It is in this case necessary to use the --expect-values option.
186
187 --expect-values(expect label:weight format)
188 accept extended SIF format (label:weight fields)
189
190 Matrix
191 MCL internal matrix representation. See https://micans.org/mcl/man/mcxio.html#gspec for detailed informa-
192 tion. Use -use-tab to write label output using a dictionary file.
193
194 -use-tab <fname> (use mapping to write)
195 -use-tab is only useful when matrix input is used. It will use the tab file to convert the output to lab-
196 els; it does not fail on indices missing from the tab file, but will bind these to generated dummy labels.
197
198 **Options**::
199
200 -I <num> (inflation)
201 Sets the main inflation value to <num>. This value is the main handle for affecting cluster granularity.
202 It is usually chosen somewhere in the range [1.2-5.0]. -I 5.0 will tend to result in fine-grained clust-
203 erings, and -I 1.2 will tend to result in very coarse grained clusterings. Your mileage will vary depen-
204 ding on the characteristics of your data.
205
206 -tf <tf-spec> (transform)
207 transforms the values of the input matrix according to <tf-spec>
208
209 -c <num> (reweight loops)
210 increase loop-weights <num>-fold
211 --sum-loops (set loops to sum of other arcs weights)
212 with the -c <num> option, as the final step of loop computation (i.e. after initialization and shadowing)
213 all loop weights are multiplied by <num>, if supplied.
214
215 --discard-loops=<y/n> (discard loops in input)
216 By default mcl will remove any loops that are present in the input. Use --discard-loops=n to turn this
217 off. Bear in mind that loops will still be modified in all cases where the loop weight is not maximal a-
218 mong the list of edge weights for a given node.
219
220 **Pruning options**
221
222 After computing a new (column stochastic) matrix vector during expansion (which is matrix multiplication c.q. squaring), the vector is successively exposed to different pruning strategies. The intent of pruning is that many small entries are removed while retaining much of the stochastic mass of the original vector. After pruning, vectors are rescaled to be stochastic again. MCL iterands are theoretically known to be sparse in a weighted sense, and this manoeuvre effectively perturbs the MCL process a little in order to obtain matrices that are genuinely sparse, thus keeping the computation tractable.
223
224 mcl proceeds as follows. First, entries that are smaller than cutoff are removed, resulting in a vector with at most 1/cutoff entries. The cutoff can be supplied as the inverse value (1/cutoff) by **-P**.
225
226 Second, if the remaining stochastic mass (i.e. the sum of all remaining entries) is less than <pct>/100 (specified by the **-pct** flag) and the number of remaining entries is less than <r> (as specified by the **-R** flag), mcl will try to regain ground by recovering the largest discarded entries. The total number of entries is not allowed to grow larger than <r>. If recovery was not necessary, mcl tries to prune the vector further down to at most s entries (if applicable), as specified by the **-S** flag. If this results in a vector that satisfies the recovery condition then recovery is attempted, exactly as described above. The latter will not occur of course if <r> <= <s>.::
227
228 -P <int> (1/cutoff)
229 (inverted) rigid pruning threshold
230
231 -S <int> (selection number)
232 select down to <int> entries if needed
233
234 -R <int> (recover number)
235 recover to maximally <int> entries if needed
236
237 -pct <pct> (recover percentage)
238 try recovery if mass is less than <pct>
239
240 **Output**
241
242 The output is then a file where each line is a cluster of tab-separated labels.
243
244
245 ]]>
246 </help>
247 <citations> <!-- Two DOI citations plus a BibTeX entry for the MCL PhD thesis. -->
248 <citation type="doi">10.5281/zenodo.3364789</citation>
249 <citation type="bibtex">
250 <![CDATA[
251 @phdthesis{vandongen2000graph,
252 title={Graph Clustering by Flow Simulation},
253 author={van Dongen, Stijn},
254 school={University of Utrecht},
255 address={Utrecht, The Netherlands},
256 year={2000}
257 }
258 ]]>
259 </citation>
260 <citation type="doi">10.1093/nar/30.7.1575</citation>
261 </citations>
262 </tool>