Mercurial > repos > iuc > raceid_filtnormconf
comparison macros_cluster.xml @ 0:8dc8ff057b0f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit f880060c478d42202df5b78a81329f8af56b1138
author | iuc |
---|---|
date | Thu, 22 Nov 2018 04:44:44 -0500 |
parents | |
children | d55e29ac02e3 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8dc8ff057b0f |
---|---|
1 <macros> | |
<macro name="cluster_inputs">
    <!--
        Input form for the RaceID filtering / clustering steps: the count
        matrix followed by the Filtering, Clustering, Outliers, tSNE/FR and
        Extra sections. The Cheetah tokens in this file read these values as
        <section>.<param>.

        Advanced parameters are wrapped in the "use_defaults_no" macro, which
        is not defined in this file. The templates address its contents as
        <section>.use.<param> and test <section>.use.def against "no"
        (presumably a yes/no conditional; confirm against the macro itself).

        Boolean defaults are declared with "checked", the attribute Galaxy
        documents for boolean params.
    -->
    <param name="intable" type="data" format="tabular" label="Count Matrix" />

    <section name="filt" title="Filtering" expanded="true">
        <param name="mintotal" type="integer" min="1" value="3000" label="Min Transcripts" help="The minimum total transcripts required. Cells with less than mintotal transcripts are filtered out." />
        <param name="minexpr" type="integer" min="1" value="5" label="Min Expression" help="The minimum required transcript counts of a gene in the minimum number of cells (below)" />
        <param name="minnumber" type="integer" min="1" value="5" label="Min Cells" help="The minimum number of cells for gene expression to be counted" />
        <expand macro="use_defaults_no">
            <param name="knn" type="integer" min="0" value="10" label="K-nearest-neighbours" help="Number of nearest neighbors used to infer corresponding cell types in different batches" />
            <!-- The three text params below are comma-separated lists; the
                 template converts them with string2textvector and skips them
                 when left empty. -->
            <param name="CGenes" type="text" optional="true" label="CGenes" help="Filter out genes with correlated expression for cell type inference">
                <expand macro="sanitize_string_vector" />
            </param>
            <param name="FGenes" type="text" optional="true" label="FGenes" help="Explicitly filter out genes for cell type inference">
                <expand macro="sanitize_string_vector" />
            </param>
            <param name="LBatch_regexes" type="text" optional="true" label="Batch Regex" help="List of regexes to capture experimental batches for batch effect correction">
                <expand macro="sanitize_string_vector" />
            </param>
            <param name="ccor" type="float" value="0.4" label="CCor" help="Correlation coefficient used as a threshold for determining correlated genes" />
            <param name="bmode" type="select" label="Batch Mode" help="Method to regress out batch effects">
                <option value="RaceID" selected="true">RaceID</option>
                <option value="scran">SCRAN</option>
            </param>
            <!-- Cell-cycle correction: the CCcorrect arguments are only
                 shown, and only forwarded by the template, when "use" is
                 "yes". -->
            <conditional name="ccc">
                <param name="use" type="select" label="Perform Cell-cycle correction?">
                    <option value="yes">Yes</option>
                    <option value="no" selected="true">No</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="vset" type="text" optional="true" label="List of Gene Sets">
                        <expand macro="sanitize_string_vector" />
                    </param>
                    <param name="pvalue" type="float" value="0.01" min="0" max="1" label="P-value Cutoff" help="P-value cutoff for determining enriched components" />
                    <param name="quant" type="float" value="0.01" min="0" max="1" label="Quantification Fraction" help="Upper and lower fraction of gene loadings used for determining enriched components" />
                    <param name="ncomp" type="integer" min="0" optional="true" label="Number of components to use" help="If left blank, the maximum number of components are used" /><!-- 0 = NULL -->
                    <param name="dimr" type="boolean" checked="true" label="Derive Components from saturation criterion" />
                    <param name="mode" type="select" label="Type of Component Analysis" help="If ICA is selected, ensure that the number of components value above is sufficiently high">
                        <option value="pca" selected="true">PCA</option>
                        <option value="ica">ICA</option>
                    </param>
                    <param name="logscale" type="boolean" checked="false" label="Log-transform data prior to PCA or ICA" help="" />
                </when>
            </conditional>
        </expand>
    </section>

    <section name="clust" title="Clustering" expanded="true">
        <!-- CompDist -->
        <param name="metric" type="select" label="Distance Metric">
            <option value="pearson" selected="true">Pearson</option>
            <option value="spearman">Spearman</option>
            <option value="logpearson">Log Pearson</option>
            <option value="euclidean">Euclidean</option>
        </param>
        <!-- ClustExp -->
        <param name="funcluster" type="select" label="Clustering method">
            <option value="kmedoids" selected="true">K-medoids</option>
            <option value="kmeans">K-means</option>
            <option value="hclust">H-Clust</option>
        </param>
        <expand macro="use_defaults_no">
            <!-- CompDist -->
            <param name="fselect" type="boolean" checked="true" label="Perform feature selection" />
            <param name="knn" type="integer" min="0" optional="true" label="KNN" help="Number of nearest neighbours for imputing gene expression" /><!-- 0: NULL -->
            <!-- ClustExp -->
            <param name="sat" type="boolean" checked="true" label="Saturation-based clustering?" help="Determine number of clusters on saturation point of the mean within-cluster dispersion as a function of the cluster number." />
            <param name="clustnr" type="integer" min="0" value="30" label="Max number of clusters using Saturation-by-mean" help="Max number of clusters for the derivation of the cluster number by the saturation of mean within-cluster-dispersion." />
            <param name="samp" type="integer" min="0" optional="true" label="Sample random number of cells" help="Number of random sample of cells used for the inference of cluster number and Jaccard similarity" /><!-- 0:NULL -->
            <param name="cln" type="integer" min="0" optional="true" label="Number of clusters" /><!-- 0:Null -->
            <param name="bootnr" type="integer" min="0" value="50" label="Number of bootstrapping runs" />
            <param name="rseed" type="integer" value="17000" label="Random seed" />
        </expand>
    </section>

    <section name="outlier" title="Outliers" expanded="true">
        <!-- Find Outliers -->
        <param name="outminc" type="integer" min="0" value="5" label="Minimum Transcripts" help="minimal transcript count of a gene in a cluster to be tested for being an outlier gene" />
        <param name="outlg" type="integer" min="1" value="2" label="Minimum Genes" help="Minimum number of outlier genes required for being an outlier cell" />
        <!-- RFCorrect -->
        <param name="final" type="boolean" checked="true" label="Plot Final Clusters?" help="Reclassification of cell types using out-of-bag analysis is performed based on the final clusters after outlier identification. If 'FALSE', then the cluster partition prior to outlier identification is used for reclassification." />
        <expand macro="use_defaults_no">
            <!-- Find Outliers -->
            <param name="probthr" type="float" min="0" value="0.001" label="Outlier Probability Threshold" help="Probability threshold for the above specified minimum number of genes to be an outlier cell. This probability is computed from a negative binomial background model of expression in a cluster" />
            <param name="outdistquant" type="float" min="0" max="1" value="0.95" label="Outlier Distance Quantile" help="Outlier cells are merged to outlier clusters if their distance is smaller than the outdistquant-quantile of the distance distribution of pairs of cells in the original clusters after outlier removal" />
            <!-- RFCorrect -->
            <param name="nbtree" type="integer" min="0" optional="true" label="Number of trees to be built" /><!-- 0:Null -->
            <param name="nbfactor" type="integer" min="0" value="5" label="Tree Factor" help="Number of trees based on the number of cells multiplied by this factor. Effective only if the number of trees parameter is left blank or set to 0" />
            <param name="rfseed" type="integer" value="12345" label="Random Seed" />
        </expand>
    </section>

    <section name="tsne" title="tSNE and FR" expanded="true">
        <!-- CompTSNE -->
        <param name="perplexity" type="integer" min="0" value="30" label="Perplexity" help="Perplexity of the t-SNE map" />
        <!-- CompFR -->
        <param name="knn" type="integer" min="0" value="10" label="KNN" help="Number of nearest neighbours used for the inference of the Fruchterman-Reingold layout" />
        <expand macro="use_defaults_no">
            <!-- CompTSNE -->
            <param name="initial_cmd" type="boolean" checked="true" label="tSNE map initialised by classical multidimensional scaling" />
            <param name="rseed_tsne" type="integer" value="15555" label="Random Seed (tSNE)" />
            <!-- CompFR -->
            <param name="rseed_fr" type="integer" min="0" value="15555" label="Random Seed (FR)" />
        </expand>
    </section>

    <!-- Gene-list reporting limits and differential-expression cutoffs. -->
    <section name="extra" title="Extra Parameters" expanded="false">
        <param name="tablelim" type="integer" min="1" value="25" label="Table Limit" help="Top N genes to print per cluster" />
        <param name="plotlim" type="integer" min="1" value="10" label="Plot Limit" help="Top N genes to plot. Must be less than or equal to the Table Limit" />
        <param name="foldchange" type="float" min="0" value="1" label="Fold change" />
        <param name="pvalue" type="float" min="0" max="1" value="0.01" label="P-value Cutoff" help="P-value cutoff for the inference of differential gene expression" />
    </section>
</macro>
<macro name="cluster_tests">
    <!--
        Functional tests for the "cluster" mode. Four cases: pure defaults on
        the bundled intestinal data, a user matrix with relaxed filtering,
        defaults spelled out explicitly, and an advanced run with cell-cycle
        correction. The "test_nondef" macro is defined outside this file.
    -->
    <test>
        <!-- default test -->
        <conditional name="tool">
            <param name="mode" value="cluster" />
            <!-- This is a file with a single word 'test', which prompts the scripts to use the test intestinalData in the library -->
            <param name="intable" value="use.intestinal" />
        </conditional>
        <output name="outgenelist" value="intestinal.genelist" />
        <output name="outpdf" value="intestinal.pdf" compare="sim_size" delta="50" />
    </test>
    <test>
        <!-- defaults, feeding in a matrix with reduced filtering -->
        <conditional name="tool">
            <param name="mode" value="cluster" />
            <param name="intable" value="matrix.tabular" />
            <section name="filt">
                <param name="mintotal" value="1000" />
                <param name="minexpr" value="1" />
                <param name="minnumber" value="3" />
            </section>
            <param name="use_log" value="true" />
        </conditional>
        <!-- Output assertions are direct children of <test>, as in the
             other test cases of this macro. -->
        <output name="outgenelist" value="matrix.genelist" />
        <output name="outrdat" value="matrix.rdat" compare="sim_size" delta="15" />
        <output name="outpdf" value="matrix.pdf" compare="sim_size" delta="10" />
        <output name="outlog" value="matrix.log" />
    </test>
    <test>
        <!-- defaults, but manually specified. No opts, no CC. Generates identical to above -->
        <conditional name="tool">
            <param name="mode" value="cluster" />
            <param name="intable" value="use.intestinal" />
            <section name="filt">
                <param name="mintotal" value="3000" />
                <param name="minexpr" value="5" />
                <param name="minnumber" value="5" />
                <expand macro="test_nondef">
                    <param name="knn" value="10" />
                    <param name="ccor" value="0.4" />
                    <param name="bmode" value="RaceID" />
                </expand>
            </section>
            <section name="clust">
                <param name="metric" value="pearson" />
                <param name="funcluster" value="kmedoids" />
                <expand macro="test_nondef">
                    <param name="fselect" value="true" />
                    <param name="sat" value="true" />
                    <param name="clustnr" value="30" />
                    <param name="bootnr" value="50" />
                    <param name="rseed" value="17000" />
                </expand>
            </section>
            <section name="outlier">
                <param name="outminc" value="5" />
                <param name="outlg" value="2" />
                <param name="final" value="false" />
                <expand macro="test_nondef" section_name="outlier">
                    <param name="probthr" value="0.001" />
                    <param name="outdistquant" value="0.95" />
                    <param name="rfseed" value="12345" />
                    <param name="nbfactor" value="5" />
                </expand>
            </section>
            <section name="tsne">
                <param name="perplexity" value="30" />
                <param name="knn" value="10" />
                <expand macro="test_nondef" section_name="tsne">
                    <param name="initial_cmd" value="true" />
                    <param name="rseed_tsne" value="15555" />
                    <!-- Must match the input param name "rseed_fr". -->
                    <param name="rseed_fr" value="15555" />
                </expand>
            </section>
        </conditional>
        <output name="outgenelist" value="intestinal.genelist" />
        <output name="outpdf" value="intestinal.pdf" compare="sim_size" delta="50" />
    </test>
    <test>
        <!-- Advanced. Opts, CC used -->
        <conditional name="tool">
            <param name="mode" value="cluster" />
            <param name="intable" value="use.intestinal" />
            <section name="filt">
                <param name="mintotal" value="2000" />
                <param name="minexpr" value="3" />
                <param name="minnumber" value="2" />
                <expand macro="test_nondef">
                    <param name="knn" value="5" />
                    <param name="ccor" value="0.5" />
                    <param name="CGenes" value="Gga3,Ggact,Ggct" />
                    <param name="FGenes" value="Zxdc,Zyg11a,Zyg11b,Zyx" />
                    <param name="LBatch_regexes" value="^I5,^II5,^III5,^IV5d,^V5d" />
                    <param name="bmode" value="scran" />
                    <conditional name="ccc">
                        <param name="use" value="yes" />
                        <param name="pvalue" value="0.05" />
                        <param name="quant" value="0.05" />
                        <param name="ncomp" value="3" />
                        <param name="dimr" value="true" />
                        <param name="mode" value="pca" />
                        <param name="logscale" value="true" />
                    </conditional>
                </expand>
            </section>
            <section name="clust">
                <param name="metric" value="euclidean" />
                <param name="funcluster" value="hclust" />
                <expand macro="test_nondef">
                    <param name="fselect" value="false" />
                    <param name="knn" value="5" />
                    <param name="sat" value="false" />
                    <param name="samp" value="10" />
                    <param name="cln" value="10" />
                    <param name="clustnr" value="10" />
                    <param name="bootnr" value="30" />
                    <param name="rseed" value="17000" />
                </expand>
            </section>
            <section name="outlier">
                <param name="outminc" value="3" />
                <param name="outlg" value="5" />
                <param name="final" value="true" />
                <expand macro="test_nondef">
                    <param name="probthr" value="0.01" />
                    <param name="outdistquant" value="0.5" />
                    <param name="rfseed" value="12345" />
                    <param name="nbfactor" value="5" />
                    <param name="nbtree" value="10" />
                </expand>
            </section>
            <section name="tsne">
                <param name="perplexity" value="20" />
                <param name="knn" value="6" />
                <expand macro="test_nondef">
                    <param name="initial_cmd" value="false" />
                    <param name="rseed_tsne" value="15555" />
                    <!-- Must match the input param name "rseed_fr". -->
                    <param name="rseed_fr" value="15555" />
                </expand>
            </section>
        </conditional>
        <output name="outgenelist" value="intestinal_advanced.genelist" />
        <output name="outpdf" value="intestinal_advanced.pdf" compare="sim_size" delta="150" />
    </test>
</macro>
<!--
    Cheetah template that renders the R settings for the filter / normalise
    step. It loads the count matrix (or the packaged intestinalData when the
    hidden test flag is detected), builds an SCseq object, and copies the
    "filt" form values over the formals() of filterdata and CCcorrect.
    Lines starting with a double hash are Cheetah comments and do not reach
    the rendered R script.
-->
<token name="@FILTNORM_CHEETAH@"><![CDATA[
## Perform do.filter
use.filtnormconf = TRUE

## Perform do.cluster, do.outlier, do.clustmap, mkgenelist
use.cluster = FALSE

in.table = read.table(
    '${intable}',
    stringsAsFactors = FALSE,
    na.strings=c("NA", "-", "?", "."),
    sep='\t',
    header=TRUE,
    row.names=1
)

## Hidden flag to use test data instead
## see: test-data/use.intestinal

use.test.data = (names(in.table)[1] == "test")

sc = NULL
if (use.test.data) {
    sc = SCseq(intestinalData)
    message("Loading test data from library")
} else {
    sc = SCseq(in.table)
}


## Start from the functions' own argument defaults and override only what
## the form supplies.
filt = formals(filterdata)
filt.ccc = formals(CCcorrect)
filt.use.ccorrect = FALSE
filt.lbatch.regexes = NULL

filt\$mintotal = as.integer( '$filt.mintotal' )
filt\$minexpr = as.integer( '$filt.minexpr' )
filt\$minnumber = as.integer( '$filt.minnumber' )
#if str($filt.use.def) == "no":
filt\$knn = as.integer( '$filt.use.knn' )
filt\$ccor = as.numeric( '$filt.use.ccor' )
filt\$bmode = as.character( '$filt.use.bmode' )
    ## Optional text fields are forwarded only when they are non-empty.
    #if $filt.use.LBatch_regexes:
filt.lbatch.regexes = string2textvector( '$filt.use.LBatch_regexes' )
    #end if
    #if $filt.use.CGenes:
filt\$CGenes = string2textvector( '$filt.use.CGenes' )
    #end if
    #if $filt.use.FGenes:
filt\$FGenes = string2textvector( '$filt.use.FGenes' )
    #end if
    ## Cell-cycle correction arguments, only when the user enabled it.
    #if str($filt.use.ccc.use) == "yes":
filt.use.ccorrect = TRUE
        #if $filt.use.ccc.vset:
filt.ccc\$vset = string2textvector( '$filt.use.ccc.vset' )
        #end if
        #if $filt.use.ccc.ncomp:
filt.ccc\$nComp = as.integer( '$filt.use.ccc.ncomp' )
        #end if
filt.ccc\$pvalue = as.numeric( '$filt.use.ccc.pvalue' )
filt.ccc\$quant = as.numeric( '$filt.use.ccc.quant' )
filt.ccc\$dimR = as.logical( '$filt.use.ccc.dimr' )
filt.ccc\$mode = as.character( '$filt.use.ccc.mode.value' )
filt.ccc\$logscale = as.logical( '$filt.use.ccc.logscale' )
    #end if
#end if

out.pdf = '${outpdf}'
out.rdat = '${outrdat}'

]]></token>
<!--
    Cheetah template that renders the R settings for the clustering step. It
    reloads the SCseq object saved as RDS, then copies the "clust", "outlier",
    "tsne" and "extra" form values over the formals() of compdist, clustexp,
    findoutliers, rfcorrect, comptsne and compfr. Optional integer fields are
    forwarded only when set, so a blank (or 0) keeps the function default.
    Lines starting with a double hash are Cheetah comments and do not reach
    the rendered R script.
-->
<token name="@CLUSTER_CHEETAH@"><![CDATA[

in.rdat = readRDS('${inputrds}')

sc = in.rdat

## Perform do.filter
use.filtnormconf = FALSE

## Perform do.cluster, do.outlier, do.clustmap, mkgenelist
use.cluster = TRUE


clust.compdist = formals(compdist)
clust.clustexp = formals(clustexp)
clust.compdist\$metric = as.character( '$clust.metric' )
clust.clustexp\$FUNcluster = as.character( '$clust.funcluster' )

#if str($clust.use.def) == "no":

clust.compdist\$FSelect = as.logical( '$clust.use.fselect' )
    #if $clust.use.knn:
clust.compdist\$knn = as.integer( '$clust.use.knn' )
    #end if
clust.clustexp\$sat = as.logical( '$clust.use.sat' )
    #if $clust.use.samp:
clust.clustexp\$samp = as.integer( '$clust.use.samp' )
    #end if
    ## Only the optional cln is guarded. clustnr and bootnr are required
    ## form fields and are forwarded whenever the advanced branch is active.
    #if $clust.use.cln:
clust.clustexp\$cln = as.integer( '$clust.use.cln' )
    #end if
clust.clustexp\$clustnr = as.integer( '$clust.use.clustnr' )
clust.clustexp\$bootnr = as.integer( '$clust.use.bootnr' )
    ## The rseed form value is collected but not forwarded (line left disabled).
##clust.clustexp\$rseed = as.integer( '$clust.use.rseed' )
#end if

outlier.use.randomforest = FALSE
outlier.findoutliers = formals(findoutliers)
outlier.clustheatmap = formals(clustheatmap)
outlier.rfcorrect = formals(rfcorrect)

outlier.findoutliers\$outminc = as.integer( '$outlier.outminc' )
outlier.findoutliers\$outlg = as.integer( '$outlier.outlg' )
outlier.rfcorrect\$final = as.logical( '$outlier.final' )

#if str($outlier.use.def) == "no":
    #if $outlier.use.nbtree:
outlier.rfcorrect\$nbtree = as.integer( '$outlier.use.nbtree' )
    #end if
outlier.findoutliers\$probthr = as.numeric( '$outlier.use.probthr' )
outlier.findoutliers\$outdistquant = as.numeric( '$outlier.use.outdistquant' )
    ## The rfseed form value is collected but not forwarded (line left disabled).
##outlier.rfcorrect\$rfseed = as.integer( '$outlier.use.rfseed' )
outlier.rfcorrect\$nbfactor = as.integer( '$outlier.use.nbfactor' )
#end if

cluster.comptsne = formals(comptsne)
cluster.compfr = formals(compfr)

cluster.comptsne\$perplexity = as.integer( '$tsne.perplexity' )
cluster.compfr\$knn = as.integer( '$tsne.knn' )
#if str($tsne.use.def) == "no":
cluster.comptsne\$initial_cmd = as.logical( '$tsne.use.initial_cmd' )
cluster.comptsne\$rseed = as.integer( '$tsne.use.rseed_tsne' )
cluster.compfr\$rseed = as.integer( '$tsne.use.rseed_fr' )
#end if

genelist.tablelim = as.integer( '$extra.tablelim' )
genelist.plotlim = as.integer( '$extra.plotlim' )
## foldchange is a float form field; as.integer would truncate e.g. 1.5 to 1.
genelist.foldchange = as.numeric( '$extra.foldchange' )
genelist.pvalue = as.numeric( '$extra.pvalue' )

out.pdf = '${outpdf}'
out.rdat = '${outrdat}'
out.genelist = '${outgenelist}'

]]>
</token>
403 </macros> |