comparison scanpy-integrate-bbknn.xml @ 0:e6d5b3fed639 draft

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit 62f47287c7e8449c59a1f1f454852ddc669b1b1e-dirty"
author ebi-gxa
date Mon, 07 Sep 2020 13:05:34 +0000
parents
children c2be8a21eaa2
comparison
equal deleted inserted replaced
-1:000000000000 0:e6d5b3fed639
1 <?xml version="1.0" encoding="utf-8"?>
2 <tool id="scanpy_integrate_bbknn" name="Scanpy BBKNN" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
3 <description>batch-balanced K-nearest neighbours</description>
4 <macros>
5 <import>scanpy_macros2.xml</import>
6 </macros>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 #if $batch_key
10 ln -s '${input_obj_file}' input.h5 &&
11 PYTHONIOENCODING=utf-8 scanpy-integrate bbknn
12 --batch-key '${batch_key}'
13 #if $settings.default == "false"
14 #if $settings.use_rep
15 --use-rep '${settings.use_rep}'
16 #end if
17 #if $settings.key_added
18 --key-added '${settings.key_added}'
19 #end if
20 #if $settings.n_pcs
21 --n-pcs '${settings.n_pcs}'
22 #end if
23 #if not $settings.approx
24 ${settings.approx}
25 #end if
26 #if $settings.metric
27 --metric '${settings.metric}'
28 #end if
29 #if $settings.neighbors_within_batch
30 --neighbors-within-batch '${settings.neighbors_within_batch}'
31 #end if
32 #if str($settings.trim) != ''
33 --trim '${settings.trim}'
34 #end if
35 #if $settings.n_trees
36 --n-trees '${settings.n_trees}'
37 #end if
38 #if not $settings.use_faiss
39 ${settings.use_faiss}
40 #end if
41 #if str($settings.set_op_mix_ratio) != ''
42 --set-op-mix-ratio '${settings.set_op_mix_ratio}'
43 #end if
44 #if $settings.local_connectivity
45 --local-connectivity '${settings.local_connectivity}'
46 #end if
47 #end if
48 @INPUT_OPTS@
49 @OUTPUT_OPTS@
50 #else
51 echo "No batch variables passed, simply passing original input as output unchanged.";
52 cp '${input_obj_file}' '${output_h5}'
53 #end if
54 ]]></command> <!-- Runs scanpy-integrate bbknn when a batch key is given; otherwise copies the input to the output unchanged. The trim and set_op_mix_ratio settings are tested with str(...) != '' rather than truthiness, so an explicit 0 (trim: skip trimming; set_op_mix_ratio: mutual nearest neighbour pairs only) still reaches the command line. -->
55
56 <inputs> <!-- Tool form: input/output object options from macros, the batch key, and an optional block of advanced BBKNN settings shown when "Use programme defaults" is unchecked. -->
57 <expand macro="input_object_params"/>
58 <expand macro="output_object_params"/>
59 <param name="batch_key" type="text" argument="--batch-key" label="The name of the column in adata.obs that differentiates among experiments/batches.">
60 <sanitizer>
61 <valid initial="string.printable"/>
62 </sanitizer>
63 </param>
64 <conditional name="settings">
65 <param name="default" type="boolean" checked="true" label="Use programme defaults"/>
66 <when value="true"/>
67 <when value="false">
68 <param name="use_rep" argument="--use-rep" type="text" value="X_pca" label="The dimensionality reduction in .obsm to use for neighbour detection.">
69 <sanitizer>
70 <valid initial="string.printable"/>
71 </sanitizer>
72 </param>
73 <param name="key_added" argument="--key-added" type="text" optional="true" label="Key under which to add the computed results." help="If not specified, the neighbors data is stored in .uns[‘neighbors’], distances and connectivities are stored in .obsp[‘distances’] and .obsp[‘connectivities’] respectively. If specified, the neighbors data is added to .uns[key_added], distances are stored in .obsp[key_added+’_distances’] and connectivities in .obsp[key_added+’_connectivities’].">
74 <sanitizer>
75 <valid initial="string.printable"/>
76 </sanitizer>
77 </param>
78 <param name="n_pcs" argument="--n-pcs" type="integer" value="50" optional="true" label="Number of PCs to use"/>
79 <param name="approx" argument="--no-approx" type="boolean" truevalue="" falsevalue="--no-approx" checked="true"
80 label="Use annoy’s approximate neighbour finding?" help="This results in a quicker run time for large datasets while also potentially increasing the degree of batch correction."/>
81 <param name="metric" argument="--metric" type="select" label="A known metric’s name"> <!-- Option values are passed verbatim to the metric option of the command, so they must be spelled exactly as the underlying tool expects (presumably the scipy/scikit-learn metric names; confirm against the scanpy-scripts choice list). -->
82 <option value="angular" selected="true">angular</option>
83 <option value="euclidean">euclidean</option>
84 <option value="cityblock">cityblock</option>
85 <option value="cosine">cosine</option>
86 <option value="l1">l1</option>
87 <option value="l2">l2</option>
88 <option value="manhattan">manhattan</option>
89 <option value="braycurtis">braycurtis</option>
90 <option value="canberra">canberra</option>
91 <option value="chebyshev">chebyshev</option>
92 <option value="correlation">correlation</option>
93 <option value="dice">dice</option>
94 <option value="hamming">hamming</option>
95 <option value="jaccard">jaccard</option>
96 <option value="kulsinski">kulsinski</option>
97 <option value="mahalanobis">mahalanobis</option>
98 <option value="minkowski">minkowski</option>
99 <option value="rogerstanimoto">rogerstanimoto</option>
100 <option value="russellrao">russellrao</option>
101 <option value="seuclidean">seuclidean</option>
102 <option value="sokalmichener">sokalmichener</option>
103 <option value="sokalsneath">sokalsneath</option>
104 <option value="sqeuclidean">sqeuclidean</option>
105 <option value="yule">yule</option>
106 </param>
107 <param name="neighbors_within_batch" argument="--neighbors-within-batch" type="integer" value="3" optional="true" label="How many top neighbours to report for each batch" help="Total number of neighbours will be this number times the number of batches."/>
108 <param name="trim" argument="--trim" type="integer" value="" optional="true" label="Trim the neighbours of each cell to these many top connectivities." help="May help with population independence and improve the tidiness of clustering. The lower the value the more independent the individual populations, at the cost of more conserved batch effect. If not set, sets the parameter value automatically to 10 times the total number of neighbours for each cell. Set to 0 to skip."/>
109 <param name="n_trees" argument="--n-trees" type="integer" value="10" optional="true" label="The number of trees to construct in the annoy forest." help="More trees give higher precision when querying, at the cost of increased run time and resource intensity."/>
110 <param name="use_faiss" argument="--no-use-faiss" type="boolean" truevalue="" falsevalue="--no-use-faiss" checked="true"
111 label="Use the faiss package to compute nearest neighbours if installed" help="If approx=False and the metric is 'euclidean' use the faiss package to compute nearest neighbours if installed. This improves performance at a minor cost to numerical precision as faiss operates on float32."/>
112 <param name="set_op_mix_ratio" argument="--set-op-mix-ratio" type="float" value="1" min="0" max="1" label="UMAP connectivity computation parameter" help="Float between 0 and 1, controlling the blend between a connectivity matrix formed exclusively from mutual nearest neighbour pairs (0) and a union of all observed neighbour relationships with the mutual pairs emphasised (1)."/>
113 <param name="local_connectivity" argument="--local-connectivity" type="integer" value="1" label="UMAP connectivity computation parameter, how many nearest neighbors of each cell are assumed to be fully connected (and given a connectivity value of 1)."/>
114 </when>
115 </conditional>
116 </inputs>
117
118 <outputs> <!-- Single output, declared through the output_data_obj macro (presumably defined in the imported scanpy_macros2.xml; confirm there). The description is templated with the chosen batch key. -->
119 <expand macro="output_data_obj" description="Batch-corrected for ${batch_key}"/>
120 </outputs>
121
122 <tests>
123 <test> <!-- Default-settings run with batch key "louvain" on find_cluster.h5; the output is compared to bbknn.h5 with compare="sim_size". The advanced settings branch is not exercised by this test. -->
124 <param name="input_obj_file" value="find_cluster.h5"/>
125 <param name="input_format" value="anndata"/>
126 <param name="output_format" value="anndata"/>
127 <param name="batch_key" value="louvain"/>
128 <output name="output_h5" file="bbknn.h5" ftype="h5" compare="sim_size"/>
129 </test>
130 </tests>
131
132 <help><![CDATA[
133 .. class:: infomark
134
135 **What it does**
136
137 Batch balanced kNN (BBKNN) alters the kNN procedure to identify each cell’s top neighbours in each batch separately, instead of searching the entire cell pool with no accounting for batch. This aligns batches in a quick and lightweight manner.
138
139 Use as an alternative to Scanpy ComputeGraph.
140
141 @HELP@
142
143 @VERSION_HISTORY@
144 ]]></help>
145 <expand macro="citations"/>
146 </tool>