comparison anndata_operations.xml @ 19:53a251c6d991 draft

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit ebe77c8718ec65277f4dc0d71fa5f4c5677df62d-dirty"
author ebi-gxa
date Wed, 05 May 2021 12:15:47 +0000
parents e4bb4666449e
children fe75d2414dc0
comparison
equal deleted inserted replaced
18:e4bb4666449e 19:53a251c6d991
6 </macros> 6 </macros>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[ 8 <command detect_errors="exit_code"><![CDATA[
9 ln -s '${input_obj_file}' input.h5 && 9 ln -s '${input_obj_file}' input.h5 &&
10 10
11 #if $copy_r.default:
12 ln -s '${copy_r.r_source}' r_source.h5 &&
13 #end if
14
15 #if $copy_x.default:
16 #for $i, $xs in enumerate($copy_x.xlayers):
17 ln -s '${xs.x_source}' x_source_${i}.h5 &&
18 #end for
19 #end if
20
21 #if $copy_l.default:
22 #for $i, $ls in enumerate($copy_l.layer_sources):
23 ln -s '${ls}' layer_source_${i}.h5 &&
24 #end for
25 #end if
26
11 #if $copy_o.default: 27 #if $copy_o.default:
12 #for $i, $os in enumerate($copy_o.obs_sources): 28 #for $i, $os in enumerate($copy_o.obs_sources):
13 ln -s '${os}' obs_source_${i}.h5 && 29 ln -s '${os}' obs_source_${i}.h5 &&
14 #end for 30 #end for
15 #end if 31 #end if
58 adata.var['${flag.flag}'] = k_cat 74 adata.var['${flag.flag}'] = k_cat
59 qc_vars.append('${flag.flag}') 75 qc_vars.append('${flag.flag}')
60 else: 76 else:
61 logging.warning('No genes starting with {} found, skip calculating expression of {} genes'.format('${flag.startswith}', '${flag.flag}')) 77 logging.warning('No genes starting with {} found, skip calculating expression of {} genes'.format('${flag.startswith}', '${flag.flag}'))
62 #end for 78 #end for
79
80 #if $copy_r.default and $copy_r.r_source:
81 ad_s = sc.read('r_source.h5')
82 if not all(adata.obs.index.isin(ad_s.obs.index)):
83 logging.error("Specified object for .raw must contain all .obs from main object.")
84 sys.exit(1)
85 else:
86 adata.raw = ad_s[adata.obs.index]
87 del ad_s
88 #end if
89
90 #if $copy_x.default and len($copy_x.xlayers) > 0:
91 #for $i, $x_s in enumerate($copy_x.xlayers):
92 ad_s = sc.read('x_source_${i}.h5')
93 if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names):
94 #set xs=$copy_x.xlayers[$i]
95 if "${xs.dest}" == '':
96 logging.error("%sth destination layer for %sth X source not specified" % ("${i}", "${i}"))
97 sys.exit(1)
98 adata.layers["${xs.dest}"] = ad_s.X
99 else:
100 logging.error("X source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.")
101 sys.exit(1)
102 del ad_s
103 #end for
104 #end if
105
106 #if $copy_l.default and len($copy_l.layers) > 0:
107 #for $i, $layer_s in enumerate($copy_l.layer_sources):
108 ad_s = sc.read('layer_source_${i}.h5')
109 if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names):
110 #for $j, $l_key in enumerate($copy_l.layers):
111 layers_to_copy = (k for k in ad_s.layers.keys() if "${l_key.contains}" in k)
112 for l_to_copy in layers_to_copy:
113 suffix=''
114 if l_to_copy in adata.layers:
115 suffix = "_${i}"
116
117 adata.layers[l_to_copy+suffix] = ad_s.layers[l_to_copy]
118 #end for
119 else:
120 logging.error("Layer source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.")
121 sys.exit(1)
122 del ad_s
123 #end for
124 #end if
63 125
64 #if $copy_o.default and len($copy_o.obs_keys) > 0: 126 #if $copy_o.default and len($copy_o.obs_keys) > 0:
65 #for $i, $obs_s in enumerate($copy_o.obs_sources): 127 #for $i, $obs_s in enumerate($copy_o.obs_sources):
66 ad_s = sc.read('obs_source_${i}.h5') 128 ad_s = sc.read('obs_source_${i}.h5')
67 if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names): 129 if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names):
75 adata.obs[[k_to_copy+suffix]] = ad_s.obs[[k_to_copy]] 137 adata.obs[[k_to_copy+suffix]] = ad_s.obs[[k_to_copy]]
76 if k_to_copy in ad_s.uns.keys(): 138 if k_to_copy in ad_s.uns.keys():
77 adata.uns[k_to_copy+suffix] = ad_s.uns[k_to_copy] 139 adata.uns[k_to_copy+suffix] = ad_s.uns[k_to_copy]
78 #end for 140 #end for
79 else: 141 else:
80 logging.warning("Observation source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.") 142 logging.error("Observation source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.")
143 sys.exit(1)
144 del ad_s
81 #end for 145 #end for
82 #end if 146 #end if
83 147
84 148
85 #if $copy_e.default and len($copy_e.embedding_keys) > 0: 149 #if $copy_e.default and len($copy_e.embedding_keys) > 0:
93 if k_to_copy in adata.obsm: 157 if k_to_copy in adata.obsm:
94 suffix = "_${i}" 158 suffix = "_${i}"
95 adata.obsm[k_to_copy+suffix] = ad_s.obsm[k_to_copy] 159 adata.obsm[k_to_copy+suffix] = ad_s.obsm[k_to_copy]
96 #end for 160 #end for
97 else: 161 else:
98 logging.warning("Embedding source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.") 162 logging.error("Embedding source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.")
163 sys.exit(1)
164 del ad_s
99 #end for 165 #end for
100 #end if 166 #end if
101 167
102 #if $copy_u.default and len($copy_u.uns_keys) > 0: 168 #if $copy_u.default and len($copy_u.uns_keys) > 0:
103 #for $i, $uns_s in enumerate($copy_u.uns_sources): 169 #for $i, $uns_s in enumerate($copy_u.uns_sources):
110 if k_to_copy in adata.uns: 176 if k_to_copy in adata.uns:
111 suffix="_${i}" 177 suffix="_${i}"
112 adata.uns[k_to_copy+suffix] = ad_s.uns[k_to_copy] 178 adata.uns[k_to_copy+suffix] = ad_s.uns[k_to_copy]
113 #end for 179 #end for
114 else: 180 else:
115 logging.warning("Uns source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.") 181 logging.error("Uns source ${i} AnnData file is not compatible to be merged to main AnnData file, different cell names.")
182 sys.exit(1)
183 del ad_s
116 #end for 184 #end for
117 #end if 185 #end if
118 186
119 #if $sanitize_varm: 187 #if $sanitize_varm:
120 if hasattr(adata, 'raw') and hasattr(adata.raw, 'X') and hasattr(adata.raw, 'var'): 188 if hasattr(adata, 'raw') and hasattr(adata.raw, 'X') and hasattr(adata.raw, 'var'):
156 <repeat name="gene_flags" title="Flag genes that start with these names"> 224 <repeat name="gene_flags" title="Flag genes that start with these names">
157 <param name="startswith" type="text" label="Starts with" help="Text that you expect the genes to be flagged to start with, such as 'MT-' for mito genes"/> 225 <param name="startswith" type="text" label="Starts with" help="Text that you expect the genes to be flagged to start with, such as 'MT-' for mito genes"/>
158 <param name="flag" type="text" label="Var name" help="Name of the column in var.names where this boolean flag is stored, for example 'mito' for mitochondrial genes."/> 226 <param name="flag" type="text" label="Var name" help="Name of the column in var.names where this boolean flag is stored, for example 'mito' for mitochondrial genes."/>
159 </repeat> 227 </repeat>
160 <param name="top_genes" label="Number of top genes" value='50' help="to calculate percentage of the flagged genes in that number of top genes. Used by sc.pp.calculate_qc_metrics (integer)." type="integer"/> 228 <param name="top_genes" label="Number of top genes" value='50' help="to calculate percentage of the flagged genes in that number of top genes. Used by sc.pp.calculate_qc_metrics (integer)." type="integer"/>
229 <conditional name="copy_r">
230 <param name="default" type="boolean" checked="false" label="Copy adata.X to adata.raw"/>
231 <when value="true">
232 <param name="r_source" type="data" label="AnnData object with .X to copy to .raw" help="Copies adata (subset to matching obs) from this AnnData object into the main input as .raw. Make sure to use an AnnData object containing all .obs in the main input." format="h5,h5ad" />
233 </when>
234 <when value="false"/>
235 </conditional>
236 <conditional name="copy_x">
237 <param name="default" type="boolean" checked="false" label="Copy .X to layers"/>
238 <when value="true">
239 <repeat name="xlayers" title="Source objects for .X and paired destination layers in the main AnnData object" help="make sure to use AnnData objects that are compatible in terms of genes, cells and expression data." min="1">
240 <param name="x_source" type="data" label="AnnData objects with .X to copy" help="Extracts .X from these AnnData objects and merges them into the main input as layers. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data." format="h5,h5ad" />
241 <param name="dest" type="text" label="Destination layer" />
242 </repeat>
243 </when>
244 <when value="false"/>
245 </conditional>
246 <conditional name="copy_l">
247 <param name="default" type="boolean" checked="false" label="Copy layers"/>
248 <when value="true">
249 <repeat name="layers" title="Layers from which matrices will be copied" help="will copy all layers in the given AnnData object to the main AnnData object. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data." min="1">
250 <param name="contains" type="text" label="Key contains" help="Keys to be copied need to contain the text set here."/>
251 </repeat>
252 <param name="layer_sources" type="data" label="AnnData objects with layers to copy" help="Extracts layers from these AnnData objects and merges them into the main input. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data." format="h5,h5ad" multiple="true"/>
253 </when>
254 <when value="false"/>
255 </conditional>
161 <conditional name="copy_o"> 256 <conditional name="copy_o">
162 <param name="default" type="boolean" checked="false" label="Copy observations (such as clusters)"/> 257 <param name="default" type="boolean" checked="false" label="Copy observations (such as clusters)"/>
163 <when value="true"> 258 <when value="true">
164 <repeat name="obs_keys" title="Keys from obs to copy" help="will copy all obs keys in the given AnnData object to the main AnnData object. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data. You can use this to copy clusters. Uns elements with the same name will be also transferred." min="1"> 259 <repeat name="obs_keys" title="Keys from obs to copy" help="will copy all obs keys in the given AnnData object to the main AnnData object. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data. You can use this to copy clusters. Uns elements with the same name will be also transferred." min="1">
165 <param name="contains" type="text" label="Key contains" help="Keys to be copied need to contain the text set here."/> 260 <param name="contains" type="text" label="Key contains" help="Keys to be copied need to contain the text set here."/>
200 <test> 295 <test>
201 <param name="input_obj_file" value="find_cluster.h5"/> 296 <param name="input_obj_file" value="find_cluster.h5"/>
202 <param name="input_format" value="anndata"/> 297 <param name="input_format" value="anndata"/>
203 <param name="color_by" value="louvain"/> 298 <param name="color_by" value="louvain"/>
204 <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/> 299 <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/>
300 </test>
301 <test>
302 <param name="input_obj_file" value="find_cluster.h5"/>
303 <param name="input_format" value="anndata"/>
304 <conditional name="copy_r">
305 <param name="default" value="true"/>
306 <param name="r_source" value="read_10x.h5"/>
307 </conditional>
308 <output name="output_h5ad" file="anndata_ops_raw.h5" ftype="h5ad" compare="sim_size">
309 <assert_contents>
310 <has_h5_keys keys="raw/X" />
311 </assert_contents>
312 </output>
313 </test>
314 <test>
315 <param name="input_obj_file" value="normalise_data.h5"/>
316 <param name="input_format" value="anndata"/>
317 <conditional name="copy_x">
318 <param name="default" value="true"/>
319 <repeat name="xlayers">
320 <param name="x_source" value='filter_genes.h5'/>
321 <param name="dest" value='filtered'/>
322 </repeat>
323 </conditional>
324 <output name="output_h5ad" file="anndata_ops_xlayer.h5" ftype="h5ad" compare="sim_size">
325 <assert_contents>
326 <has_h5_keys keys="layers/filtered" />
327 </assert_contents>
328 </output>
329 </test>
330 <test>
331 <param name="input_obj_file" value="find_cluster.h5"/>
332 <param name="input_format" value="anndata"/>
333 <conditional name="copy_l">
334 <param name="default" value="true"/>
335 <repeat name="layers">
336 <param name="contains" value='filtered'/>
337 </repeat>
338 <param name="layer_sources" value='anndata_ops_xlayer.h5'/>
339 </conditional>
340 <output name="output_h5ad" file="anndata_ops_layer.h5" ftype="h5ad" compare="sim_size">
341 <assert_contents>
342 <has_h5_keys keys="layers/filtered" />
343 </assert_contents>
344 </output>
205 </test> 345 </test>
206 </tests> 346 </tests>
207 347
208 <help><![CDATA[ 348 <help><![CDATA[
209 ============================= 349 =============================