comparison anndata_operations.xml @ 28:a0274bc43b7e draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit d01fa18235ac692874cd3cfddef33696c2df8ac1-dirty
author ebi-gxa
date Sat, 15 Jul 2023 08:44:46 +0000
parents 7ebc22f77d86
children 2e8022f1923c
comparison
equal deleted inserted replaced
27:7ebc22f77d86 28:a0274bc43b7e
1 <?xml version="1.0" encoding="utf-8"?> 1 <?xml version="1.0" encoding="utf-8"?>
2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy91" profile="@PROFILE@"> 2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy92" profile="@PROFILE@">
3 <description>modifies metadata and flags genes</description> 3 <description>modifies metadata and flags genes</description>
4 <macros> 4 <macros>
5 <import>scanpy_macros2.xml</import> 5 <import>scanpy_macros2.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
48 python $operations 48 python $operations
49 ]]></command> 49 ]]></command>
50 <configfiles> 50 <configfiles>
51 <configfile name="operations"> 51 <configfile name="operations">
52 import gc 52 import gc
53 from os import makedirs
53 import scanpy as sc 54 import scanpy as sc
54 import anndata 55 import anndata
55 from numpy import all 56 from numpy import all
56 import logging 57 import logging
57 58
59 if new_field is None: 60 if new_field is None:
60 new_field = f"{field}_u" 61 new_field = f"{field}_u"
61 appendents = (suffix + df.groupby(field).cumcount().astype(str).replace('0','')).replace(suffix, '') 62 appendents = (suffix + df.groupby(field).cumcount().astype(str).replace('0','')).replace(suffix, '')
62 df[new_field] = df[field].astype(str) + appendents.astype(str) 63 df[new_field] = df[field].astype(str) + appendents.astype(str)
63 return df 64 return df
65
66 adata = sc.read('input.h5')
64 67
65 adata = sc.read('input.h5')
66
67 #if $add_cell_metadata.default: 68 #if $add_cell_metadata.default:
68 import pandas as pd 69 import pandas as pd
69 70
70 def add_cell_metadata(ad, metadata_file="cell_metadata.tsv", drop_duplicates=True): 71 def add_cell_metadata(ad, metadata_file="cell_metadata.tsv", drop_duplicates=True):
71 metadata_df = pd.read_csv(metadata_file, sep="\t", index_col=0) 72 metadata_df = pd.read_csv(metadata_file, sep="\t", index_col=0)
94 if prev_dtype == str or prev_dtype == object: 95 if prev_dtype == str or prev_dtype == object:
95 prev_dtype = "category" 96 prev_dtype = "category"
96 print(f"Changing {col} from {merged_obs[col].dtype} to {prev_dtype}") 97 print(f"Changing {col} from {merged_obs[col].dtype} to {prev_dtype}")
97 merged_obs[col] = merged_obs[col].astype(prev_dtype) 98 merged_obs[col] = merged_obs[col].astype(prev_dtype)
98 return merged_obs 99 return merged_obs
99 100
100 adata.obs = add_cell_metadata(adata) 101 adata.obs = add_cell_metadata(adata)
101 #end if 102 #end if
102 103
103 #if $copy_adata_to_raw: 104 #if $copy_adata_to_raw:
104 adata.raw = adata 105 adata.raw = adata
106 #end if
107
108 #if $swap_layer_to_x.default:
109 #if $swap_layer_to_x.new_name_x:
110 adata.layers['${swap_layer_to_x.new_name_x}'] = adata.X
111 #end if
112 adata.X = adata.layers['${swap_layer_to_x.layer}']
113 del adata.layers['${swap_layer_to_x.layer}']
105 #end if 114 #end if
106 115
107 gene_name = '${gene_symbols_field}' 116 gene_name = '${gene_symbols_field}'
108 qc_vars = list() 117 qc_vars = list()
109 118
284 if 'n_cells' not in adata.var.columns: 293 if 'n_cells' not in adata.var.columns:
285 sc.pp.filter_genes(adata, min_cells=0) 294 sc.pp.filter_genes(adata, min_cells=0)
286 if 'n_counts' not in adata.var.columns: 295 if 'n_counts' not in adata.var.columns:
287 sc.pp.filter_genes(adata, min_counts=0) 296 sc.pp.filter_genes(adata, min_counts=0)
288 297
298 #if not $split_on_obs.default or $split_on_obs.output_main:
289 adata.write('output.h5', compression='gzip') 299 adata.write('output.h5', compression='gzip')
300 #end if
301
302 #if $split_on_obs.default:
303 s = 0
304 res_dir = "output_split"
305 makedirs(res_dir, exist_ok=True)
306 for field_value in adata.obs["${split_on_obs.key}"].unique():
307 ad_s = adata[adata.obs.${split_on_obs.key} == field_value]
308 ad_s.write(f"{res_dir}/${split_on_obs.key}_{s}.h5", compression='gzip')
309 if s > 0:
310 gc.collect()
311 s += 1
312 #end if
313
314
290 </configfile> 315 </configfile>
291 </configfiles> 316 </configfiles>
292 317
293 <inputs> 318 <inputs>
294 <param name="input_obj_file" argument="input-object-file" type="data" format="h5,h5ad" label="Input object in hdf5 AnnData format"/> 319 <param name="input_obj_file" argument="input-object-file" type="data" format="h5,h5ad" label="Input object in hdf5 AnnData format"/>
295 <expand macro="output_object_params_no_loom"/> 320 <expand macro="output_object_params_no_loom"/>
321 <conditional name="swap_layer_to_x">
322 <param name="default" type="boolean" checked="false" label="Swap layer to X"/>
323 <when value="true">
324 <param name="layer" type="text" value="" label="Name of layer to swap to X" help="This layer name needs to exist within ad.layers or this will fail."/>
325 <param name="new_name_x" type="text" value="old_X" label="Name of the new slot for X within layers" help="Leave empty and the old X will be lost."/>
326 </when>
327 <when value="false"/>
328 </conditional>
296 <conditional name="add_cell_metadata"> 329 <conditional name="add_cell_metadata">
297 <param name="default" type="boolean" checked="false" label="Merge additional cell metadata"/> 330 <param name="default" type="boolean" checked="false" label="Merge additional cell metadata"/>
298 <when value="true"> 331 <when value="true">
299 <param name="file" type="data" label="Cell metadata with headers" help="A tabular file with headers, where the first column contains cell barcodes. Will be merged via a left join, so not all cells in the obs need to be in the metadata. Currently duplicated column headers will be ignored and the originals in the AnnData will be kept." format="tsv,tabular"/> 332 <param name="file" type="data" label="Cell metadata with headers" help="A tabular file with headers, where the first column contains cell barcodes. Will be merged via a left join, so not all cells in the obs need to be in the metadata. Currently duplicated column headers will be ignored and the originals in the AnnData will be kept." format="tsv,tabular"/>
300 </when> 333 </when>
385 </repeat> 418 </repeat>
386 <param name="uns_sources" type="data" label="AnnData objects with uns to copy" help="Extracts uns (such as ranked_genes_groups) from these AnnData objects and merges them into the main input. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data." format="h5,h5ad" multiple="true"/> 419 <param name="uns_sources" type="data" label="AnnData objects with uns to copy" help="Extracts uns (such as ranked_genes_groups) from these AnnData objects and merges them into the main input. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data." format="h5,h5ad" multiple="true"/>
387 </when> 420 </when>
388 </conditional> 421 </conditional>
389 <param name="sanitize_varm" type="boolean" checked="false" label="Sanitise any null raw.varm objects if any" help="This might be relevant for interfacing with newer versions of AnnData, that might complain if .raw includes a varm null object."/> 422 <param name="sanitize_varm" type="boolean" checked="false" label="Sanitise any null raw.varm objects if any" help="This might be relevant for interfacing with newer versions of AnnData, that might complain if .raw includes a varm null object."/>
423 <conditional name="split_on_obs">
424 <param name="default" type="boolean" checked="false" label="Split on obs" help="Split the AnnData object into multiple AnnData objects based on the values of a given obs key. This is useful for example to split a dataset based on a cluster annotation."/>
425 <when value="true">
426 <param name="key" type="text" label="Obs key to split on" help="The obs key to split on. For example, if you want to split on cluster annotation, you can use the key 'louvain'."/>
427 <param name="output_main" type="boolean" checked="true" label="Output main AnnData object" help="If checked, the main AnnData object will be outputted as well."/>
428 </when>
429 <when value="false"/>
430 </conditional>
390 </inputs> 431 </inputs>
391 432
392 <outputs> 433 <outputs>
393 <expand macro="output_data_obj_no_loom" description="metadata changes on"/> 434 <data name="output_h5ad" format="h5ad" from_work_dir="output.h5" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData">
435 <filter>output_format == 'anndata_h5ad' and (('output_main' in split_on_obs and split_on_obs['output_main']) or (not split_on_obs['default']))</filter>
436 </data>
437 <data name="output_h5" format="h5" from_work_dir="output.h5" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData (h5)">
438 <filter>output_format == 'anndata' and (('output_main' in split_on_obs and split_on_obs['output_main']) or (not split_on_obs['default']))</filter>
439 </data>
440 <collection name="output_h5ad_split" type="list" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData split">
441 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.h5" directory="output_split" format="h5ad" visible="true"/>
442 <filter>split_on_obs['default']</filter>
443 </collection>
394 </outputs> 444 </outputs>
395 445
396 <tests> 446 <tests>
397 <test> 447 <test>
398 <param name="input_obj_file" value="find_cluster.h5"/> 448 <param name="input_obj_file" value="find_cluster.h5"/>
399 <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/> 449 <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/>
450 </test>
451 <test>
452 <param name="input_obj_file" value="mnn.h5"/>
453 <conditional name="swap_layer_to_x">
454 <param name="default" value="true"/>
455 <param name="layer" value="mnn"/>
456 <param name="new_name_x" value="X_old"/>
457 </conditional>
458 <output name="output_h5ad" ftype="h5ad">
459 <assert_contents>
460 <has_h5_keys keys="layers/X_old" />
461 </assert_contents>
462 </output>
400 </test> 463 </test>
401 <test> 464 <test>
402 <param name="input_obj_file" value="anndata_ops.h5"/> 465 <param name="input_obj_file" value="anndata_ops.h5"/>
403 <param name="from_var" value = "gene_symbols" /> 466 <param name="from_var" value = "gene_symbols" />
404 <param name="to_var" value = "hello_all" /> 467 <param name="to_var" value = "hello_all" />
481 <output name="output_h5ad" file="anndata_ops_layer.h5" ftype="h5ad" compare="sim_size"> 544 <output name="output_h5ad" file="anndata_ops_layer.h5" ftype="h5ad" compare="sim_size">
482 <assert_contents> 545 <assert_contents>
483 <has_h5_keys keys="layers/filtered" /> 546 <has_h5_keys keys="layers/filtered" />
484 </assert_contents> 547 </assert_contents>
485 </output> 548 </output>
549 </test>
550 <test>
551 <param name="input_obj_file" value="find_cluster.h5"/>
552 <conditional name="split_on_obs">
553 <param name="default" value="true"/>
554 <param name="key" value="louvain"/>
555 <param name="output_main" value="true"/>
556 </conditional>
557 <output name="output_h5ad" ftype="h5ad">
558 <assert_contents>
559 <has_h5_keys keys="obs/louvain" />
560 </assert_contents>
561 </output>
562 <output_collection name="output_h5ad_split" type="list" count="5">
563 <element name="louvain_0" ftype="h5ad">
564 <assert_contents>
565 <has_h5_keys keys="obs/louvain" />
566 </assert_contents>
567 </element>
568 </output_collection>
486 </test> 569 </test>
487 </tests> 570 </tests>
488 571
489 <help><![CDATA[ 572 <help><![CDATA[
490 ============================= 573 =============================