Mercurial > repos > ebi-gxa > anndata_ops
changeset 28:a0274bc43b7e draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit d01fa18235ac692874cd3cfddef33696c2df8ac1-dirty
author | ebi-gxa |
---|---|
date | Sat, 15 Jul 2023 08:44:46 +0000 |
parents | 7ebc22f77d86 |
children | 2e8022f1923c |
files | anndata_operations.xml |
diffstat | 1 files changed, 88 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/anndata_operations.xml Fri Apr 14 13:12:01 2023 +0000 +++ b/anndata_operations.xml Sat Jul 15 08:44:46 2023 +0000 @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="utf-8"?> -<tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy91" profile="@PROFILE@"> +<tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy92" profile="@PROFILE@"> <description>modifies metadata and flags genes</description> <macros> <import>scanpy_macros2.xml</import> @@ -50,6 +50,7 @@ <configfiles> <configfile name="operations"> import gc +from os import makedirs import scanpy as sc import anndata from numpy import all @@ -61,9 +62,9 @@ appendents = (suffix + df.groupby(field).cumcount().astype(str).replace('0','')).replace(suffix, '') df[new_field] = df[field].astype(str) + appendents.astype(str) return df - + adata = sc.read('input.h5') - + #if $add_cell_metadata.default: import pandas as pd @@ -96,7 +97,7 @@ print(f"Changing {col} from {merged_obs[col].dtype} to {prev_dtype}") merged_obs[col] = merged_obs[col].astype(prev_dtype) return merged_obs - + adata.obs = add_cell_metadata(adata) #end if @@ -104,6 +105,14 @@ adata.raw = adata #end if +#if $swap_layer_to_x.default: +#if $swap_layer_to_x.new_name_x: +adata.layers['${swap_layer_to_x.new_name_x}'] = adata.X +#end if +adata.X = adata.layers['${swap_layer_to_x.layer}'] +del adata.layers['${swap_layer_to_x.layer}'] +#end if + gene_name = '${gene_symbols_field}' qc_vars = list() @@ -286,13 +295,37 @@ if 'n_counts' not in adata.var.columns: sc.pp.filter_genes(adata, min_counts=0) +#if not $split_on_obs.default or $split_on_obs.output_main: adata.write('output.h5', compression='gzip') +#end if + +#if $split_on_obs.default: +s = 0 +res_dir = "output_split" +makedirs(res_dir, exist_ok=True) +for field_value in adata.obs["${split_on_obs.key}"].unique(): + ad_s = adata[adata.obs.${split_on_obs.key} == field_value] + ad_s.write(f"{res_dir}/${split_on_obs.key}_{s}.h5", compression='gzip') + if s > 0: + gc.collect() + s += 1 +#end if + + </configfile> </configfiles> <inputs> <param name="input_obj_file" argument="input-object-file" type="data" format="h5,h5ad" label="Input object in hdf5 AnnData format"/> <expand macro="output_object_params_no_loom"/> + <conditional name="swap_layer_to_x"> + <param name="default" type="boolean" checked="false" label="Swap layer to X"/> + <when value="true"> + <param name="layer" type="text" value="" label="Name of layer to swap to X" help="This layer name needs to exist within ad.layers or this will fail."/> + <param name="new_name_x" type="text" value="old_X" label="Name of the new slot for X within layers" help="Leave empty and the old X will be lost."/> + </when> + <when value="false"/> + </conditional> <conditional name="add_cell_metadata"> <param name="default" type="boolean" checked="false" label="Merge additional cell metadata"/> <when value="true"> @@ -387,10 +420,27 @@ </when> </conditional> <param name="sanitize_varm" type="boolean" checked="false" label="Sanitise any null raw.varm objects if any" help="This might be relevant for interfacing with newer versions of AnnData, that might complain if .raw includes a varm null object."/> + <conditional name="split_on_obs"> + <param name="default" type="boolean" checked="false" label="Split on obs" help="Split the AnnData object into multiple AnnData objects based on the values of a given obs key. This is useful for example to split a dataset based on a cluster annotation."/> + <when value="true"> + <param name="key" type="text" label="Obs key to split on" help="The obs key to split on. For example, if you want to split on cluster annotation, you can use the key 'louvain'."/> + <param name="output_main" type="boolean" checked="true" label="Output main AnnData object" help="If checked, the main AnnData object will be outputted as well."/> + </when> + <when value="false"/> + </conditional> </inputs> <outputs> - <expand macro="output_data_obj_no_loom" description="metadata changes on"/> + <data name="output_h5ad" format="h5ad" from_work_dir="output.h5" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData"> + <filter>output_format == 'anndata_h5ad' and (('output_main' in split_on_obs and split_on_obs['output_main']) or (not split_on_obs['default']))</filter> + </data> + <data name="output_h5" format="h5" from_work_dir="output.h5" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData (h5)"> + <filter>output_format == 'anndata' and (('output_main' in split_on_obs and split_on_obs['output_main']) or (not split_on_obs['default']))</filter> + </data> + <collection name="output_h5ad_split" type="list" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData split"> + <discover_datasets pattern="(?P<designation>.+)\.h5" directory="output_split" format="h5ad" visible="true"/> + <filter>split_on_obs['default']</filter> + </collection> </outputs> <tests> @@ -399,6 +449,19 @@ <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/> </test> <test> + <param name="input_obj_file" value="mnn.h5"/> + <conditional name="swap_layer_to_x"> + <param name="default" value="true"/> + <param name="layer" value="mnn"/> + <param name="new_name_x" value="X_old"/> + </conditional> + <output name="output_h5ad" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="layers/X_old" /> + </assert_contents> + </output> + </test> + <test> <param name="input_obj_file" value="anndata_ops.h5"/> <param name="from_var" value = "gene_symbols" /> <param name="to_var" value = "hello_all" /> @@ -484,6 +547,26 @@ </assert_contents> </output> </test> + <test> + <param name="input_obj_file" value="find_cluster.h5"/> + <conditional name="split_on_obs"> + <param name="default" value="true"/> + <param name="key" value="louvain"/> + <param name="output_main" value="true"/> + </conditional> + <output name="output_h5ad" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/louvain" /> + </assert_contents> + </output> + <output_collection name="output_h5ad_split" type="list" count="5"> + <element name="louvain_0" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/louvain" /> + </assert_contents> + </element> + </output_collection> + </test> </tests> <help><![CDATA[