comparison anndata_operations.xml @ 27:7ebc22f77d86 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit 2db372e91d658f2c139ff282ffb493ea56f581f8-dirty
author ebi-gxa
date Fri, 14 Apr 2023 13:12:01 +0000
parents 825dfd66e3fb
children a0274bc43b7e
comparison
equal deleted inserted replaced
26:825dfd66e3fb 27:7ebc22f77d86
1 <?xml version="1.0" encoding="utf-8"?> 1 <?xml version="1.0" encoding="utf-8"?>
2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy9" profile="@PROFILE@"> 2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy91" profile="@PROFILE@">
3 <description>modifies metadata and flags genes</description> 3 <description>modifies metadata and flags genes</description>
4 <macros> 4 <macros>
5 <import>scanpy_macros2.xml</import> 5 <import>scanpy_macros2.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
38 38
39 #if $copy_u.default: 39 #if $copy_u.default:
40 #for $i, $us in enumerate($copy_u.uns_sources): 40 #for $i, $us in enumerate($copy_u.uns_sources):
41 ln -s '${us}' uns_source_${i}.h5 && 41 ln -s '${us}' uns_source_${i}.h5 &&
42 #end for 42 #end for
43 #end if
44
## Stage the optional cell metadata table under the fixed name that the
## operations script reads. The path is single-quoted, like the uns_source
## links above, so unusual characters in the dataset path cannot split the
## shell command.
#if $add_cell_metadata.default:
ln -s '${add_cell_metadata.file}' cell_metadata.tsv &&
#end if
44 python $operations 48 python $operations
45 ]]></command> 49 ]]></command>
46 <configfiles> 50 <configfiles>
47 <configfile name="operations"> 51 <configfile name="operations">
55 if new_field is None: 59 if new_field is None:
56 new_field = f"{field}_u" 60 new_field = f"{field}_u"
57 appendents = (suffix + df.groupby(field).cumcount().astype(str).replace('0','')).replace(suffix, '') 61 appendents = (suffix + df.groupby(field).cumcount().astype(str).replace('0','')).replace(suffix, '')
58 df[new_field] = df[field].astype(str) + appendents.astype(str) 62 df[new_field] = df[field].astype(str) + appendents.astype(str)
59 return df 63 return df
60 64
61 adata = sc.read('input.h5') 65 adata = sc.read('input.h5')
66
67 #if $add_cell_metadata.default:
68 import pandas as pd
69
def add_cell_metadata(ad, metadata_file="cell_metadata.tsv", drop_duplicates=True):
    """Left-join a tab-separated cell metadata table onto ``ad.obs``.

    Parameters
    ----------
    ad
        Object exposing an ``obs`` pandas DataFrame; its index is matched
        against the first column of the metadata file.
    metadata_file
        Path to a tab-separated file with a header row. The first column is
        used as the index for the join.
    drop_duplicates
        If True, metadata columns whose name already exists in ``ad.obs`` are
        discarded and the existing column is kept. If False, the clashing
        metadata column is kept under the name ``<column>_x``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding every row of ``ad.obs`` plus the metadata columns.
        Rows without a match in the metadata get missing values. ``ad`` itself
        is not modified.
    """
    metadata_df = pd.read_csv(metadata_file, sep="\t", index_col=0)

    # Resolve name clashes before merging, so the merge never has to invent
    # suffixes and the existing obs columns are left untouched.
    for col in ad.obs.columns:
        if col in metadata_df.columns:
            if drop_duplicates:
                print(f"Dropping {col} from the metadata, keeping the existing column")
                metadata_df = metadata_df.drop(col, axis=1)
            else:
                print(f"Renaming {col} to {col}_x")
                metadata_df = metadata_df.rename(columns={col: col + "_x"})

    merged_obs = ad.obs.merge(
        metadata_df, left_index=True, right_index=True, how="left"
    )

    # A left join can degrade a column to object dtype when some rows have no
    # match. Clashes were resolved above, so every metadata column appears in
    # the merged frame under its own name; no suffix probing is needed.
    for col in metadata_df.columns:
        if merged_obs[col].dtype != object:
            continue
        prev_dtype = metadata_df[col].dtype
        has_missing = merged_obs[col].isna().any()
        # Casting back to the previous dtype is only safe when nothing is
        # missing: astype(bool) would silently turn a missing value into True.
        # Fall back to category, which keeps missing values as missing.
        if prev_dtype == str or prev_dtype == object or has_missing:
            prev_dtype = "category"
        print(f"Changing {col} from {merged_obs[col].dtype} to {prev_dtype}")
        merged_obs[col] = merged_obs[col].astype(prev_dtype)
    return merged_obs
99
100 adata.obs = add_cell_metadata(adata)
101 #end if
62 102
63 #if $copy_adata_to_raw: 103 #if $copy_adata_to_raw:
64 adata.raw = adata 104 adata.raw = adata
65 #end if 105 #end if
66 106
251 </configfiles> 291 </configfiles>
252 292
253 <inputs> 293 <inputs>
254 <param name="input_obj_file" argument="input-object-file" type="data" format="h5,h5ad" label="Input object in hdf5 AnnData format"/> 294 <param name="input_obj_file" argument="input-object-file" type="data" format="h5,h5ad" label="Input object in hdf5 AnnData format"/>
255 <expand macro="output_object_params_no_loom"/> 295 <expand macro="output_object_params_no_loom"/>
296 <conditional name="add_cell_metadata">
297 <param name="default" type="boolean" checked="false" label="Merge additional cell metadata"/>
298 <when value="true">
299 <param name="file" type="data" label="Cell metadata with headers" help="A tabular file with headers, where the first column contains cell barcodes. Will be merged via a left join, so not all cells in the obs need to be in the metadata. Currently duplicated column headers will be ignored and the originals in the AnnData will be kept." format="tsv,tabular"/>
300 </when>
301 <when value="false"/>
302 </conditional>
256 <param name="copy_adata_to_raw" type="boolean" label="Copy AnnData to .raw" help="If activated, it will do 'adata.raw = adata'" checked="false"/> 303 <param name="copy_adata_to_raw" type="boolean" label="Copy AnnData to .raw" help="If activated, it will do 'adata.raw = adata'" checked="false"/>
257 <repeat name="modifications" title="Change field names in AnnData observations" min="0"> 304 <repeat name="modifications" title="Change field names in AnnData observations" min="0">
258 <param name="from_obs" type="text" label="Original name" help="Name in observations that you want to change"> 305 <param name="from_obs" type="text" label="Original name" help="Name in observations that you want to change">
259 <sanitizer> 306 <sanitizer>
260 <valid initial="string.printable"/> 307 <valid initial="string.printable"/>
361 </assert_contents> 408 </assert_contents>
362 </output> 409 </output>
363 </test> 410 </test>
364 <test> 411 <test>
365 <param name="input_obj_file" value="anndata_ops.h5"/> 412 <param name="input_obj_file" value="anndata_ops.h5"/>
413 <conditional name="add_cell_metadata">
414 <param name="default" value="true"/>
415 <param name="file" value="test_incomplete_metadata.tsv"/>
416 </conditional>
417 <output name="output_h5ad" ftype="h5ad">
418 <assert_contents>
419 <has_h5_keys keys="obs/cell_type"/>
420 </assert_contents>
421 </output>
422 </test>
423 <test>
424 <param name="input_obj_file" value="anndata_ops.h5"/>
366 <repeat name="var_modifications" > 425 <repeat name="var_modifications" >
367 <param name="from_var" value = "gene_symbols" /> 426 <param name="from_var" value = "gene_symbols" />
368 <param name="to_var" value = "gene_symbols_unique" /> 427 <param name="to_var" value = "gene_symbols_unique" />
369 <param name="make_unique" value = "True" /> 428 <param name="make_unique" value = "True" />
370 </repeat> 429 </repeat>