comparison decoupler_pseudobulk.py @ 11:db14ac3f6b43 draft default tip

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
author ebi-gxa
date Wed, 19 Feb 2025 16:55:51 +0000
parents 2c5686d627c0
children
comparison
equal deleted inserted replaced
10:97c2c52a7ab4 11:db14ac3f6b43
1 import argparse 1 import argparse
2 2
3 import anndata 3 import anndata
4 import decoupler 4 import decoupler
5 import numpy as np
5 import pandas as pd 6 import pandas as pd
6 7
7 8
8 def get_pseudobulk( 9 def get_pseudobulk(
9 adata, 10 adata,
30 mode=mode, 31 mode=mode,
31 use_raw=use_raw, 32 use_raw=use_raw,
32 min_cells=min_cells, 33 min_cells=min_cells,
33 min_counts=min_counts, 34 min_counts=min_counts,
34 ) 35 )
36
37
def create_pseudo_replicates(adata, sample_key, num_replicates, seed=None):
    """
    Create pseudo replicates for each sample in the sample_key groups.

    The cells of every sample are shuffled and split into
    ``num_replicates`` groups whose sizes differ by at most one cell.
    Each cell is labelled ``<sample>_rep<i>`` in a new
    ``f"{sample_key}_pseudo"`` column of ``adata.obs``; ``adata`` is
    modified in place and also returned.

    Parameters
    ----------
    adata : anndata.AnnData
        The AnnData object. Only ``adata.obs`` is read and written.
    sample_key : str
        The column in adata.obs that defines the samples.
    num_replicates : int
        Number of pseudo replicates to create per sample. A sample with
        fewer cells than this gets one replicate per cell.
    seed : int, optional
        Seed for the shuffling. When given, a private random state is
        used so the process-wide NumPy random state is left untouched.
        When None, the global NumPy random state is used.

    Returns
    -------
    anndata.AnnData
        The AnnData object with pseudo replicates.

    Raises
    ------
    ValueError
        If num_replicates is smaller than 1.

    Examples
    --------
    >>> import anndata
    >>> import pandas as pd
    >>> import numpy as np
    >>> data = {
    ...     'obs': pd.DataFrame({'sample': ['A', 'A', 'B', 'B']}),
    ...     'X': np.array([[1, 0], [0, 1], [1, 1], [0, 0]])
    ... }
    >>> adata = anndata.AnnData(X=data['X'], obs=data['obs'])
    >>> adata = create_pseudo_replicates(adata, 'sample', 2, seed=0)
    >>> sorted(adata.obs['sample_pseudo'].unique())
    ['A_rep1', 'A_rep2', 'B_rep1', 'B_rep2']
    """
    if num_replicates < 1:
        raise ValueError("num_replicates must be a positive integer")

    # RandomState(seed) yields the same shuffles as np.random.seed(seed)
    # followed by np.random.shuffle, without reseeding the global state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    new_sample_key = f"{sample_key}_pseudo"
    adata.obs[new_sample_key] = adata.obs[sample_key].astype(str)

    for sample in adata.obs[sample_key].unique():
        in_sample = (adata.obs[sample_key] == sample).to_numpy()
        # Copy so the in-place shuffle can never touch adata.obs.index.
        sample_indices = adata.obs.index[in_sample].to_numpy(copy=True)
        rng.shuffle(sample_indices)  # Shuffle the indices to randomize

        # array_split spreads the remainder over the first replicates, so
        # no cell is left carrying the bare sample name (which would form
        # an unintended extra pseudobulk group).
        chunks = np.array_split(sample_indices, num_replicates)
        for i, replicate_indices in enumerate(chunks):
            if len(replicate_indices) == 0:
                continue  # fewer cells than requested replicates
            adata.obs.loc[replicate_indices, new_sample_key] = (
                adata.obs.loc[replicate_indices, new_sample_key]
                + f"_rep{i+1}"
            )

    return adata
35 90
36 91
37 def prepend_c_to_index(index_value): 92 def prepend_c_to_index(index_value):
38 if index_value and index_value[0].isdigit(): 93 if index_value and index_value[0].isdigit():
39 return "C" + index_value 94 return "C" + index_value
304 359
305 factor_fields = None 360 factor_fields = None
306 if args.factor_fields: 361 if args.factor_fields:
307 factor_fields = args.factor_fields.split(",") 362 factor_fields = args.factor_fields.split(",")
308 check_fields(factor_fields, adata) 363 check_fields(factor_fields, adata)
364
365 # Create pseudo replicates if specified
366 if args.num_pseudo_replicates:
367 adata = create_pseudo_replicates(
368 adata, args.sample_key, args.num_pseudo_replicates, seed=args.seed
369 )
370 args.sample_key = f"{args.sample_key}_pseudo"
309 371
310 print(f"Using mode: {args.mode}") 372 print(f"Using mode: {args.mode}")
311 # Perform pseudobulk analysis 373 # Perform pseudobulk analysis
312 pseudobulk_data = get_pseudobulk( 374 pseudobulk_data = get_pseudobulk(
313 adata, 375 adata,
662 "--min_total_counts", 724 "--min_total_counts",
663 type=int, 725 type=int,
664 help="Minimum total count threshold for filtering by expression", 726 help="Minimum total count threshold for filtering by expression",
665 ) 727 )
666 parser.add_argument( 728 parser.add_argument(
729 "--num_pseudo_replicates",
730 type=int,
731 choices=range(3, 1000),
732 help="Number of pseudo replicates to create per sample (at least 3)",
733 required=False
734 )
735 parser.add_argument(
736 "--seed",
737 type=int,
738 default=None,
739 help="Random seed for pseudo replicate sampling",
740 )
741 parser.add_argument(
667 "--anndata_output_path", 742 "--anndata_output_path",
668 type=str, 743 type=str,
669 help="Path to save the filtered AnnData object or pseudobulk data", 744 help="Path to save the filtered AnnData object or pseudobulk data",
670 ) 745 )
671 parser.add_argument( 746 parser.add_argument(