comparison ipapy2_gibbs_sampler.py @ 0:7f84a8a5edde draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/ipapy2 commit 64b61ff2823b4f54868c0ab7a4c0dc49eaf2979a
author recetox
date Fri, 16 May 2025 08:00:41 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7f84a8a5edde
1 from ipaPy2 import ipa
2 from utils import flattern_annotations, GibbsArgumentParser, group_by_peak_id
3
4
def main(
    input_dataset_mapped_isotope_patterns,
    input_dataset_annotations,
    integrating_mode,
    input_dataset_bio,
    noits,
    burn,
    delta_bio,
    delta_add,
    all_out,
    zs,
    zs_out,
    output_dataset,
):
    """Run the ipaPy2 Gibbs sampler selected by ``integrating_mode``.

    Args:
        input_dataset_mapped_isotope_patterns: First positional argument
            forwarded to the ipaPy2 sampler.
        input_dataset_annotations: Table of annotations with a ``post``
            column. Empty strings in ``post`` are replaced by 0 (this
            assignment modifies the passed-in object), and every other
            empty string is replaced by ``None`` before grouping.
        integrating_mode: ``"adducts"`` selects ``Gibbs_sampler_add``,
            ``"biochemical"`` selects ``Gibbs_sampler_bio``; any other value
            selects ``Gibbs_sampler_bio_add``.
        input_dataset_bio: Passed as ``Bio`` to the two biochemical samplers;
            unused in ``"adducts"`` mode.
        noits: Forwarded as ``noits`` to the sampler.
        burn: Forwarded as ``burn`` to the sampler.
        delta_bio: Forwarded as ``delta_bio`` to the biochemical samplers.
        delta_add: Forwarded as ``delta_add`` to the adduct-aware samplers.
        all_out: Forwarded as ``all_out``; when truthy, the sampler's return
            value is also written through ``zs_out``.
        zs: Forwarded as ``zs``; any falsy value is normalised to ``None``.
        zs_out: ``(write_func, file_path)`` pair used to write the sampler's
            return value when ``all_out`` is truthy.
        output_dataset: ``(write_func, file_path)`` pair used to write the
            flattened annotations.
    """
    annotations_df = input_dataset_annotations
    annotations_df["post"] = annotations_df["post"].replace("", 0)
    annotations_df = annotations_df.replace("", None)
    annotations = group_by_peak_id(annotations_df)

    if not zs:
        zs = None

    # Dispatch on the ``integrating_mode`` parameter rather than on the
    # module-level ``args`` object, which only exists when this file is run
    # as a script.
    if integrating_mode == "adducts":
        zs = ipa.Gibbs_sampler_add(
            input_dataset_mapped_isotope_patterns,
            annotations,
            noits=noits,
            burn=burn,
            delta_add=delta_add,
            all_out=all_out,
            zs=zs,
        )
    elif integrating_mode == "biochemical":
        zs = ipa.Gibbs_sampler_bio(
            input_dataset_mapped_isotope_patterns,
            annotations,
            Bio=input_dataset_bio,
            noits=noits,
            burn=burn,
            delta_bio=delta_bio,
            all_out=all_out,
            zs=zs,
        )
    else:
        zs = ipa.Gibbs_sampler_bio_add(
            input_dataset_mapped_isotope_patterns,
            annotations,
            Bio=input_dataset_bio,
            noits=noits,
            burn=burn,
            delta_bio=delta_bio,
            delta_add=delta_add,
            all_out=all_out,
            zs=zs,
        )

    # NOTE(review): the sampler's return value is not used for the
    # annotations, so it presumably updates ``annotations`` in place -
    # confirm against the ipaPy2 documentation.
    annotations_flat = flattern_annotations(annotations)
    write_func, file_path = output_dataset
    write_func(annotations_flat, file_path)

    if all_out:
        write_func, file_path = zs_out
        write_func(zs, file_path)
69
70
if __name__ == "__main__":
    # Command-line entry point: build the argument parser, parse the options
    # and hand them to main(). Options read below but not declared here
    # (noits, burn, delta_add, all_out, zs, zs_out, output_dataset) are
    # presumably declared by GibbsArgumentParser itself - verify in utils.
    parser = GibbsArgumentParser(
        description="""Gibbs sampler considering both biochemical and adducts connections. The
        function computes the posterior probabilities of the annotations
        considering the possible biochemical connections reported in Bio and the
        possible adducts connection.""",
    )
    # NOTE(review): "load_data" is a custom argparse action, presumably
    # registered by GibbsArgumentParser; it consumes two values per option.
    parser.add_argument(
        "--input_dataset_mapped_isotope_patterns",
        nargs=2,
        action="load_data",
        required=True,
        help="a dataframe containing the measured intensities across several samples.",
    )
    parser.add_argument(
        "--input_dataset_annotations",
        nargs=2,
        action="load_data",
        required=True,
        help="a datset containing the annotations of the features.",
    )
    parser.add_argument(
        "--integrating_mode",
        type=str,
        required=True,
        choices=["adducts", "biochemical", "biochemical_adducts"],
        help=(
            "The mode of integration. Options are 'adducts', 'biochemical', or"
            " 'biochemical_adducts'."
        ),
    )
    parser.add_argument(
        "--input_dataset_bio",
        nargs=2,
        action="load_data",
        type=str,
        help="""dataframe (2 columns), reporting all the possible connections between
        compounds. It uses the unique ids from the database. It could be the
        output of Compute_Bio() or Compute_Bio_Parallel()""",
    )
    parser.add_argument(
        "--delta_bio",
        type=float,
        # The help text promises a default of 1; without an explicit default
        # argparse would hand None to the sampler when the option is omitted.
        default=1.0,
        help="""parameter used when computing the conditional priors. The
        parameter must be positive. The smaller the parameter the more
        weight the adducts connections have on the posterior
        probabilities. Default 1.""",
    )
    args = parser.parse_args()

    # The biochemical samplers receive this dataset as ``Bio``; report a
    # clear usage error instead of forwarding None to them.
    if args.integrating_mode != "adducts" and args.input_dataset_bio is None:
        parser.error(
            "--input_dataset_bio is required when --integrating_mode is"
            " 'biochemical' or 'biochemical_adducts'."
        )

    main(
        args.input_dataset_mapped_isotope_patterns,
        args.input_dataset_annotations,
        args.integrating_mode,
        args.input_dataset_bio,
        args.noits,
        args.burn,
        args.delta_bio,
        args.delta_add,
        args.all_out,
        args.zs,
        args.zs_out,
        args.output_dataset,
    )