annotate COBRAxy/rps_generator.py @ 293:7b8d9de81a86 draft

Uploaded
author francesco_lapi
date Thu, 15 May 2025 18:23:52 +0000
parents 5dd2ab4637aa
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
1 import math
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
2 import argparse
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
3
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
4 import numpy as np
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
5 import pickle as pk
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
6 import pandas as pd
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
7
293
7b8d9de81a86 Uploaded
francesco_lapi
parents: 281
diff changeset
8 from typing import Optional, List, Dict
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
9
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
10 import utils.general_utils as utils
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
11 import utils.reaction_parsing as reactionUtils
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
12
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
13 ########################## argparse ##########################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
# Module-wide holder for the parsed command-line arguments.
# Only declared here: main() assigns it before the functions that read it are called.
ARGS: argparse.Namespace
147
3fca9b568faf Uploaded
bimib
parents: 4
diff changeset
def process_args(args:List[str] = None) -> argparse.Namespace:
    """
    Processes command-line arguments.

    Args:
        args (list): List of command-line arguments. When None, argparse falls back to sys.argv.

    Returns:
        Namespace: An object containing parsed arguments.

    Raises:
        SystemExit: raised by argparse when the arguments are invalid, including when the 'custom'
            reaction choice is selected without providing --custom.
    """
    parser = argparse.ArgumentParser(
        usage = '%(prog)s [options]',
        description = "process some value's abundances and reactions to create RPS scores.")

    parser.add_argument(
        '-rc', '--reaction_choice',
        type = str,
        default = 'default',
        choices = ['default', 'custom'],
        help = 'chose which type of reaction dataset you want use')

    parser.add_argument(
        '-cm', '--custom',
        type = str,
        help = 'your dataset if you want custom reactions')

    parser.add_argument(
        '-td', '--tool_dir',
        type = str,
        required = True,
        help = 'your tool directory')

    parser.add_argument(
        '-ol', '--out_log',
        help = "Output log")

    parser.add_argument(
        '-id', '--input',
        type = str,
        required = True,
        help = 'input dataset')

    parser.add_argument(
        '-rp', '--rps_output',
        type = str,
        required = True,
        help = 'rps output')

    parsed = parser.parse_args(args)

    # Fail early with a clear usage error instead of handing a missing path to the reaction parser later on.
    if parsed.reaction_choice == 'custom' and parsed.custom is None:
        parser.error("--custom is required when --reaction_choice is 'custom'")

    return parsed
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
53
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
54 ############################ dataset name #####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def name_dataset(name_data :str, count :int) -> str:
    """
    Builds a unique name for a dataset starting from the one provided by the user.

    "Dataset" is the default name of any dataset: when the user left it unchanged, f"_{count}" is appended
    to tell datasets apart. Any other name is returned as it is (converted to a string).

    Args:
        name_data : name associated with the dataset (from frontend input params)
        count : counter used as the suffix that makes default names unique (external)

    Returns:
        str : the name made unique
    """
    label = str(name_data)
    return f"{label}_{count}" if label == 'Dataset' else label
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
70
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
71 ############################ get_abund_data ####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def get_abund_data(dataset: pd.DataFrame, cell_line_index:int) -> Optional[pd.Series]:
    """
    Extracts abundance data and turns it into a series for a specific cell line from the dataset, which rows are
    metabolites and columns are cell lines.

    Args:
        dataset (pandas.DataFrame): The DataFrame containing abundance data for all cell lines and metabolites.
        cell_line_index (int): The position of the column (cell line) of interest in the dataset.

    Returns:
        pd.Series or None: A series containing the values of the selected column, first row excluded.
                        The name of the series is the name of the cell line.
                        Returns None if the cell index is invalid.
    """
    # The index selects a column, so it has to be checked against the number of columns (not of rows).
    if not 0 <= cell_line_index < len(dataset.columns):
        print(f"Errore: This cell line index: '{cell_line_index}' is not valid.")
        return None

    # Purely positional selection: column by position, every row but the first one.
    # NOTE(review): the first row is skipped as in the original code, presumably because it is a header row - confirm.
    return dataset.iloc[1:, cell_line_index]
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
94
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
95 ############################ clean_metabolite_name ####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def clean_metabolite_name(name :str) -> str:
    """
    Normalizes a metabolite's name so that it can be matched against the dictionary of synonyms: separator
    characters (commas, semicolons, dashes, underscores, apostrophes, spaces and all kinds of brackets) are
    dropped and the remaining text is made lowercase.

    Args:
        name : the metabolite's name, as given in the dataset.

    Returns:
        str : a new string with the cleaned name.
    """
    dropped_chars = ",;-_'([{ }])"
    kept = []
    for ch in name:
        if ch in dropped_chars:
            continue
        kept.append(ch)

    return "".join(kept).lower()
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
108
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
109 ############################ get_metabolite_id ####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def get_metabolite_id(name :str, syn_dict :Dict[str, List[str]]) -> str:
    """
    Searches the dictionary of synonyms for an entry that lists the given metabolite's name.
    The name is cleaned with clean_metabolite_name before being compared, and entries are examined in the
    dictionary's iteration order: the first one containing the cleaned name wins.

    Args:
        name : the metabolite's name, as given in the dataset.
        syn_dict : the dictionary of synonyms, using unique identifiers as keys and lists of clean synonyms as values.

    Returns:
        str : the internal :str unique identifier of that metabolite, used in all other parts of the model in use.
        An empty string is returned if a match isn't found.
    """
    cleaned = clean_metabolite_name(name)
    matches = (metab_id for metab_id, synonyms in syn_dict.items() if cleaned in synonyms)
    return next(matches, "")
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
127
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
128 ############################ check_missing_metab ####################################
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
def check_missing_metab(reactions: Dict[str, Dict[str, int]], dataset_by_rows: Dict[str, List[float]], cell_lines_amt :int) -> List[str]:
    """
    Finds the metabolites that take part in at least one reaction but have no entry in the abundances
    dictionary, and gives each of them a placeholder entry with an abundance of 1 for every cell line.

    Parameters:
        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
        dataset_by_rows (dict): A dictionary representing abundances where keys are metabolite names and values are their corresponding abundances for all cell lines.
        cell_lines_amt : amount of cell lines, it is the length of the placeholder list created for each missing metabolite.

    Returns:
        list[str] : names of the metabolites that were missing, in the order in which they were first encountered.

    Side effects:
        dataset_by_rows : mut (one new entry per missing metabolite)
    """
    added_metabolites = []
    for substrates in reactions.values():
        for metab_name in substrates:
            if metab_name in dataset_by_rows:
                continue

            # A fresh list for each metabolite, so that entries never share the same object
            dataset_by_rows[metab_name] = [1] * cell_lines_amt
            added_metabolites.append(metab_name)

    return added_metabolites
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
152
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
153 ############################ calculate_rps ####################################
293
7b8d9de81a86 Uploaded
francesco_lapi
parents: 281
diff changeset
def calculate_rps(reactions: Dict[str, Dict[str, int]], abundances: Dict[str, float], black_list: List[str], missing_list: List[str], substrateFreqTable: Dict[str, int]) -> Dict[str, float]:
    """
    Calculate the Reaction Propensity scores (RPS) based on the availability of reaction substrates, for (ideally) each input model reaction and for each sample.
    The score is the log-transformed product of the concentrations of the reacting substances, each one normalized by how frequent the metabolite is
    in the selected model's reactions and raised to a power equal to its stoichiometric coefficient. It is computed directly in log space, that is as
    the sum over the substrates of log((abundance + eps) / frequency) * stoichiometry.

    Parameters:
        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
        abundances (dict): A dictionary representing metabolite abundances where keys are metabolite names and values are their corresponding abundances.
        black_list (list): A list containing metabolite names that are not considered significant for the RPS calculation.
        missing_list (list): A list containing metabolite names that were missing in the original abundances dictionary and thus their values were set to 1.
        substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value).

    Returns:
        dict: A dictionary containing Reaction Propensity Scores (RPS) where keys are reaction names and values are the corresponding RPS scores.
        The score is NaN for reactions whose substrates are all black-listed or missing (or that have no substrates at all).

    Raises:
        KeyError: if a substrate has no entry in abundances or in substrateFreqTable.
        ValueError: raised by math.log if an abundance is negative.
    """
    # Built once: set membership is O(1) and neither list changes while scoring.
    not_significant = set(black_list).union(missing_list)
    # Smallest float increment, added to each abundance so that a value of 0 doesn't reach log(0).
    eps = np.finfo(float).eps

    rps_scores = {}
    for reaction_name, substrates in reactions.items():
        # A sum of logs is the log of a product, so the neutral starting value is log(1) = 0.
        total_contribution = 0.0
        metab_significant = False
        for metabolite, stoichiometry in substrates.items():
            abundance = abundances[metabolite]
            if math.isnan(abundance): abundance = 1 # unknown abundances are treated as 1

            if metabolite not in not_significant:
                metab_significant = True

            # NOTE(review): black-listed and missing substrates still add their term here, they only
            # can't make the reaction significant on their own - confirm this is the intended behavior.
            total_contribution += math.log((abundance + eps) / substrateFreqTable[metabolite]) * stoichiometry

        rps_scores[reaction_name] = total_contribution if metab_significant else math.nan

    return rps_scores
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
186
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
187 ############################ rps_for_cell_lines ####################################
293
7b8d9de81a86 Uploaded
francesco_lapi
parents: 281
diff changeset
def rps_for_cell_lines(dataset: List[List[str]], reactions: Dict[str, Dict[str, int]], black_list: List[str], syn_dict: Dict[str, List[str]], substrateFreqTable: Dict[str, int]) -> None:
    """
    Computes the Reaction Propensity Scores (RPS) of every cell line found in the dataset and writes them
    to the file at ARGS.rps_output as a tab-separated table with one row per reaction and one column per cell line.

    The first row of the dataset provides the cell line names (its first cell is skipped), each following row
    starts with a metabolite name followed by one abundance per cell line. When the default reactions are in use
    metabolite names are translated to internal identifiers through the synonyms, and rows without a match are
    discarded; with custom reactions names are used as they are.

    Parameters:
        dataset : the dataset's data, by rows
        reactions (dict): A dictionary representing reactions where keys are reaction names and values are dictionaries containing metabolite names as keys and stoichiometric coefficients as values.
        black_list (list): A list containing metabolite names that should be excluded from the RPS calculation.
        syn_dict (dict): A dictionary where keys are general metabolite names and values are lists of possible synonyms.
        substrateFreqTable (dict): A dictionary where each metabolite name (key) is associated with how many times it shows up in the model's reactions (value).

    Returns:
        None
    """
    sample_names = dataset[0][1:]
    use_synonyms = ARGS.reaction_choice == "default"

    # metabolite identifier -> abundances in all cell lines, in the same order as sample_names
    abundances_by_metab = {}
    for row in dataset[1:]:
        raw_name = row[0]
        metab_id = get_metabolite_id(raw_name, syn_dict) if use_synonyms else raw_name
        if not metab_id:
            continue # no known synonym (or empty name): the row can't be used

        abundances_by_metab[metab_id] = [*map(utils.Float(), row[1:])]

    # Substrates without data get a placeholder abundance, their names are needed when scoring
    missing_list = check_missing_metab(reactions, abundances_by_metab, len(sample_names))

    scores_by_sample :Dict[str, Dict[str, float]] = {}
    for column, sample_name in enumerate(sample_names):
        # Slice of the dataset for this cell line only: metabolite identifier -> abundance
        sample_abundances = {}
        for metab, values in abundances_by_metab.items():
            sample_abundances[metab] = values[column]

        scores_by_sample[sample_name] = calculate_rps(
            reactions, sample_abundances, black_list, missing_list, substrateFreqTable)

    table = pd.DataFrame.from_dict(scores_by_sample)
    table.index.name = 'Reactions'
    # Missing scores (NaN) are written out as the text 'None'
    table.to_csv(ARGS.rps_output, sep='\t', na_rep='None', index=True)
5dd2ab4637aa Uploaded
francesco_lapi
parents: 280
diff changeset
221
4
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
222 ############################ main ####################################
147
3fca9b568faf Uploaded
bimib
parents: 4
diff changeset
def main(args:List[str] = None) -> None:
    """
    Initializes everything and sets the program in motion based on the frontend input arguments:
    parses the arguments into the global ARGS, loads the black list and the synonyms from the tool directory,
    reads the input dataset, obtains the reactions together with their substrate frequencies (from the tool's
    pickle files for the default choice, from the user's file for the custom one) and computes the RPS.

    Args:
        args : command-line arguments, forwarded to process_args.

    Returns:
        None
    """
    global ARGS
    ARGS = process_args(args)

    # TODO:use utils functions vvv
    # NOTE: unpickling can execute arbitrary code, these files must only ever come from the tool's own directory.
    pickle_dir = ARGS.tool_dir + '/local/pickle files/'

    with open(pickle_dir + 'black_list.pickle', 'rb') as bl:
        black_list = pk.load(bl)

    with open(pickle_dir + 'synonyms.pickle', 'rb') as sd:
        syn_dict = pk.load(sd)

    dataset = utils.readCsv(utils.FilePath.fromStrPath(ARGS.input), '\t', skipHeader = False)

    if ARGS.reaction_choice == 'default':
        # Context managers make sure the files are closed as soon as they have been read
        with open(pickle_dir + 'reactions.pickle', 'rb') as rf:
            reactions = pk.load(rf)

        with open(pickle_dir + 'substrate_frequencies.pickle', 'rb') as sf:
            substrateFreqTable = pk.load(sf)

    elif ARGS.reaction_choice == 'custom':
        reactions = reactionUtils.parse_custom_reactions(ARGS.custom)

        # Count in how many reactions each substrate shows up
        substrateFreqTable = {}
        for substrates in reactions.values():
            for substrateName in substrates:
                substrateFreqTable[substrateName] = substrateFreqTable.get(substrateName, 0) + 1

    rps_for_cell_lines(dataset, reactions, black_list, syn_dict, substrateFreqTable)
    print('Execution succeded')
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
256
41f35c2f0c7b Uploaded
luca_milaz
parents:
diff changeset
257 ##############################################################################
293
7b8d9de81a86 Uploaded
francesco_lapi
parents: 281
diff changeset
# Run the tool only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()