comparison COBRAxy/ras_generator.py @ 490:c6ea189ea7e9 draft

Uploaded
author francesco_lapi
date Mon, 29 Sep 2025 15:13:21 +0000
parents 97eea560a10f
children 96f512dff490
comparison
equal deleted inserted replaced
489:97eea560a10f 490:c6ea189ea7e9
104 for key, value in reactions.items(): 104 for key, value in reactions.items():
105 ids.append(key) 105 ids.append(key)
106 rules.append(value) 106 rules.append(value)
107 return (ids, rules) 107 return (ids, rules)
108 108
109 ############################ check_methods ####################################
def gene_type(l: str, name: str) -> str:
    """
    Determine the type of gene ID.

    The identifier is tested against each supported format in a fixed order
    (HGNC, Ensembl, symbol, Entrez) and the first format that matches wins.

    Args:
        l (str): The gene identifier to check.
        name (str): The name of the dataset, used in error messages.

    Returns:
        str: The type of gene ID ('hugo_id', 'ensembl_gene_id', 'symbol', or 'entrez_id').

    Raises:
        SystemExit: If the gene ID type is not supported, the execution is aborted
            through sys.exit with an explanatory message.
    """
    if check_hgnc(l):
        return 'hugo_id'
    if check_ensembl(l):
        return 'ensembl_gene_id'
    if check_symbol(l):
        return 'symbol'
    if check_entrez(l):
        return 'entrez_id'

    # No supported format matched: abort with a readable message.
    # The space after "Supported ID" keeps the two fragments from fusing
    # into "IDtypes" when they are concatenated.
    sys.exit('Execution aborted:\n' +
             'gene ID type in ' + name + ' not supported. Supported ID ' +
             'types are: HUGO ID, Ensembl ID, HUGO symbol, Entrez ID\n')
136
def check_hgnc(l: str) -> bool:
    """
    Tell whether a gene identifier looks like an HGNC ID.

    An identifier matches when it starts with 'HGNC:' (compared
    case-insensitively) and everything after those five characters is a
    non-empty run of digits.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the HGNC format, False otherwise.
    """
    prefix = 'HGNC:'

    # The identifier must be strictly longer than the prefix, otherwise
    # there is no numeric part to validate.
    if len(l) <= len(prefix):
        return False

    if not l.upper().startswith(prefix):
        return False

    numeric_part = l[len(prefix):]
    return numeric_part.isdigit()
154
def check_ensembl(l: str) -> bool:
    """
    Tell whether a gene identifier looks like an Ensembl ID.

    Only the prefix is inspected: the identifier matches when its
    upper-cased form begins with 'ENS'. Nothing after the prefix is
    validated.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the Ensembl format, False otherwise.
    """
    upper_id = l.upper()
    return upper_id.startswith('ENS')
166
167
def check_symbol(l: str) -> bool:
    """
    Check if a gene identifier follows the symbol format.

    A symbol is an alphabetic first character followed by zero or more
    alphanumeric characters, so a single letter is accepted.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the symbol format, False otherwise.
    """
    if not l:
        return False

    head, tail = l[0], l[1:]
    # ''.isalnum() is False, so an empty tail (one-character symbol) has to be
    # accepted explicitly instead of being delegated to isalnum().
    return head.isalpha() and (not tail or tail.isalnum())
185
def check_entrez(l: str) -> bool:
    """
    Tell whether a gene identifier looks like an Entrez ID.

    An identifier matches when it is a non-empty string made up solely of
    digit characters.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the Entrez ID format, False otherwise.
    """
    # Empty identifiers are rejected up front; everything else is decided
    # by the digit test.
    return bool(l) and l.isdigit()
200 109
201 ############################ gene ############################################# 110 ############################ gene #############################################
202 def data_gene(gene: pd.DataFrame, type_gene: str, name: str, gene_custom: Optional[Dict[str, str]]) -> Dict[str, str]: 111 def data_gene(gene: pd.DataFrame, type_gene: str, name: str, gene_custom: Optional[Dict[str, str]]) -> Dict[str, str]:
203 """ 112 """
204 Process gene data to ensure correct formatting and handle duplicates. 113 Process gene data to ensure correct formatting and handle duplicates.