Mercurial > repos > bimib > cobraxy
comparison COBRAxy/ras_generator.py @ 490:c6ea189ea7e9 draft
Uploaded
author | francesco_lapi |
---|---|
date | Mon, 29 Sep 2025 15:13:21 +0000 |
parents | 97eea560a10f |
children | 96f512dff490 |
comparison
equal
deleted
inserted
replaced
489:97eea560a10f | 490:c6ea189ea7e9 |
---|---|
104 for key, value in reactions.items(): | 104 for key, value in reactions.items(): |
105 ids.append(key) | 105 ids.append(key) |
106 rules.append(value) | 106 rules.append(value) |
107 return (ids, rules) | 107 return (ids, rules) |
108 | 108 |
109 ############################ check_methods #################################### | |
def gene_type(l: str, name: str) -> str:
    """
    Determine the type of a gene ID.

    The checks run in a fixed order (HGNC, Ensembl, symbol, Entrez) and the
    first one that matches decides the result.

    Args:
        l (str): The gene identifier to check.
        name (str): The name of the dataset, used in error messages.

    Returns:
        str: The type of gene ID ('hugo_id', 'ensembl_gene_id', 'symbol', or 'entrez_id').

    Raises:
        SystemExit: If the gene ID type is not supported, the execution is aborted
            through ``sys.exit`` with an explanatory message.
    """
    if check_hgnc(l):
        return 'hugo_id'
    if check_ensembl(l):
        return 'ensembl_gene_id'
    if check_symbol(l):
        return 'symbol'
    if check_entrez(l):
        return 'entrez_id'

    # No check matched: abort. The message previously lacked the space between
    # "ID" and "types" and misspelled "Ensembl".
    sys.exit('Execution aborted:\n' +
             'gene ID type in ' + name + ' not supported. Supported ID ' +
             'types are: HUGO ID, Ensembl ID, HUGO symbol, Entrez ID\n')
136 | |
def check_hgnc(l: str) -> bool:
    """
    Check if a gene identifier follows the HGNC format.

    An identifier matches when it is the prefix ``HGNC:`` (compared
    case-insensitively) followed by one or more ASCII digits.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the HGNC format, False otherwise.
    """
    prefix = 'HGNC:'
    if l[:len(prefix)].upper() != prefix:
        return False

    number = l[len(prefix):]
    # str.isdigit() also accepts non-ASCII digit characters (e.g. superscript
    # two), which are not valid in an HGNC ID, so compare against 0-9 directly.
    return bool(number) and all('0' <= ch <= '9' for ch in number)
154 | |
def check_ensembl(l: str) -> bool:
    """
    Check if a gene identifier follows the Ensembl format.

    The identifier is compared case-insensitively against the Ensembl stable
    ID shape: ``ENS``, an optional run of letters (species and feature-type
    prefix), exactly eleven digits, and an optional ``.version`` suffix.
    A bare ``ENS`` prefix is not enough, so gene symbols that merely start
    with "ENS" (for example ``ENSA``) are not reported as Ensembl IDs.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the Ensembl format, False otherwise.
    """
    # Imported locally so the function does not depend on a module-level import.
    import re

    # Explicit [0-9] instead of \d: \d would also match non-ASCII digits.
    return re.fullmatch(r'ENS[A-Z]*[0-9]{11}(\.[0-9]+)?', l.upper()) is not None
166 | |
167 | |
def check_symbol(l: str) -> bool:
    """
    Check if a gene identifier follows the symbol format.

    A symbol must start with a letter and otherwise consist of alphanumeric
    characters, optionally split into non-empty groups by single hyphens
    (e.g. ``HLA-A``, ``MT-CO1``). A single letter is accepted.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the symbol format, False otherwise.
    """
    if not l or not l[0].isalpha():
        return False

    # Every hyphen-separated group must be non-empty and alphanumeric; this
    # rejects leading/trailing/double hyphens, since ''.isalnum() is False.
    return all(part.isalnum() for part in l.split('-'))
185 | |
def check_entrez(l: str) -> bool:
    """
    Check if a gene identifier follows the Entrez ID format.

    An Entrez ID is a non-empty string made only of the ASCII digits 0-9.

    Args:
        l (str): The gene identifier to check.

    Returns:
        bool: True if the gene identifier follows the Entrez ID format, False otherwise.
    """
    # str.isdigit() is deliberately avoided: it returns True for non-ASCII
    # digit characters (e.g. superscripts) that int() cannot parse.
    return bool(l) and all('0' <= ch <= '9' for ch in l)
200 | 109 |
201 ############################ gene ############################################# | 110 ############################ gene ############################################# |
202 def data_gene(gene: pd.DataFrame, type_gene: str, name: str, gene_custom: Optional[Dict[str, str]]) -> Dict[str, str]: | 111 def data_gene(gene: pd.DataFrame, type_gene: str, name: str, gene_custom: Optional[Dict[str, str]]) -> Dict[str, str]: |
203 """ | 112 """ |
204 Process gene data to ensure correct formatting and handle duplicates. | 113 Process gene data to ensure correct formatting and handle duplicates. |