Mercurial > repos > bimib > cobraxy
comparison COBRAxy/marea_cluster.py @ 309:38c9a958ea78 draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Thu, 22 May 2025 16:03:37 +0000 |
| parents | 4a677fc67aeb |
| children | 4599fb23f25b |
comparison
equal
deleted
inserted
replaced
| 308:797d0e002934 | 309:38c9a958ea78 |
|---|---|
| 57 default = 7, | 57 default = 7, |
| 58 help = 'choose maximum cluster number to be generated') | 58 help = 'choose maximum cluster number to be generated') |
| 59 | 59 |
| 60 parser.add_argument('-el', '--elbow', | 60 parser.add_argument('-el', '--elbow', |
| 61 type = str, | 61 type = str, |
| 62 default = 'false', | 62 default = 'False', |
| 63 choices = ['true', 'false'], | 63 choices = ['True', 'False'], |
| 64 help = 'choose if you want to generate an elbow plot for kmeans') | 64 help = 'choose if you want to generate an elbow plot for kmeans') |
| 65 | 65 |
| 66 parser.add_argument('-si', '--silhouette', | 66 parser.add_argument('-si', '--silhouette', |
| 67 type = str, | 67 type = str, |
| 68 default = 'false', | 68 default = 'False', |
| 69 choices = ['true', 'false'], | 69 choices = ['True', 'False'], |
| 70 help = 'choose if you want silhouette plots') | 70 help = 'choose if you want silhouette plots') |
| 71 | 71 |
| 72 parser.add_argument('-td', '--tool_dir', | 72 parser.add_argument('-td', '--tool_dir', |
| 73 type = str, | 73 type = str, |
| 74 required = True, | 74 required = True, |
| 75 help = 'your tool directory') | 75 help = 'your tool directory') |
| 76 | 76 |
| 77 parser.add_argument('-ms', '--min_samples', | 77 parser.add_argument('-ms', '--min_samples', |
| 78 type = float, | 78 type = int, |
| 79 help = 'min samples for dbscan (optional)') | 79 help = 'min samples for dbscan (optional)') |
| 80 | 80 |
| 81 parser.add_argument('-ep', '--eps', | 81 parser.add_argument('-ep', '--eps', |
| 82 type = float, | 82 type = float, |
| 83 help = 'eps for dbscan (optional)') | 83 help = 'eps for dbscan (optional)') |
| 104 s (str): The warning message to be logged and printed. | 104 s (str): The warning message to be logged and printed. |
| 105 | 105 |
| 106 Returns: | 106 Returns: |
| 107 None | 107 None |
| 108 """ | 108 """ |
| 109 args = process_args(sys.argv) | 109 |
| 110 with open(args.out_log, 'a') as log: | 110 with open(args.out_log, 'a') as log: |
| 111 log.write(s + "\n\n") | 111 log.write(s + "\n\n") |
| 112 print(s) | 112 print(s) |
| 113 | 113 |
| 114 ########################## read dataset ###################################### | 114 ########################## read dataset ###################################### |
| 211 | 211 |
| 212 Args: | 212 Args: |
| 213 k_min (int): The minimum number of clusters to consider. | 213 k_min (int): The minimum number of clusters to consider. |
| 214 k_max (int): The maximum number of clusters to consider. | 214 k_max (int): The maximum number of clusters to consider. |
| 215 dataset (pandas.DataFrame): The dataset to perform clustering on. | 215 dataset (pandas.DataFrame): The dataset to perform clustering on. |
| 216 elbow (str): Whether to generate an elbow plot for kmeans ('true' or 'false'). | 216 elbow (str): Whether to generate an elbow plot for kmeans ('True' or 'False'). |
| 217 silhouette (str): Whether to generate silhouette plots ('true' or 'false'). | 217 silhouette (str): Whether to generate silhouette plots ('True' or 'False'). |
| 218 best_cluster (str): The file path to save the output of the best cluster. | 218 best_cluster (str): The file path to save the output of the best cluster. |
| 219 | 219 |
| 220 Returns: | 220 Returns: |
| 221 None | 221 None |
| 222 """ | 222 """ |
| 223 if not os.path.exists(args.output_path): | 223 if not os.path.exists(args.output_path): |
| 224 os.makedirs(args.output_path) | 224 os.makedirs(args.output_path) |
| 225 | 225 |
| 226 | 226 |
| 227 if elbow == 'true': | 227 if elbow == 'True': |
| 228 elbow = True | 228 elbow = True |
| 229 else: | 229 else: |
| 230 elbow = False | 230 elbow = False |
| 231 | 231 |
| 232 if silhouette == 'true': | 232 if silhouette == 'True': |
| 233 silhouette = True | 233 silhouette = True |
| 234 else: | 234 else: |
| 235 silhouette = False | 235 silhouette = False |
| 236 | 236 |
| 237 range_n_clusters = [i for i in range(k_min, k_max+1)] | 237 range_n_clusters = [i for i in range(k_min, k_max+1)] |
| 441 Args: | 441 Args: |
| 442 dataset (pandas.DataFrame): The dataset to be clustered. | 442 dataset (pandas.DataFrame): The dataset to be clustered. |
| 443 k_min (int): The minimum number of clusters to consider. | 443 k_min (int): The minimum number of clusters to consider. |
| 444 k_max (int): The maximum number of clusters to consider. | 444 k_max (int): The maximum number of clusters to consider. |
| 445 best_cluster (str): The file path to save the output of the best cluster. | 445 best_cluster (str): The file path to save the output of the best cluster. |
| 446 silhouette (str): Whether to generate silhouette plots ('true' or 'false'). | 446 silhouette (str): Whether to generate silhouette plots ('True' or 'False'). |
| 447 | 447 |
| 448 Returns: | 448 Returns: |
| 449 None | 449 None |
| 450 """ | 450 """ |
| 451 if not os.path.exists(args.output_path): | 451 if not os.path.exists(args.output_path): |
| 475 | 475 |
| 476 for i in range(len(labels)): | 476 for i in range(len(labels)): |
| 477 prefix = '' | 477 prefix = '' |
| 478 if (i + k_min == best): | 478 if (i + k_min == best): |
| 479 prefix = '_BEST' | 479 prefix = '_BEST' |
| 480 if silhouette == 'true': | 480 if silhouette == 'True': |
| 481 silhouette_draw(dataset, labels[i], i + k_min, f'{args.output_path}/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png') | 481 silhouette_draw(dataset, labels[i], i + k_min, f'{args.output_path}/silhouette_with_' + str(i + k_min) + prefix + '_clusters.png') |
| 482 | 482 |
| 483 for i in range(len(labels)): | 483 for i in range(len(labels)): |
| 484 if (i + k_min == best): | 484 if (i + k_min == best): |
| 485 labels = labels[i] | 485 labels = labels[i] |
