Mercurial > repos > bimib > cobraxy
comparison COBRAxy/marea_cluster.py @ 428:8cd0c70b0084 draft
Uploaded
author | francesco_lapi |
---|---|
date | Wed, 10 Sep 2025 13:21:41 +0000 |
parents | 1032cb1028f1 |
children | 06564187fba3 |
comparison
equal
deleted
inserted
replaced
427:4a385fdb9e58 | 428:8cd0c70b0084 |
---|---|
45 type = str, | 45 type = str, |
46 choices = ['kmeans', 'dbscan', 'hierarchy'], | 46 choices = ['kmeans', 'dbscan', 'hierarchy'], |
47 default = 'kmeans', | 47 default = 'kmeans', |
48 help = 'choose clustering algorythm') | 48 help = 'choose clustering algorythm') |
49 | 49 |
| | 50 parser.add_argument('-sc', '--scaling', |
| | 51 type = str, |
| | 52 choices = ['true', 'false'], |
| | 53 default = 'true', |
| | 54 help = 'choose if you want to scaling the data') |
| | 55 |
50 parser.add_argument('-k1', '--k_min', | 56 parser.add_argument('-k1', '--k_min', |
51 type = int, | 57 type = int, |
52 default = 2, | 58 default = 2, |
53 help = 'choose minimun cluster number to be generated') | 59 help = 'choose minimun cluster number to be generated') |
54 | 60 |
512 | 518 |
513 for i in X.columns: | 519 for i in X.columns: |
514 if any(val is None or np.isnan(val) for val in X[i]): | 520 if any(val is None or np.isnan(val) for val in X[i]): |
515 X = X.drop(columns=[i]) | 521 X = X.drop(columns=[i]) |
516 | 522 |
| | 523 if args.scaling == True: |
| | 524 list_to_remove = [] |
| | 525 toll_std=1e-8 |
| | 526 for i in X.columns: |
| | 527 mean_i = X[i].mean() |
| | 528 std_i = X[i].std() |
| | 529 if std_i >toll_std: |
| | 530 #scaling with mean 0 and std 1 |
| | 531 X[i] = (X[i]-mean_i)/std_i |
| | 532 else: |
| | 533 #remove feature because std = 0 during clustering |
| | 534 list_to_remove.append(i) |
| | 535 if len(list_to_remove)>0: |
| | 536 X = X.drop(columns=list_to_remove) |
| | 537 |
517 if args.k_max != None: | 538 if args.k_max != None: |
518 numero_classi = X.shape[0] | 539 numero_classi = X.shape[0] |
519 while args.k_max >= numero_classi: | 540 while args.k_max >= numero_classi: |
520 err = 'Skipping k = ' + str(args.k_max) + ' since it is >= number of classes of dataset' | 541 err = 'Skipping k = ' + str(args.k_max) + ' since it is >= number of classes of dataset' |
521 warning(err) | 542 warning(err) |