comparison COBRAxy/marea_cluster.py @ 428:8cd0c70b0084 draft

Uploaded
author francesco_lapi
date Wed, 10 Sep 2025 13:21:41 +0000
parents 1032cb1028f1
children 06564187fba3
comparison
equal deleted inserted replaced
427:4a385fdb9e58 428:8cd0c70b0084
45 type = str, 45 type = str,
46 choices = ['kmeans', 'dbscan', 'hierarchy'], 46 choices = ['kmeans', 'dbscan', 'hierarchy'],
47 default = 'kmeans', 47 default = 'kmeans',
48 help = 'choose clustering algorythm') 48 help = 'choose clustering algorythm')
49 49
50 parser.add_argument('-sc', '--scaling',
51 type = str,
52 choices = ['true', 'false'],
53 default = 'true',
54 help = 'choose if you want to scaling the data')
55
50 parser.add_argument('-k1', '--k_min', 56 parser.add_argument('-k1', '--k_min',
51 type = int, 57 type = int,
52 default = 2, 58 default = 2,
53 help = 'choose minimun cluster number to be generated') 59 help = 'choose minimun cluster number to be generated')
54 60
512 518
513 for i in X.columns: 519 for i in X.columns:
514 if any(val is None or np.isnan(val) for val in X[i]): 520 if any(val is None or np.isnan(val) for val in X[i]):
515 X = X.drop(columns=[i]) 521 X = X.drop(columns=[i])
516 522
# Optional z-score standardization of the feature matrix before clustering.
# --scaling is declared with type=str and choices 'true'/'false', so the flag
# has to be compared as text: the previous `args.scaling == True` was never
# satisfied by a command-line value and the scaling was silently skipped.
# str(...).lower() also keeps a programmatically supplied bool True working.
if str(args.scaling).lower() == 'true':
    toll_std = 1e-8  # a column whose std is not above this is treated as constant
    list_to_remove = []
    for i in X.columns:
        mean_i = X[i].mean()
        std_i = X[i].std()
        if std_i > toll_std:
            # rescale the column to mean 0 and std 1
            X[i] = (X[i] - mean_i) / std_i
        else:
            # (near-)constant column: it cannot be standardized, so it is
            # dropped; this branch is also taken when std_i is NaN, because
            # the comparison above is then False
            list_to_remove.append(i)
    if list_to_remove:
        X = X.drop(columns=list_to_remove)
517 if args.k_max != None: 538 if args.k_max != None:
518 numero_classi = X.shape[0] 539 numero_classi = X.shape[0]
519 while args.k_max >= numero_classi: 540 while args.k_max >= numero_classi:
520 err = 'Skipping k = ' + str(args.k_max) + ' since it is >= number of classes of dataset' 541 err = 'Skipping k = ' + str(args.k_max) + ' since it is >= number of classes of dataset'
521 warning(err) 542 warning(err)