annotate tools/myTools/bin/class_and_consensus.py @ 1:7e5c71b2e71f draft default tip

Uploaded
author laurenmarazzi
date Wed, 22 Dec 2021 16:00:34 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
7e5c71b2e71f Uploaded
laurenmarazzi
parents:
diff changeset
1 #!/usr/bin/env python3
7e5c71b2e71f Uploaded
laurenmarazzi
parents:
diff changeset
2 import pandas as pd
7e5c71b2e71f Uploaded
laurenmarazzi
parents:
diff changeset
3 from sklearn.naive_bayes import GaussianNB
7e5c71b2e71f Uploaded
laurenmarazzi
parents:
diff changeset
4 from sklearn.ensemble import RandomForestClassifier
7e5c71b2e71f Uploaded
laurenmarazzi
parents:
diff changeset
5 from sklearn.svm import SVC
7e5c71b2e71f Uploaded
laurenmarazzi
parents:
diff changeset
6 from collections import Counter
7e5c71b2e71f Uploaded
laurenmarazzi
parents:
diff changeset
7 import sys
7e5c71b2e71f Uploaded
laurenmarazzi
parents:
diff changeset
def _read_table(path, index_col):
    """Read a whitespace-delimited table whose first row is a header.

    Args:
        path: Path of the text file to read.
        index_col: Column name(s) or position(s) to use as the row index.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    # A regex separator of one-or-more whitespace characters is the
    # documented equivalent of the deprecated ``delim_whitespace=True``.
    return pd.read_csv(path, sep=r"\s+", index_col=index_col)


def _load_training_attractors(paths):
    """Stack the training attractor tables row-wise into a single frame.

    Args:
        paths: Paths of whitespace-delimited files, each with a ``name``
            column that becomes the row index.

    Returns:
        One ``pandas.DataFrame`` holding the rows of every file, in the order
        the files were given. Duplicate rows are kept.
    """
    # Concatenate once instead of growing a frame inside a loop, which
    # re-copies all accumulated rows on every iteration.
    return pd.concat([_read_table(path, ["name"]) for path in paths], axis=0)


def _consensus_perturbations(index, predictions, target_cluster=0):
    """Select perturbations that enough classifier votes put in one cluster.

    Every row of ``index`` is one (perturbation, replicate) pair and every
    classifier casts one vote per row. A perturbation is kept when the number
    of its votes for ``target_cluster``, pooled over all classifiers and
    replicates, is at least twice the number of distinct replicates. With the
    three classifiers used by ``main`` that is a two-thirds majority.

    Args:
        index: ``pandas.MultiIndex`` with levels named ``perturbation`` and
            ``replicate``.
        predictions: Iterable of 1-D label arrays, one per classifier, each
            aligned row-for-row with ``index``.
        target_cluster: Cluster label that counts as a vote.

    Returns:
        List of perturbation names, ordered by first appearance among the
        votes.

    Raises:
        KeyError: If ``index`` lacks a ``perturbation`` or ``replicate`` level.
    """
    perturbations = index.get_level_values("perturbation")
    votes = Counter()
    for predicted in predictions:
        # Going through a Series keeps the comparison element-wise even when
        # the labels are not numeric.
        in_target = (pd.Series(predicted) == target_cluster).to_numpy()
        votes.update(perturbations[in_target])

    threshold = 2 * len(index.unique("replicate"))
    return [name for name, count in votes.items() if count >= threshold]


def main():
    """Classify perturbation outcomes and write the consensus cluster-0 hits.

    Command-line arguments (read from ``sys.argv``):
        1. Comma-separated paths of training attractor tables (indexed by a
           ``name`` column).
        2. Path of the perturbation table; its first two columns form the row
           index and must be named ``perturbation`` and ``replicate``.
        3. Path of the label table (indexed by ``name``) whose ``clusters``
           column gives one training label per training row, in row order.

    Three classifiers (SVM, Gaussian naive Bayes, random forest) are trained
    on the attractors and used to predict a cluster for each perturbation
    row. Perturbations with enough pooled votes for cluster 0 are written,
    one per line, to ``crit1perts.txt`` in the current directory.
    """
    if len(sys.argv) < 4:
        sys.exit(
            "usage: %s TRAIN_CSV[,TRAIN_CSV...] PERTURBATIONS LABELS" % sys.argv[0]
        )

    df_attr = _load_training_attractors(sys.argv[1].split(","))
    df_perturb = _read_table(sys.argv[2], [0, 1])
    # Labels are taken positionally, so the label file must list the
    # attractors in the same order as the stacked training rows.
    labels = _read_table(sys.argv[3], ["name"])["clusters"].tolist()

    # Each classifier has its own fixed seed, so fitting order does not
    # affect results; SVM-first only fixes the order of names in the output.
    classifiers = (
        SVC(gamma="auto", random_state=4),
        GaussianNB(),
        RandomForestClassifier(n_estimators=100, random_state=1),
    )
    predictions = [
        clf.fit(df_attr, labels).predict(df_perturb) for clf in classifiers
    ]

    keys = _consensus_perturbations(df_perturb.index, predictions)

    with open("crit1perts.txt", "w") as f:
        f.writelines("{}\n".format(item) for item in keys)
7e5c71b2e71f Uploaded
laurenmarazzi
parents:
diff changeset
61
7e5c71b2e71f Uploaded
laurenmarazzi
parents:
diff changeset
# Run the pipeline only when executed as a script, so importing this module
# does not read sys.argv or write crit1perts.txt.
if __name__ == "__main__":
    main()