annotate evaluation.py @ 7:870dfc90473f draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit b3fe72d71e27a7b66621ecaae3e2da4bc4ca6986
author rnateam
date Thu, 23 Feb 2017 13:03:17 -0500
parents 869a6e807d76
children b5f49453af8c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b797e13169a0 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
1 import glob
b797e13169a0 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
2 from os import system
b797e13169a0 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
3 import re
2
b8e32e577597 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 9bc3c9b613d106098a78e16534897c88a3738c07
rnateam
parents: 1
diff changeset
4 from sklearn import metrics
5
4310ac018d05 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 80c721dcfe02a2b8baf8e2c64b76cbcd71b23d86
rnateam
parents: 3
diff changeset
5 from shutil import make_archive
0
b797e13169a0 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
6
b797e13169a0 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
def sh(script):
    """Run *script* through ``bash -c`` and wait for it to finish.

    The script text is handed to bash as a single argv entry instead of being
    spliced into a single-quoted shell string, so scripts that themselves
    contain single quotes run exactly as written.

    The exit status is not checked and nothing is returned.
    """
    # Function-scope import keeps this helper self-contained without touching
    # the module's import block.
    import subprocess

    subprocess.call(["bash", "-c", script])
b797e13169a0 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
rnateam
parents:
diff changeset
9
1
ed8c7191b322 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 11e50007837b1efa01a3039c92df0ebf63f0f7e9
rnateam
parents: 0
diff changeset
# Input locations, relative to the current working directory.
dataNames = "FASTA/data.names"
cluster_seqs_stats_path = "RESULTS/*.cluster.all"
cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path)

# Read data.names once.  The 4th whitespace-separated field of each line is
# used as the full sequence id; order is kept for the second pass below and a
# set is kept for fast membership tests.
allSequenceIds = []
with open(dataNames, "r") as names:
    for line in names:
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines
        allSequenceIds.append(fields[3])
knownSequenceIds = set(allSequenceIds)

# Parallel lists: listOfClasses[i] is the class prefix (text before the first
# "_" of the sequence id) and listOfClusters[i] the cluster label of the same
# sequence.
listOfClusters = []
listOfClasses = []

# Ids that occur in at least one cluster file and in data.names.
blackList = set()
numberOfClusters = 0
for singleFile in sorted(cluster_seqs_stats_files):
    numberOfClusters += 1
    with open(singleFile, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            uniqueId = fields[8]
            clustNum = fields[2]
            listOfClasses.append(uniqueId.partition("_")[0])
            listOfClusters.append(clustNum)
            if uniqueId in knownSequenceIds:
                blackList.add(uniqueId)

# Every sequence from data.names that was not seen in any cluster file gets
# its own label, numbered from (number of cluster files + 1) upwards.
numberOfClusters += 1
for fullUniqeId in allSequenceIds:
    if fullUniqeId not in blackList:
        listOfClasses.append(fullUniqeId.partition("_")[0])
        listOfClusters.append(str(numberOfClusters))
        numberOfClusters += 1

# Two-column table: class <TAB> cluster label, one sequence per line.
toWrite = "".join(
    rnaClass + "\t" + clustLabel + "\n"
    for rnaClass, clustLabel in zip(listOfClasses, listOfClusters)
)
with open("RESULTS/fullTab.tabular", "w") as full:
    full.write(toWrite)

pattern = re.compile("^RF.*$")

# Scores are only computed when the first class label starts with "RF";
# otherwise every score is reported as NA.
if listOfClasses and pattern.match(str(listOfClasses[0])):
    completeness_score = metrics.completeness_score(listOfClasses, listOfClusters)
    homogeneity_score = metrics.homogeneity_score(listOfClasses, listOfClusters)
    adjusted_rand_score = metrics.adjusted_rand_score(listOfClasses, listOfClusters)
    adjusted_mutual_info_score = metrics.adjusted_mutual_info_score(listOfClasses, listOfClusters)
    v_measure_score = metrics.v_measure_score(listOfClasses, listOfClusters)

    toWrite = "\n".join([
        "completeness_score : " + str(completeness_score),
        "homogeneity_score : " + str(homogeneity_score),
        "adjusted_rand_score : " + str(adjusted_rand_score),
        "adjusted_mutual_info_score : " + str(adjusted_mutual_info_score),
        "v_measure_score : " + str(v_measure_score),
    ])
else:
    toWrite = "completeness_score : NA \nhomogeneity_score : NA \nadjusted_rand_score : NA \nadjusted_mutual_info_score : NA \nv_measure_score : NA"

with open("RESULTS/evaluation.txt", "w") as fOut:
    fOut.write(toWrite)

# Pack the contents of RESULTS/ into RESULTS.zip in the working directory.
make_archive('RESULTS', 'zip', root_dir='RESULTS')