diff evaluation.py @ 0:b797e13169a0 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
author rnateam
date Fri, 16 Dec 2016 07:34:49 -0500
parents
children ed8c7191b322
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/evaluation.py	Fri Dec 16 07:34:49 2016 -0500
@@ -0,0 +1,51 @@
+import glob
+from os import system
+import re
+
+
+def sh(script):
+    """Run *script* with ``bash -c``.
+
+    The script text is handed to bash as a single argument instead of being
+    spliced between single quotes in a command string, so a script that
+    itself contains single quotes is executed as written.
+
+    The exit status of the command is discarded; nothing is returned.
+    """
+    # Imported locally so the helper does not depend on the file-level imports.
+    import subprocess
+
+    subprocess.call(["bash", "-c", script])
+
+
+# Build RESULTS/fullTab.tabular with one "<class>\t<cluster>" row per sequence.
+#
+# Rows come from two sources:
+#   1. every line of the RESULTS/*.cluster.all files, using the cluster number
+#      stored in that line;
+#   2. every line of FASTA/data.names whose short id does not occur in any
+#      cluster file; each of these gets a cluster number of its own.
+dataNames = "FASTA/data.names"
+listOfClusters = []
+listOfClasses = []
+cluster_seqs_stats_path = "RESULTS/*.cluster.all"
+cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path)
+
+# Matches two underscore-joined fields, e.g. "a_b" in "a_b_c". Compiled once
+# because it is applied to every line of data.names.
+shortIdPattern = re.compile("_".join(["[^_]+"] * 2))
+
+# Ids that already belong to a cluster. They are collected straight from the
+# cluster files into a set: the later membership test gives the same answer as
+# cross-checking each id against data.names, without re-reading data.names for
+# every cluster line and without a linear list search.
+blackList = set()
+numberOfClusters = 0  # counts the cluster files seen so far
+for singleFile in sorted(cluster_seqs_stats_files):
+    numberOfClusters += 1
+    with open(singleFile, "r") as f:
+        for line in f:
+            fields = line.split()
+            if not fields:
+                continue  # skip blank lines instead of failing on them
+            uniqueId = fields[6]  # 7th whitespace-separated field
+            clustNum = fields[1]  # 2nd whitespace-separated field
+            # The class label is the part of the id before the first "_".
+            rnaClass, sep, tail = uniqueId.partition("_")
+            listOfClasses.append(rnaClass)
+            listOfClusters.append(clustNum)
+            blackList.add(uniqueId)
+
+# Numbering of unassigned sequences continues after the number of cluster
+# files. NOTE(review): this assumes the cluster numbers stored in the files
+# never exceed the file count - confirm against the producer of *.cluster.all.
+numberOfClusters += 1
+with open(dataNames, "r") as names:
+    for line in names:
+        fields = line.split()
+        if not fields:
+            continue  # skip blank lines instead of failing on them
+        fullUniqeId = fields[3]  # 4th whitespace-separated field
+        rnaClass, sep, tail = fullUniqeId.partition("_")
+        # Raises IndexError when the id has no two underscore-joined fields.
+        short_unique = shortIdPattern.findall(fullUniqeId)[0]
+        if short_unique not in blackList:
+            listOfClasses.append(rnaClass)
+            listOfClusters.append(str(numberOfClusters))
+            numberOfClusters += 1  # each unassigned sequence gets its own cluster
+
+# Both lists are appended to in lockstep, so zip pairs every class with its
+# cluster; join builds the output in one pass.
+toWrite = "".join(
+    className + "\t" + clusterName + "\n"
+    for className, clusterName in zip(listOfClasses, listOfClusters)
+)
+with open("RESULTS/fullTab.tabular", "w") as full:
+    full.write(toWrite)