diff evaluation.py @ 1:ed8c7191b322 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 11e50007837b1efa01a3039c92df0ebf63f0f7e9
author rnateam
date Thu, 22 Dec 2016 09:06:48 -0500
parents b797e13169a0
children b8e32e577597
line wrap: on
line diff
--- a/evaluation.py	Fri Dec 16 07:34:49 2016 -0500
+++ b/evaluation.py	Thu Dec 22 09:06:48 2016 -0500
@@ -2,12 +2,11 @@
 from os import system
 import re
 
-
 def sh(script):
     system("bash -c '%s'" % script)
 
+dataNames = "FASTA/data.names"
 
-dataNames = "FASTA/data.names"
 listOfClusters = []
 listOfClasses = []
 cluster_seqs_stats_path = "RESULTS/*.cluster.all"
@@ -19,7 +18,7 @@
     numberOfClusters += 1
     with open(singleFile, "r") as f:
         for line in f.readlines():
-            uniqueId = line.split()[6]
+            uniqueId = line.split()[7]
             clustNum = line.split()[1]
             rnaClass, sep, tail = uniqueId.partition("_")
             listOfClasses.append(rnaClass)
@@ -28,8 +27,7 @@
                 for line in names.readlines():
                     fullUniqeId = line.split()[3]
                     rnaClass, sep, tail = fullUniqeId.partition("_")
-                    short_unique = re.findall("_".join(["[^_]+"] * 2), fullUniqeId)[0]
-                    if short_unique == uniqueId:
+                    if fullUniqeId == uniqueId:
                         blackList.append(uniqueId)
 
 numberOfClusters += 1  # 1 cluster for all unassigned seqs
@@ -37,9 +35,8 @@
     for line in names.readlines():
         fullUniqeId = line.split()[3]
         rnaClass, sep, tail = fullUniqeId.partition("_")
-        short_unique = re.findall("_".join(["[^_]+"] * 2), fullUniqeId)[0]
         rnaClass, sep, tail = fullUniqeId.partition("_")
-        if short_unique not in blackList:
+        if fullUniqeId not in blackList:
             listOfClasses.append(rnaClass)
             listOfClusters.append(str(numberOfClusters))
             numberOfClusters += 1  # separate cluster for all unassigned seqs