Mercurial > repos > rnateam > graphclust_postprocessing
diff evaluation.py @ 0:b797e13169a0 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
author | rnateam |
---|---|
date | Fri, 16 Dec 2016 07:34:49 -0500 |
parents | |
children | ed8c7191b322 |
line wrap: on
line diff
# File: evaluation.py
"""Write a two-column "class<TAB>cluster" table for every sequence.

Inputs (paths relative to the working directory by default):

* ``RESULTS/*.cluster.all`` -- one row per clustered sequence; the script
  reads whitespace-separated column 1 (cluster label) and column 6
  (sequence id), both zero-based.
* ``FASTA/data.names`` -- one row per input sequence; the script reads
  zero-based column 3 (full sequence id).

Output: ``RESULTS/fullTab.tabular``.  Rows for clustered sequences come
first, in sorted cluster-file order; then one row for every sequence of
``data.names`` whose short id does not occur in any cluster file.  Each of
those leftover sequences receives its own, consecutive cluster number.
"""
import glob
from os import system
import re


def sh(script):
    """Run *script* through ``bash -c``.

    The script text is placed between single quotes without any escaping,
    so it must not itself contain a single quote.
    """
    system("bash -c '%s'" % script)


dataNames = "FASTA/data.names"
cluster_seqs_stats_path = "RESULTS/*.cluster.all"
fullTabPath = "RESULTS/fullTab.tabular"

# Two non-empty, underscore-separated fields, e.g. "a_1" inside "a_1_xyz".
_SHORT_ID_RE = re.compile("_".join(["[^_]+"] * 2))


def _read_columns(path, min_columns):
    """Yield the whitespace-split fields of every non-blank line of *path*.

    Raises ValueError (naming file and line) when a non-blank line has
    fewer than *min_columns* fields.
    """
    with open(path, "r") as handle:
        for lineno, line in enumerate(handle, 1):
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. a trailing newline
            if len(fields) < min_columns:
                raise ValueError(
                    "%s:%d: expected at least %d columns, found %d"
                    % (path, lineno, min_columns, len(fields)))
            yield fields


def _short_id(full_id):
    """Return the first "<field>_<field>" match found in *full_id*.

    Raises ValueError when *full_id* contains no such pair of fields.
    """
    match = _SHORT_ID_RE.search(full_id)
    if match is None:
        raise ValueError(
            "sequence id %r does not contain two underscore-separated "
            "fields" % (full_id,))
    return match.group(0)


def main(names_path=dataNames, clusters_glob=cluster_seqs_stats_path,
         out_path=fullTabPath):
    """Collect the class/cluster rows and write them to *out_path*.

    names_path    -- file listing all sequences (full id in column 3).
    clusters_glob -- glob pattern matching the per-cluster files.
    out_path      -- tab-separated output file; overwritten if present.
    """
    rows = []  # (class, cluster label) pairs in output order
    assigned_ids = set()  # sequence ids seen in any cluster file

    cluster_files = sorted(glob.glob(clusters_glob))
    for cluster_file in cluster_files:
        for fields in _read_columns(cluster_file, 7):
            unique_id = fields[6]
            # The class is everything before the first underscore.
            rows.append((unique_id.partition("_")[0], fields[1]))
            assigned_ids.add(unique_id)

    # Every sequence that is in no cluster file gets a cluster of its own.
    # Numbering continues after the number of cluster files.
    next_cluster = len(cluster_files) + 1
    for fields in _read_columns(names_path, 4):
        full_id = fields[3]
        if _short_id(full_id) not in assigned_ids:
            rows.append((full_id.partition("_")[0], str(next_cluster)))
            next_cluster += 1

    with open(out_path, "w") as full:
        full.write("".join("%s\t%s\n" % row for row in rows))


if __name__ == "__main__":
    main()