# view evaluation.py @ 0:b797e13169a0 draft
#
# planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
# author rnateam
# date Fri, 16 Dec 2016 07:34:49 -0500
# parents
# children ed8c7191b322
# line wrap: on
# line source

import glob
from os import system
import re


def sh(script):
    """Run *script* with ``bash -c``.

    The script is handed to bash as a single argument instead of being
    spliced between single quotes in a shell command string, so scripts that
    themselves contain single quotes are executed verbatim.

    The exit status of the command is discarded and nothing is returned.
    Raises OSError if the ``bash`` executable cannot be started.
    """
    # Local import: keeps this function self-contained without touching the
    # module's import block.
    import subprocess

    subprocess.call(["bash", "-c", script])


# Input listing every sequence; the 4th whitespace-separated field of each
# line is read as the full sequence id.
dataNames = "FASTA/data.names"
# Glob for the per-cluster files; the 2nd and 7th whitespace-separated fields
# of each line are read as the cluster number and the sequence id.
cluster_seqs_stats_path = "RESULTS/*.cluster.all"
# Two-column (class, cluster) table written by this script.
fullTabPath = "RESULTS/fullTab.tabular"

# First two underscore-separated fields of an id, e.g. "a_b" in "a_b_c".
_SHORT_ID_RE = re.compile("_".join(["[^_]+"] * 2))


def _class_of(sequence_id):
    """Return the part of *sequence_id* before its first underscore."""
    return sequence_id.partition("_")[0]


def _read_clustered(cluster_files):
    """Collect (class, cluster) rows from the given cluster files.

    Returns a tuple ``(rows, assigned_ids, file_count)`` where ``rows`` keeps
    the order of the files and of the lines inside them, ``assigned_ids`` is
    the set of sequence ids seen, and ``file_count`` is the number of files
    read. Blank lines are skipped; a non-blank line with fewer than seven
    fields raises IndexError.
    """
    rows = []
    assigned_ids = set()
    file_count = 0
    for cluster_file in cluster_files:
        file_count += 1
        with open(cluster_file, "r") as handle:
            for line in handle:
                fields = line.split()
                if not fields:
                    continue
                unique_id = fields[6]
                rows.append((_class_of(unique_id), fields[1]))
                assigned_ids.add(unique_id)
    return rows, assigned_ids, file_count


def _read_unassigned(names_path, assigned_ids, first_cluster):
    """Collect (class, cluster) rows for sequences missing from every cluster.

    A sequence counts as assigned when the first ``<field>_<field>`` part of
    its full id is in *assigned_ids*. Every other sequence gets a cluster
    number of its own, counting up from *first_cluster* in file order.
    Blank lines are skipped; a non-blank line with fewer than four fields, or
    an id without two underscore-separated fields, raises IndexError.
    """
    rows = []
    next_cluster = first_cluster
    with open(names_path, "r") as names:
        for line in names:
            fields = line.split()
            if not fields:
                continue
            full_id = fields[3]
            short_id = _SHORT_ID_RE.findall(full_id)[0]
            if short_id not in assigned_ids:
                rows.append((_class_of(full_id), str(next_cluster)))
                next_cluster += 1
    return rows


def main(names_path=dataNames, cluster_glob=cluster_seqs_stats_path,
         out_path=fullTabPath):
    """Write the tab-separated class/cluster table for all sequences.

    Rows for clustered sequences come first (cluster files in sorted path
    order), followed by one row per unassigned sequence from *names_path*.
    Unassigned sequences are numbered starting one past the number of
    cluster files, each in a cluster of its own.
    """
    cluster_files = sorted(glob.glob(cluster_glob))
    rows, assigned_ids, file_count = _read_clustered(cluster_files)
    rows.extend(_read_unassigned(names_path, assigned_ids, file_count + 1))
    with open(out_path, "w") as full:
        full.writelines("%s\t%s\n" % row for row in rows)


if __name__ == "__main__":
    main()