Mercurial > repos > rnateam > graphclust_postprocessing
comparison evaluation.py @ 0:b797e13169a0 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 21aaee40723b5341b4236edeb0e72995c2054053
author | rnateam |
---|---|
date | Fri, 16 Dec 2016 07:34:49 -0500 |
parents | |
children | ed8c7191b322 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b797e13169a0 |
---|---|
1 import glob | |
2 from os import system | |
3 import re | |
4 | |
5 | |
def sh(script):
    """Run *script* through ``bash -c``.

    The script text is passed to bash as a single argument instead of being
    pasted between single quotes in a shell command line, so scripts that
    themselves contain single quotes are executed as written.

    The exit status of the script is discarded and ``None`` is returned.
    Raises ``OSError`` if the ``bash`` executable cannot be started.
    """
    # Imported locally so this function does not depend on a file-level import.
    import subprocess

    subprocess.call(["bash", "-c", script])
8 | |
9 | |
dataNames = "FASTA/data.names"
cluster_seqs_stats_path = "RESULTS/*.cluster.all"

# First two underscore-separated components of an id; same pattern that
# "_".join(["[^_]+"] * 2) produces, compiled once instead of per line.
_SHORT_ID = re.compile("[^_]+_[^_]+")


def _read_names(names_path):
    """Parse the names file once.

    Returns a list of ``(short_id, rna_class)`` tuples in file order, where
    the full id is the 4th whitespace-separated field of each line,
    ``rna_class`` is the part of the full id before its first underscore and
    ``short_id`` is the first ``xxx_yyy`` match inside the full id.

    Raises ``IndexError`` for a line with fewer than four fields or an id
    that has no ``xxx_yyy`` match.
    """
    records = []
    with open(names_path, "r") as names:
        for line in names:
            full_id = line.split()[3]
            rna_class = full_id.partition("_")[0]
            short_id = _SHORT_ID.findall(full_id)[0]
            records.append((short_id, rna_class))
    return records


def collect_rows(cluster_files, names_path):
    """Build the ``(class, cluster)`` rows of the result table.

    cluster_files -- paths of the per-cluster files; processed in sorted order.
    names_path    -- path of the names file listing every input sequence.

    For every line of every cluster file one row is produced from the 7th
    field (sequence id; its prefix before the first underscore is the class)
    and the 2nd field (cluster number). Afterwards every sequence of the names
    file whose short id was not seen in any cluster file gets a row of its
    own, each with a fresh cluster number.

    Returns a list of ``(rna_class, cluster_number)`` string tuples.
    """
    names = _read_names(names_path)
    known_short_ids = set(short_id for short_id, _ in names)

    rows = []
    assigned = set()  # ids from cluster files that also occur in the names file
    cluster_file_count = 0
    for path in sorted(cluster_files):
        cluster_file_count += 1
        with open(path, "r") as handle:
            for line in handle:
                fields = line.split()
                unique_id = fields[6]
                rows.append((unique_id.partition("_")[0], fields[1]))
                if unique_id in known_short_ids:
                    assigned.add(unique_id)

    # NOTE(review): numbering for unassigned sequences starts right after the
    # *count* of cluster files, not after the highest cluster number read from
    # them -- confirm these always coincide.
    next_cluster = cluster_file_count + 1
    for short_id, rna_class in names:
        if short_id not in assigned:
            rows.append((rna_class, str(next_cluster)))
            next_cluster += 1  # every unassigned sequence is its own cluster
    return rows


def main():
    """Collect all rows and write them, tab-separated, to the result table."""
    cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path)
    rows = collect_rows(cluster_seqs_stats_files, dataNames)
    with open("RESULTS/fullTab.tabular", "w") as full:
        full.write("".join("%s\t%s\n" % row for row in rows))


if __name__ == "__main__":
    main()