comparison addCdhitseqs.py @ 3:79b9117aef01 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit c03cf64554289eb098267c0923cf0cf7b245cc0c
author rnateam
date Wed, 04 Jan 2017 18:15:07 -0500
parents
children 869a6e807d76
comparison
equal deleted inserted replaced
2:b8e32e577597 3:79b9117aef01
1 import re
2 import glob
3 import sys
4
# Path of the cd-hit cluster listing, taken from the first command-line argument.
cdhitcluster = sys.argv[1]

# Every file matching this pattern (relative to the current working directory)
# is collected; the list is read later in the script.
cluster_seqs_stats_path = "RESULTS/*.cluster.all"
cluster_seqs_stats_files = glob.glob(cluster_seqs_stats_path)

# Maps the id taken from a cluster's first line to the ids listed after it.
repSeqRedSeqdict = dict()

# Initial parser state: line of the current cluster's first entry, number of
# entries seen after it, and whether the next line is that first entry.
repLine, count, first = "", 0, False
16
def _clean_seq_id(member_line):
    """Return the sequence id stored in the third whitespace-separated field.

    Every ``>`` and every ``...`` occurring in that field is removed.
    Raises IndexError when the line has fewer than three fields.
    """
    return member_line.split()[2].replace(">", "").replace("...", "")


def _parse_cdhit_clusters(lines):
    """Group the sequence ids of a cd-hit cluster listing by cluster.

    ``lines`` is any iterable of text lines.  A line starting with
    ``>Cluster`` opens a new cluster; the first non-blank line after it is
    taken as the representative and every further non-blank line of the same
    cluster as a redundant sequence.

    Returns a dict mapping the representative id to the list of ALL redundant
    ids of its cluster, in file order.  Clusters holding only a representative
    produce no entry.

    NOTE(review): cd-hit flags the representative with a trailing ``*``; this
    keeps the original assumption that it is the first line of the cluster -
    confirm against the cd-hit options used upstream.
    """
    clusters = {}
    in_cluster = False
    rep_line = None   # raw representative line, parsed only when needed
    members = None    # list registered in ``clusters`` for the current cluster

    for raw_line in lines:
        # Member lines also contain ">" + id, so only a line that *begins*
        # with the marker is treated as a cluster header.
        if raw_line.startswith(">Cluster"):
            in_cluster = True
            rep_line = None
            members = None
            continue

        # Ignore blank lines and anything preceding the first header instead
        # of failing on the field lookup.
        if not in_cluster or not raw_line.strip():
            continue

        if rep_line is None:
            rep_line = raw_line
            continue

        if members is None:
            # First redundant sequence of this cluster: create ONE list for
            # the whole cluster so later members extend it rather than
            # replacing it.
            members = []
            clusters[_clean_seq_id(rep_line)] = members
        members.append(_clean_seq_id(raw_line))

    return clusters


# The listing is only read, so plain read mode is sufficient (no write
# permission required on the input file).
with open(cdhitcluster, 'r') as f:
    repSeqRedSeqdict = _parse_cdhit_clusters(f)
48
# Append one "CD-Hit" row per redundant sequence to every cluster statistics
# file that mentions the corresponding representative id.
toWrite = ""

for singleFile in sorted(cluster_seqs_stats_files):
    with open(singleFile, "a+") as clFile:
        # In "a+" mode the stream may start at end of file (it does on
        # Python 3), which would make read() return an empty string; rewind
        # first.  Writes still go to the end of the file in append mode.
        clFile.seek(0)
        file_content = clFile.read()
        header_fields = file_content.split('\n')[0].split()

        new_rows = []
        for rep_id, redundant_ids in repSeqRedSeqdict.items():
            # NOTE(review): plain substring test, as in the original - an id
            # that is a prefix of another id in the file also matches.
            if rep_id not in file_content:
                continue
            for seq_id in redundant_ids:
                # Fields 0, 1, 2 and 5 of the file's first line are reused;
                # the " - " element keeps the original double spacing around
                # the dash.  Raises IndexError if that line has fewer than
                # six fields.
                new_rows.append(" ".join([
                    header_fields[0],
                    header_fields[1],
                    header_fields[2],
                    " - ",
                    "CD-Hit",
                    header_fields[5],
                    "ORIGID",
                    str(seq_id),
                ]) + "\n")

        # Rebuilt for every file so rows produced for one file are never
        # appended to the files processed after it.
        toWrite = "".join(new_rows)
        if toWrite and file_content and not file_content.endswith("\n"):
            # Keep the first appended row from fusing with an unterminated
            # last line.
            toWrite = "\n" + toWrite
        clFile.write(toWrite)