annotate addCdhitseqs.py @ 4:4a9754d476fe draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
author rnateam
date Fri, 13 Jan 2017 16:59:29 -0500
parents 79b9117aef01
children 869a6e807d76
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
79b9117aef01 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit c03cf64554289eb098267c0923cf0cf7b245cc0c
rnateam
parents:
diff changeset
"""Append sequences collapsed by a clustering step to per-cluster result files.

Usage: ``addCdhitseqs.py <cluster-file>``

The cluster file is read as a list of groups. Each group starts with a header
line containing ``>Cluster``; the first member line after the header is taken
as the group's representative and every further member line as a redundant
sequence (presumably this is a CD-HIT ``.clstr`` file -- confirm against the
calling tool wrapper). For every file matching ``RESULTS/*.cluster.all`` that
mentions a representative, one extra record per redundant sequence is
appended to that file.
"""
import re
import glob
import sys

# Glob pattern (relative to the working directory) of the files to extend.
cluster_seqs_stats_path = "RESULTS/*.cluster.all"


def _clean_id(token):
    """Return *token* with every ``>`` and ``...`` removed."""
    return token.replace(">", "").replace("...", "")


def parse_cdhit_clusters(cdhitcluster):
    """Map each representative sequence id to its redundant sequence ids.

    Args:
        cdhitcluster: path of the cluster file to read.

    Returns:
        dict mapping representative id -> list of redundant ids, in file
        order. Groups with only a representative produce no entry.

    Lines with fewer than three whitespace-separated fields (e.g. blank
    lines) and member lines that appear before the first ``>Cluster`` header
    are ignored instead of raising ``IndexError``.
    """
    rep_to_redundant = {}
    rep_id = None
    awaiting_rep = False

    # Read-only access is sufficient; the file is never modified here.
    with open(cdhitcluster, "r") as handle:
        for line in handle:
            if ">Cluster" in line:
                # New group: the next usable member line is its representative.
                rep_id = None
                awaiting_rep = True
                continue

            fields = line.split()
            if len(fields) < 3:
                continue

            # The sequence id is the third field, e.g. ">NAME...".
            seq_id = _clean_id(fields[2])
            if awaiting_rep:
                rep_id = seq_id
                awaiting_rep = False
            elif rep_id is not None:
                rep_to_redundant.setdefault(rep_id, []).append(seq_id)

    return rep_to_redundant


def append_cdhit_seqs(cluster_file, rep_to_redundant):
    """Append one record per redundant sequence to *cluster_file*.

    A representative counts as present when its id occurs anywhere in the
    file's text (plain substring test).
    NOTE(review): a substring test also matches ids that are prefixes of
    longer ids -- confirm whether an exact field match is wanted.

    Each appended record reuses fields 0, 1, 2 and 5 of the file's first
    line, joined as ``f0 f1 f2  -  CD-Hit f5 ORIGID <id>``.

    Args:
        cluster_file: path of the result file to extend.
        rep_to_redundant: mapping as returned by ``parse_cdhit_clusters``.

    Returns:
        Number of records appended (0 leaves the file untouched).

    Raises:
        IndexError: a representative matched but the first line of the file
            has fewer than six whitespace-separated fields.
    """
    # Read with a plain read handle: a stream opened with "a+" may start at
    # end-of-file, in which case read() would return an empty string.
    with open(cluster_file, "r") as handle:
        file_content = handle.read()

    matched_ids = []
    for rep_id, redundant_ids in rep_to_redundant.items():
        if rep_id in file_content:
            matched_ids.extend(redundant_ids)
    if not matched_ids:
        return 0

    fields = file_content.split("\n")[0].split()
    records = [
        " ".join([fields[0], fields[1], fields[2], " - ", "CD-Hit",
                  fields[5], "ORIGID", str(seq_id)]) + "\n"
        for seq_id in matched_ids
    ]

    with open(cluster_file, "a") as handle:
        # Keep the first new record off the file's last line when that line
        # is not newline-terminated.
        if not file_content.endswith("\n"):
            handle.write("\n")
        handle.write("".join(records))
    return len(records)


def main(argv=None):
    """Script entry point; ``argv[1]`` is the path of the cluster file."""
    argv = sys.argv if argv is None else argv
    rep_to_redundant = parse_cdhit_clusters(argv[1])
    # Every file gets only its own records; nothing carries over between
    # files.
    for cluster_file in sorted(glob.glob(cluster_seqs_stats_path)):
        append_cdhit_seqs(cluster_file, rep_to_redundant)


if __name__ == "__main__":
    main()