Mercurial > repos > rnateam > graphclust_postprocessing
annotate addCdhitseqs.py @ 11:e080ebe95476 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 4dd7269185f6fb9bdc007028007d6540f4cf057d
author | rnateam |
---|---|
date | Sat, 25 Mar 2017 16:50:38 -0400 |
parents | 869a6e807d76 |
children |
rev | line source |
---|---|
3
79b9117aef01
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit c03cf64554289eb098267c0923cf0cf7b245cc0c
rnateam
parents:
diff
changeset
|
"""Append sequences collapsed by CD-HIT to the cluster result files.

Usage: addCdhitseqs.py <cdhit_cluster_file>

The cluster file given on the command line is parsed into a mapping from
each cluster's representative sequence id (the member line ending in "*")
to the ids of the other members of that cluster.  Every file matching
``RESULTS/*.cluster.all`` (relative to the current working directory) is
then rewritten in place with tab-separated columns and, for every
representative id that occurs in the file, one extra row per collapsed
member is appended.
"""
import re
import glob
import sys

# Result files that get normalised and extended, relative to the CWD.
CLUSTER_FILES_PATTERN = "RESULTS/*.cluster.all"

# The representative member line of a cluster ends with "*".
_REP_MARKER = re.compile(r"\*\s*$")

# Highest column index that is always overwritten in an appended row.
_LAST_REQUIRED_COLUMN = 7


def _sequence_id(fields):
    """Return the bare sequence id from a whitespace-split member line.

    The id is the third field with the ">" prefix and the "..." suffix
    removed.
    """
    return fields[2].replace(">", "").replace("...", "")


def _parse_cdhit_clusters(lines):
    """Map each representative id to the ids of its non-representative members.

    ``lines`` is any iterable of text lines.  Lines before the first
    ">Cluster" header and lines with fewer than three fields (including
    blank lines) are ignored.  Clusters that have no representative line or
    no other members produce no entry.
    """
    clusters = []
    for raw in lines:
        if raw.lstrip().startswith(">Cluster"):
            clusters.append([])
            continue
        fields = raw.split()
        if not clusters or len(fields) < 3:
            continue
        is_rep = _REP_MARKER.search(raw) is not None
        clusters[-1].append((_sequence_id(fields), is_rep))

    rep_to_members = {}
    for entries in clusters:
        # The representative may sit at any position inside the cluster, so
        # every non-representative line counts as a member.
        reps = [seq_id for seq_id, is_rep in entries if is_rep]
        members = [seq_id for seq_id, is_rep in entries if not is_rep]
        if reps and members:
            rep_to_members[reps[0]] = members
    return rep_to_members


def _append_cdhit_members(path, rep_to_members):
    """Rewrite ``path`` tab-separated and append rows for collapsed members.

    The first line of the file serves as the template for appended rows:
    column 3 becomes "---", column 4 becomes "CD-Hit", column 7 becomes the
    member id and, when the template has more than nine columns, column 9
    becomes the member id without its last "_"-separated part.  Empty files
    are left untouched.
    """
    with open(path, "r+") as cl_file:
        file_lines = cl_file.readlines()
        if not file_lines:
            return

        output = ["\t".join(line.split()) + "\n" for line in file_lines]
        template = file_lines[0].split()
        file_content = "".join(file_lines)

        # NOTE(review): this is a plain substring test, so an id that is a
        # prefix of another id also matches -- confirm whether an exact
        # column comparison is wanted.
        matching = [members for rep_id, members in rep_to_members.items()
                    if rep_id in file_content]

        if matching and len(template) <= _LAST_REQUIRED_COLUMN:
            sys.stderr.write(
                "%s: first line has fewer than %d columns; "
                "no CD-Hit rows appended\n"
                % (path, _LAST_REQUIRED_COLUMN + 1))
            matching = []

        for members in matching:
            for member in members:
                row = list(template)
                row[3] = "---"
                row[4] = "CD-Hit"
                row[7] = str(member)
                if len(template) > 9:
                    row[9] = str(member.rsplit("_", 1)[0])
                output.append("\t".join(row) + "\n")

        cl_file.seek(0)
        cl_file.writelines(output)
        cl_file.truncate()


def main(argv=None):
    """Run the script; ``argv`` defaults to ``sys.argv[1:]``."""
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit("usage: addCdhitseqs.py <cdhit_cluster_file>")

    # The cluster file is only read, so it is opened read-only.
    with open(args[0], "r") as handle:
        rep_to_members = _parse_cdhit_clusters(handle)

    for path in sorted(glob.glob(CLUSTER_FILES_PATTERN)):
        _append_cdhit_members(path, rep_to_members)


if __name__ == "__main__":
    main()