Mercurial > repos > rnateam > graphclust_postprocessing
annotate addCdhitseqs.py @ 12:b5f49453af8c draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 65d322f9ab2f24d65b307f3553589149a1d678d5
| author | rnateam |
|---|---|
| date | Wed, 31 May 2017 14:53:30 -0400 |
| parents | 869a6e807d76 |
| children |
| rev | line source |
|---|---|
|
3
79b9117aef01
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit c03cf64554289eb098267c0923cf0cf7b245cc0c
rnateam
parents:
diff
changeset
|
"""Append CD-HIT redundant sequences to the GraphClust cluster tables.

Usage: addCdhitseqs.py <cdhit.clstr>

The CD-HIT ``.clstr`` file given on the command line is parsed into a mapping
``representative id -> [ids of the other members of its cluster]``.  Every
``RESULTS/*.cluster.all`` table is then rewritten tab-separated, and for each
representative whose id occurs in a table one extra row per redundant member
is appended to that table.
"""
import re
import glob
import sys

cluster_seqs_stats_path = "RESULTS/*.cluster.all"

# The representative line of a cluster is the one ending in "*".  Trailing
# whitespace (e.g. "\r" from CRLF files) is tolerated.
_REP_LINE = re.compile(r"\*\s*$")


def extract_full_id(member_line):
    """Return the sequence id of a ``.clstr`` member line, or None.

    The id is the third whitespace-separated field with every ">" and "..."
    removed.  None is returned when the line has fewer than three fields.
    """
    fields = member_line.split()
    if len(fields) < 3:
        return None
    return fields[2].replace(">", "").replace("...", "")


def parse_cdhit_clusters(clstr_path):
    """Map each cluster representative to the ids of its other members.

    A cluster starts at a line containing ">Cluster".  Within a cluster the
    line ending in "*" names the representative; every other non-blank line
    is a redundant member.  Clusters without a representative or without any
    redundant member produce no entry.

    :param clstr_path: path of the CD-HIT ``.clstr`` file (only read).
    :return: dict ``{representative id: [member id, ...]}``.
    """
    rep_to_members = {}
    # Plain "r": the file is never modified, so write permission must not be
    # required.
    with open(clstr_path, "r") as handle:
        lines = handle.read().split("\n")

    rep_id = None
    members = None  # None until the first ">Cluster" header has been seen

    def store_cluster():
        if rep_id is not None and members:
            rep_to_members[rep_id] = members

    for line in lines:
        if ">Cluster" in line:
            store_cluster()
            rep_id = None
            members = []
            continue
        if members is None or not line.strip():
            # Text before the first header, or a blank line.
            continue
        full_id = extract_full_id(line)
        if full_id is None:
            continue
        # The representative is recognised by its own marker rather than by
        # its position, because it is not necessarily the first line of the
        # cluster.
        if _REP_LINE.search(line):
            rep_id = full_id
        else:
            members.append(full_id)
    store_cluster()
    return rep_to_members


def append_cdhit_members(cluster_file, rep_to_members):
    """Normalise one cluster table in place and append CD-HIT member rows.

    Every existing line is rewritten with its fields joined by tabs.  For
    each representative id found in the table, one row per redundant member
    is appended.  A new row is a copy of the table's first row with
    column 3 set to "---", column 4 to "CD-Hit", column 7 to the member id
    and, if the row has more than nine columns, column 9 to the member id
    without its last "_"-separated suffix.

    :param cluster_file: path of a ``*.cluster.all`` table (modified).
    :param rep_to_members: mapping as returned by ``parse_cdhit_clusters``.
    """
    with open(cluster_file, "r+") as handle:
        file_lines = handle.readlines()
        if not file_lines:
            # Nothing to normalise and no template row to copy.
            return

        normalised = ["\t".join(line.split()) + "\n" for line in file_lines]
        template = file_lines[0].split()
        file_content = "".join(file_lines)

        new_rows = []
        if len(template) < 8:
            sys.stderr.write(
                "%s: first row has fewer than 8 columns, "
                "no CD-Hit rows added\n" % cluster_file
            )
        else:
            for rep_id, members in rep_to_members.items():
                # NOTE(review): plain substring test, so an id that is a
                # prefix of another id also matches.  Kept as in the original
                # because the column holding the id is not evident here.
                if rep_id not in file_content:
                    continue
                for member_id in members:
                    row = list(template)
                    row[3] = "---"
                    row[4] = "CD-Hit"
                    row[7] = str(member_id)
                    if len(row) > 9:
                        row[9] = str(member_id.rsplit("_", 1)[0])
                    new_rows.append("\t".join(row) + "\n")

        handle.seek(0)
        handle.writelines(normalised + new_rows)
        handle.truncate()


def main(argv=None):
    """Entry point: parse ``argv[1]`` and update every cluster table."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: addCdhitseqs.py <cdhit.clstr>")
    rep_to_members = parse_cdhit_clusters(argv[1])
    for cluster_file in sorted(glob.glob(cluster_seqs_stats_path)):
        append_cdhit_members(cluster_file, rep_to_members)


if __name__ == "__main__":
    main()
