Mercurial > repos > rnateam > graphclust_postprocessing
annotate addCdhitseqs.py @ 4:4a9754d476fe draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit 287021573c592fdb70fdbbc88943aa16a8740fc0
author | rnateam |
---|---|
date | Fri, 13 Jan 2017 16:59:29 -0500 |
parents | 79b9117aef01 |
children | 869a6e807d76 |
rev | line source |
---|---|
3
79b9117aef01
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit c03cf64554289eb098267c0923cf0cf7b245cc0c
rnateam
parents:
diff
changeset
|
"""Append CD-HIT redundant sequences to GraphClust ``*.cluster.all`` files.

Usage: ``python addCdhitseqs.py <cdhit_cluster_file>``

The CD-HIT cluster file is parsed into a mapping from each cluster's first
record to the remaining records of that cluster.  Every file matching
``RESULTS/*.cluster.all`` whose text contains such a first-record id then
gets one extra row appended per remaining record.
"""
import re
import glob
import sys

# Glob, relative to the current working directory, selecting the result files
# that receive the extra rows.
cluster_seqs_stats_path = "RESULTS/*.cluster.all"


def _extract_sequence_id(record_line):
    """Return the id held in the third whitespace-separated field of a record.

    Every ``>`` and every ``...`` is removed from that field.

    Raises:
        IndexError: if the line has fewer than three fields.
    """
    return record_line.split()[2].replace(">", "").replace("...", "")


def parse_cdhit_clusters(lines):
    """Map each cluster's first record id to the ids of its other records.

    Args:
        lines: iterable of text lines from a CD-HIT cluster file.  A line
            containing ``>Cluster`` starts a new cluster.

    Returns:
        dict of ``{first_record_id: [other_record_id, ...]}``.  Clusters with
        a single record produce no entry.

    Raises:
        IndexError: if a non-blank record line has fewer than three fields.

    Blank lines and records that precede the first ``>Cluster`` header are
    skipped; previously they caused an IndexError.

    NOTE(review): the first record after a header is taken as the
    representative.  CD-HIT appears to flag the representative with a
    trailing ``*`` instead -- confirm the two always coincide for the inputs
    this tool receives.
    """
    rep_to_members = {}
    in_cluster = False
    rep_line = None
    members = []
    for line in lines:
        if ">Cluster" in line:
            in_cluster = True
            rep_line = None
            continue
        if not in_cluster or not line.strip():
            continue
        if rep_line is None:
            # First record of the cluster: remember it and start a new list.
            rep_line = line
            members = []
            continue
        # The representative id is resolved only once a second record exists,
        # so a single-record cluster never has its line parsed.
        rep_id = _extract_sequence_id(rep_line)
        members.append(_extract_sequence_id(line))
        rep_to_members[rep_id] = members
    return rep_to_members


def build_cdhit_rows(file_content, rep_to_members):
    """Build the text to append to one cluster result file.

    Args:
        file_content: full current text of the result file.
        rep_to_members: mapping returned by ``parse_cdhit_clusters``.

    Returns:
        A string with one newline-terminated row per member of every
        representative whose id occurs in ``file_content``; ``""`` when
        nothing matches.

    Raises:
        IndexError: if a representative matches but the first line of
            ``file_content`` has fewer than six whitespace-separated fields.

    Fields 0, 1, 2 and 5 of the file's first line are copied into every row.

    NOTE(review): matching is a plain substring test, so an id such as
    ``SEQ1`` also matches text containing ``SEQ10`` -- confirm that ids
    cannot be prefixes of one another.
    """
    fields = file_content.split('\n')[0].split()
    rows = []
    for rep_id, members in rep_to_members.items():
        if rep_id not in file_content:
            continue
        for member in members:
            # The doubled spaces around "-" are part of the emitted format.
            rows.append("%s %s %s  -  CD-Hit %s ORIGID %s\n"
                        % (fields[0], fields[1], fields[2], fields[5], member))
    return "".join(rows)


def main(argv=None, results_glob=cluster_seqs_stats_path):
    """Run the tool.

    Args:
        argv: argument list shaped like ``sys.argv`` (defaults to it);
            ``argv[1]`` is the CD-HIT cluster file path.
        results_glob: glob pattern selecting the result files to extend.
    """
    if argv is None:
        argv = sys.argv
    cdhitcluster = argv[1]

    # The cluster file is only read, so do not require write permission.
    with open(cdhitcluster, 'r') as f:
        rep_to_members = parse_cdhit_clusters(f.readlines())

    for single_file in sorted(glob.glob(results_glob)):
        # Read and append through separate handles: a stream opened with
        # "a+" starts at end-of-file on Python 3, so reading it yields "".
        with open(single_file, 'r') as cl_file:
            file_content = cl_file.read()
        # Rows are rebuilt from scratch for every file so that one file's
        # rows are never appended to the files processed after it.
        to_write = build_cdhit_rows(file_content, rep_to_members)
        if to_write:
            with open(single_file, 'a') as cl_file:
                cl_file.write(to_write)


if __name__ == "__main__":
    main()