Mercurial > repos > rnateam > graphclust_preprocessing
annotate splitSHAPE.py @ 10:16bcaef3dc1e draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 6aa3014c2c6f9ef9ee71b20cfffec461b3a102a5
author | rnateam |
---|---|
date | Thu, 01 Jun 2017 12:11:37 -0400 |
parents | 0690d59881b9 |
children | c0c9d19bc7b2 |
rev | line source |
---|---|
7
07ad2d77f28a
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 4379e712f76f2bb12ee2cc270dd8a0e806df2cd6
rnateam
parents:
diff
changeset
|
"""Split per-sequence SHAPE reactivities into per-fragment windows.

Usage: splitSHAPE.py <shape_file> <win_size>

Reads the fragment table ``FASTA/data.names`` (relative to the current
working directory) and a FASTA-like reactivity file, then writes, for every
fragment, the reactivity rows covered by that fragment to
``shape_data_split.react`` with positions renumbered to start at 1.
"""
import os
import re
import sys

# Fragment table read by the script; path is relative to the working directory.
NAME_FILE = "FASTA/data.names"
# Result file, created/overwritten in the working directory.
OUTPUT_FILE = "shape_data_split.react"

# A record header in the reactivity file is any line starting with '>'.
HEADER_PATTERN = re.compile("^>.*$")
# Used to pull the integer runs out of a fragment descriptor.
NUMBER_PATTERN = re.compile(r'\d+')


def _read_names(path=NAME_FILE):
    """Parse the fragment table into ``(seq_id, fragment, original_id)`` tuples.

    Each non-blank line is split on whitespace: column 0 is an integer id,
    column 1 the fragment descriptor, and column 3 the source sequence name
    followed by ``_<suffix>`` (the text after the last underscore is dropped).

    Blank lines are skipped and a final line without a trailing newline is
    still read. Raises ``IndexError``/``ValueError`` on a malformed line.
    """
    records = []
    with open(path, 'r') as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            original_id = fields[3].rsplit('_', 1)[0]
            records.append((int(fields[0]), fields[1], original_id))
    return records


def _read_reactivities(path):
    """Map each header name in the reactivity file to its list of data rows.

    The key is the first whitespace-separated token after the leading '>'.
    Rows that appear before the first header are ignored, and blank lines are
    skipped so they cannot shift the positional slicing done later. When a
    name occurs more than once, the last record wins.
    """
    reactivities = {}
    current = None
    with open(path, 'r') as handle:
        for raw in handle:
            line = raw.rstrip('\r\n')
            if not line.strip():
                continue
            if HEADER_PATTERN.match(line):
                current = []
                # Drop only the leading marker; a '>' inside the name is kept.
                reactivities[line[1:].split()[0]] = current
            elif current is not None:
                current.append(line)
    return reactivities


def _split_reactivities(records, reactivities):
    """Build the output text for all fragments.

    For each fragment the second and third integers found in its descriptor
    are used as the 1-based inclusive start and end of the window (the
    descriptor presumably looks like ``SEQ1#1#100#+`` -- confirm against the
    producer of ``data.names``). Rows are selected by position in the
    record, and their first column is shifted so the window starts at 1.

    Raises ``RuntimeError`` if a fragment's source sequence has no record.
    """
    parts = []
    for index, (seq_id, fragment, original_id) in enumerate(records):
        if original_id not in reactivities:
            raise RuntimeError(
                'Error key {} {} not found'.format(index, original_id))

        numbers = NUMBER_PATTERN.findall(fragment)
        start = int(numbers[1])
        end = int(numbers[2])

        parts.append('>' + str(seq_id) + " " + fragment + "\n")
        for row in reactivities[original_id][start - 1:end]:
            fields = row.split()
            position = int(fields[0]) - start + 1
            parts.append(str(position) + '\t' + fields[1] + "\n")
    return "".join(parts)


def main(argv=None):
    """Run the split; ``argv`` defaults to ``sys.argv[1:]``.

    ``argv[0]`` is the reactivity file. ``argv[1]`` is the window size, which
    must be an integer but is not otherwise used; it is still parsed so the
    command line keeps failing on a missing or non-numeric value.
    """
    if argv is None:
        argv = sys.argv[1:]
    shape_file = argv[0]
    int(argv[1])

    records = _read_names(NAME_FILE)
    reactivities = _read_reactivities(shape_file)
    # Build everything first so no partial output is left behind on error.
    text = _split_reactivities(records, reactivities)

    with open(OUTPUT_FILE, 'w') as out:
        out.write(text)


if __name__ == "__main__":
    main()