annotate splitSHAPE.py @ 13:2a5defc09381 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/GraphClust commit 4406735e44aba20859c252be39f4e99df28c7a92
author rnateam
date Sat, 27 Oct 2018 13:27:57 -0400
parents c0c9d19bc7b2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
07ad2d77f28a planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 4379e712f76f2bb12ee2cc270dd8a0e806df2cd6
rnateam
parents:
diff changeset
"""Split per-sequence SHAPE reactivities into per-fragment reactivities.

Reads the fragment table ``FASTA/data.names`` and the reactivity file given
as the first command-line argument, then writes ``shape_data_split.react``
containing, for every fragment, the reactivity rows that fall inside the
fragment, renumbered so that the fragment starts at position 1.
"""
import os
import re
import sys

NAME_FILE = "FASTA/data.names"
OUTPUT_FILE = "shape_data_split.react"


def parse_names(lines):
    """Parse fragment records from the lines of the names file.

    Each non-blank line is split on whitespace: field 0 is the integer
    fragment id, field 1 the fragment name, and field 3 the original
    sequence id followed by a ``_<suffix>`` that is stripped here.

    Returns a list of ``(fragment_id, fragment_name, original_id)`` tuples in
    input order.  Raises ``ValueError``/``IndexError`` on malformed lines.
    """
    records = []
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        original_id = fields[3].rsplit('_', 1)[0]
        records.append((int(fields[0]), fields[1], original_id))
    return records


def parse_reactivities(lines):
    """Group reactivity rows by the sequence header that precedes them.

    A line starting with ``>`` opens a new record; its key is the first
    whitespace-separated token of the header with every ``>`` removed.
    Every following non-blank line is stored, stripped, as one row of that
    record.  Rows seen before the first header are ignored.

    Blank lines are skipped on purpose: rows are later selected by list
    position, so a stored blank line would shift every following position
    and fail when it is split into columns.

    Returns a dict mapping sequence id to its list of row strings.
    """
    reactivities = {}
    current = None
    for line in lines:
        if line.startswith('>'):
            key = line.replace('>', '').split()[0]
            current = []
            # A repeated header replaces the earlier record.
            reactivities[key] = current
            continue
        row = line.strip()
        if row and current is not None:
            current.append(row)
    return reactivities


def split_reactivities(names, reactivities):
    """Build the text of the per-fragment reactivity file.

    ``names`` is the list returned by ``parse_names`` and ``reactivities``
    the dict returned by ``parse_reactivities``.  For each fragment the
    second and third numbers found in its name are taken as the 1-based
    inclusive start and end positions; the rows at those positions are
    emitted as ``<position - start + 1>\\t<value>``.

    Raises ``RuntimeError`` when a fragment refers to a sequence that has no
    reactivities, or when its name does not contain a start and an end.
    """
    pieces = []
    for index, (fragment_id, fragment_name, original_id) in enumerate(names):
        if original_id not in reactivities:
            raise RuntimeError(
                'Error key {} {} not found'.format(index, original_id))

        # The first number belongs to the sequence label; the next two are
        # the fragment boundaries.
        numbers = re.findall(r'\d+', fragment_name)
        if len(numbers) < 3:
            raise RuntimeError(
                'Error no start/end position in fragment name {}'.format(
                    fragment_name))
        start = int(numbers[1])
        end = int(numbers[2])

        pieces.append('>{} {}\n'.format(fragment_id, fragment_name))
        for row in reactivities[original_id][start - 1:end]:
            fields = row.split()
            position = int(fields[0]) - start + 1
            pieces.append('{}\t{}\n'.format(position, fields[1]))
    return ''.join(pieces)


def main(argv=None):
    """Run the split; ``argv`` defaults to the command-line arguments."""
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit('Usage: splitSHAPE.py <shape_file>')

    with open(NAME_FILE, 'r') as handle:
        names = parse_names(handle)
    with open(args[0], 'r') as handle:
        reactivities = parse_reactivities(handle)

    # Build the whole text first so a failure does not leave an empty or
    # truncated output file behind.
    text = split_reactivities(names, reactivities)
    with open(OUTPUT_FILE, 'w') as out:
        out.write(text)


if __name__ == "__main__":
    main()