comparison splitSHAPE.py @ 7:07ad2d77f28a draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 4379e712f76f2bb12ee2cc270dd8a0e806df2cd6
author rnateam
date Mon, 22 May 2017 12:45:22 -0400
parents
children a04e93fdb40a
comparison
equal deleted inserted replaced
6:dff6a5a17221 7:07ad2d77f28a
1 import os
2 import re
3 import sys
4
# Split per-sequence SHAPE reactivities into the fragments listed in
# FASTA/data.names and write the result to shape_data_split.react.
#
# Usage: splitSHAPE.py <shape_file> <win_size>

NAMES_FILE = "FASTA/data.names"
OUTPUT_FILE = "shape_data_split.react"


def _read_names(path):
    """Read the fragment table and return (seq_id, fragment, orig_id) tuples.

    Each non-blank line is split on whitespace: field 0 is parsed as the
    integer fragment id, field 1 is the fragment label, and field 3 is the
    original sequence id with everything after its last '_' removed.

    Raises:
        RuntimeError: if a non-blank line has fewer than four fields.
        ValueError: if field 0 is not an integer.
    """
    records = []
    with open(path, 'r') as handle:
        for line_no, line in enumerate(handle, 1):
            fields = line.split()
            if not fields:
                # Blank lines (including a trailing one) carry no record.
                continue
            if len(fields) < 4:
                raise RuntimeError(
                    'Error malformed line {} in {}'.format(line_no, path))
            orig_id = fields[3].rsplit('_', 1)[0]
            records.append((int(fields[0]), fields[1], orig_id))
    return records


def _read_reactivities(path):
    """Read a '>'-headed reactivity file into {header: [data lines]}.

    A line starting with '>' opens a new record; its key is the line with
    every '>' removed and surrounding whitespace stripped. Following
    non-blank lines are collected, in order, as that record's data lines.
    Lines appearing before the first header are ignored.
    """
    react_dict = {}
    current = None
    with open(path, 'r') as handle:
        for raw in handle:
            if raw.startswith('>'):
                current = []
                react_dict[raw.replace('>', '').strip()] = current
            elif current is not None and raw.strip():
                # Blank lines are skipped so that list position k-1 keeps
                # holding the k-th data line of the record.
                current.append(raw.rstrip('\n'))
    return react_dict


def _format_fragments(records, react_dict, win_size):
    """Build the text of the split reactivity file.

    For every record a '>seq_id fragment' header is emitted, followed by the
    data lines of the matching original sequence that fall inside the
    fragment's window, one 'position<TAB>value' line each.

    Raises:
        RuntimeError: if a record's original id has no reactivity record.
    """
    out = []
    for seq_id, fragment, orig_id in records:
        if orig_id not in react_dict:
            # Report only the missing key, not the whole id list.
            raise RuntimeError('Error key {} not found'.format(orig_id))

        out.append('>' + str(seq_id) + " " + fragment + "\n")

        # NOTE(review): the 2nd and 3rd digit runs of the label are taken as
        # the 1-based window start and end; presumably the label looks like
        # NAME<n>#start#end#strand -- confirm against the data.names writer.
        chunks = re.findall(r'\d+', fragment)
        start = int(chunks[1])
        end = int(chunks[2])

        for entry in react_dict[orig_id][start - 1:end]:
            fields = entry.split()
            position = int(fields[0])
            # NOTE(review): only positions above win_size are shifted, and
            # they are shifted by `start` (not start - 1). Kept exactly as in
            # the original script -- confirm this is the intended renumbering.
            if position > win_size:
                position -= start
            out.append(str(position) + '\t' + fields[1] + "\n")
    return "".join(out)


def main(argv=None):
    """Run the split: argv is [shape_file, win_size] (default: sys.argv[1:]).

    Reads FASTA/data.names and the given shape file relative to the current
    working directory and writes shape_data_split.react there.
    """
    if argv is None:
        argv = sys.argv[1:]
    if len(argv) < 2:
        raise SystemExit('usage: splitSHAPE.py <shape_file> <win_size>')

    shape_file = argv[0]
    win_size = int(argv[1])

    records = _read_names(NAMES_FILE)
    react_dict = _read_reactivities(shape_file)
    # Build the full text first so a failure leaves no partial output file.
    to_write = _format_fragments(records, react_dict, win_size)

    with open(OUTPUT_FILE, 'w') as out:
        out.write(to_write)


if __name__ == "__main__":
    main()