Mercurial > repos > rnateam > graphclust_preprocessing
comparison splitSHAPE.py @ 7:07ad2d77f28a draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 4379e712f76f2bb12ee2cc270dd8a0e806df2cd6
author | rnateam |
---|---|
date | Mon, 22 May 2017 12:45:22 -0400 |
parents | |
children | a04e93fdb40a |
comparison
equal
deleted
inserted
replaced
6:dff6a5a17221 | 7:07ad2d77f28a |
---|---|
1 import os | |
2 import re | |
3 import sys | |
4 | |
# Split per-sequence SHAPE reactivity data into the windows listed in
# FASTA/data.names and write the result to shape_data_split.react.
#
# Usage: splitSHAPE.py <shape_file> <win_size>

NAMES_FILE = "FASTA/data.names"
OUTPUT_FILE = "shape_data_split.react"


def read_names(path):
    """Parse the names file into (seq_id, seq_string, orig_id) tuples.

    Each non-blank line is split on whitespace: column 0 is the integer
    fragment id, column 1 the fragment descriptor string, and column 3 the
    original sequence id with its last '_'-separated suffix removed.

    Blank lines are skipped, and a final line without a trailing newline is
    still read (the previous ``split('\\n')[:-1]`` dropped it silently).

    Raises:
        IndexError: if a non-blank line has fewer than four columns.
        ValueError: if column 0 is not an integer.
    """
    records = []
    with open(path, 'r') as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            orig_id = fields[3].rsplit('_', 1)[0]
            records.append((int(fields[0]), fields[1], orig_id))
    return records


def read_reactivities(path):
    """Parse a FASTA-like reactivity file into {header: [row, ...]}.

    A line starting with '>' opens a new record; its key is the line with
    every '>' removed and surrounding whitespace stripped. The following
    non-blank lines are stored verbatim (without the newline) as that
    record's rows. Rows that appear before the first header are ignored.
    A repeated header replaces the earlier record.
    """
    react_dict = {}
    current = None
    with open(path, 'r') as handle:
        for raw in handle:
            line = raw.rstrip('\n')
            if line.startswith('>'):
                current = []
                react_dict[line.replace('>', '').strip()] = current
            elif current is not None and line.strip():
                # Blank lines carry no reactivity value; keeping them would
                # shift the position-based slicing done later.
                current.append(line)
    return react_dict


def split_reactivities(records, react_dict, win_size):
    """Build the text of the split reactivity file.

    For every (seq_id, seq_string, orig_id) record a header line
    ``>seq_id seq_string`` is emitted, followed by the rows of
    ``react_dict[orig_id]`` from the start to the end position. Those two
    positions are the second and third integers found in ``seq_string``
    and are treated as 1-based and inclusive.

    Raises:
        RuntimeError: if ``orig_id`` has no entry in ``react_dict``.
        IndexError: if ``seq_string`` holds fewer than three integers or a
            selected row has fewer than two columns.
    """
    parts = []
    for seq_id, seq_string, orig_id in records:
        if orig_id not in react_dict:
            # Report the missing key itself, not the whole id list.
            raise RuntimeError('Error key {} not found'.format(orig_id))

        rows = react_dict[orig_id]
        parts.append('>' + str(seq_id) + " " + seq_string + "\n")

        numbers = re.findall(r'\d+', seq_string)
        start = int(numbers[1])
        end = int(numbers[2])

        for row in rows[start - 1:end]:
            fields = row.split()
            position = int(fields[0])
            # NOTE(review): positions are rebased by the fragment start only
            # when they exceed win_size; kept exactly as in the original
            # script -- confirm this is the intended rule.
            if position > win_size:
                position -= start
            parts.append(str(position) + '\t' + fields[1] + "\n")
    # Join once instead of growing a string with += inside the loops.
    return "".join(parts)


def main(argv=None):
    """Run the tool: argv[1] is the SHAPE file, argv[2] the window size."""
    argv = sys.argv if argv is None else argv
    shape_file = argv[1]
    win_size = int(argv[2])

    records = read_names(NAMES_FILE)
    react_dict = read_reactivities(shape_file)
    text = split_reactivities(records, react_dict, win_size)

    with open(OUTPUT_FILE, 'w') as out:
        out.write(text)


if __name__ == "__main__":
    main()