Mercurial > repos > rnateam > graphclust_preprocessing
annotate splitSHAPE.py @ 8:a04e93fdb40a draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 9a3dc91fa984be18fabc0d968360634d787c9589
| author | rnateam | 
|---|---|
| date | Wed, 24 May 2017 09:57:18 -0400 | 
| parents | 07ad2d77f28a | 
| children | 0690d59881b9 | 
| rev | line source | 
|---|---|
| 7 
07ad2d77f28a
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 4379e712f76f2bb12ee2cc270dd8a0e806df2cd6
 rnateam parents: diff
"""Cut per-sequence SHAPE reactivity data into the windows listed in FASTA/data.names.

Usage: splitSHAPE.py <shape_file> <win_size>

Reads the window table from ``FASTA/data.names`` and the reactivities from
``<shape_file>``, then writes one ``>id window`` record per window, followed by
the re-numbered reactivity rows of that window, to ``shape_data_split.react``.
"""
import os
import re
import sys

NAMES_FILE = "FASTA/data.names"
OUTPUT_FILE = "shape_data_split.react"


def parse_names(text):
    """Parse the contents of the names table.

    Each non-blank line is split on whitespace; column 0 is the integer
    sequence id, column 1 the window descriptor and column 3 the original
    identifier, from which the last ``_suffix`` is removed.

    Returns a list of ``(seq_id, window, original_id)`` tuples in file order.
    Raises ValueError / IndexError on a malformed non-blank line.
    """
    records = []
    for line in text.split('\n'):
        fields = line.split()
        if not fields:
            # Blank lines (including the one produced by a trailing newline)
            # carry no record; skipping them also keeps the last record when
            # the file does not end with a newline.
            continue
        original_id = fields[3].rsplit('_', 1)[0]
        records.append((int(fields[0]), fields[1], original_id))
    return records


def parse_shape(text):
    """Parse FASTA-like SHAPE text into ``{header: [row, ...]}``.

    A line starting with ``>`` opens a new record; its key is the line with
    every ``>`` removed and surrounding whitespace stripped. All following
    lines (blank ones included, so list positions stay aligned with the
    file) are collected as that record's rows. A repeated header replaces
    the earlier record.
    """
    reactivities = {}
    current = []  # rows seen before the first header are collected but never stored
    for line in text.split('\n'):
        if line.startswith('>'):
            current = []
            reactivities[line.replace('>', '').strip()] = current
        else:
            current.append(line)
    return reactivities


def build_split_react(records, reactivities):
    """Return the text of the split reactivity file.

    For every ``(seq_id, window, original_id)`` record the second and third
    integers found in ``window`` are taken as the 1-based, inclusive start and
    end of the window. The matching rows are selected by list position and
    their first column is shifted so that numbering restarts at 1.

    Raises RuntimeError if ``original_id`` has no entry in ``reactivities``.
    """
    parts = []
    for seq_id, window, original_id in records:
        if original_id not in reactivities:
            # Report the one missing key, not the whole list of identifiers.
            raise RuntimeError('Error key {} not found'.format(original_id))

        numbers = re.findall(r'\d+', window)
        start = int(numbers[1])
        end = int(numbers[2])

        parts.append('>' + str(seq_id) + " " + window + "\n")
        for row in reactivities[original_id][start - 1:end]:
            fields = row.split()
            position = int(fields[0]) - start + 1
            parts.append(str(position) + '\t' + fields[1] + "\n")
    return "".join(parts)


def main(argv):
    """Run the split for ``argv = [prog, shape_file, win_size]``."""
    shape_file = argv[1]
    # win_size is not used by the splitting itself, but it is still parsed so
    # that a missing or non-integer argument fails exactly as it used to.
    int(argv[2])

    with open(NAMES_FILE, 'r') as handle:
        records = parse_names(handle.read())
    with open(shape_file, 'r') as handle:
        reactivities = parse_shape(handle.read())

    output = build_split_react(records, reactivities)
    with open(OUTPUT_FILE, 'w') as out:
        out.write(output)


if __name__ == "__main__":
    main(sys.argv)
