Mercurial > repos > gpovysil > range2tag
comparison range2tag.py @ 0:0b486eaca66e draft
planemo upload for repository https://github.com/gpovysil/galaxy/tools/range2tag commit 16b1092dfe1052f9560188e67f39009f1e4b32bc
author | gpovysil |
---|---|
date | Wed, 16 May 2018 04:53:11 -0400 |
parents | |
children | 7d4ec8f51253 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0b486eaca66e |
---|---|
1 """range2tag.py | |
2 | |
3 Author -- Gundula Povysil | |
4 Contact -- povysil@bioinf.jku.at | |
5 | |
6 Takes a SAM file, start and stop positions as input and prints all tags | |
7 of reads that overlap with regions to user specified output file. | |
8 ======= ========== ================= ================================ | |
9 Version Date Author Description | |
10 0.0.2 2018-05-15 Gundula Povysil - | |
11 ======= ========== ================= ================================ | |
12 | |
13 USAGE: python range2tag.py inputFile.sam ranges.txt outputFile.txt | |
14 """ | |
15 | |
16 | |
17 import numpy as np | |
18 import re | |
19 import argparse | |
20 import sys | |
21 import os | |
22 | |
23 def make_argparser(): | |
24 parser = argparse.ArgumentParser(description='Takes a SAM file, start and stop positions as input and prints all tags of reads that overlap with regions to user specified output file.') | |
25 parser.add_argument('inputFile', | |
26 help='SAM file with aligned reads.') | |
27 parser.add_argument('rangesFile', | |
28 help='TXT file with start and stop positions.') | |
29 parser.add_argument('outputFile', | |
30 help='Output TXT file with tags that are within specified regions.') | |
31 return parser | |
32 | |
33 def range2tag(argv): | |
34 parser = make_argparser() | |
35 args=parser.parse_args(argv[1:]) | |
36 | |
37 inputFile = args.inputFile | |
38 rangesFile = args.rangesFile | |
39 outputFile = args.outputFile | |
40 | |
41 if os.path.isfile(inputFile) is False: | |
42 print("Error: Could not find '{}'".format(inputFile)) | |
43 exit(0) | |
44 | |
45 if os.path.isfile(rangesFile) is False: | |
46 print("Error: Could not find '{}'".format(rangesFile)) | |
47 exit(0) | |
48 | |
49 with open(rangesFile, 'r') as regs: | |
50 range_array = np.genfromtxt(regs, skip_header=0, delimiter='\t', comments='#') | |
51 | |
52 start_posList = range_array[:,0].astype(int) | |
53 stop_posList = range_array[:,1].astype(int) | |
54 | |
55 print(start_posList) | |
56 print(stop_posList) | |
57 | |
58 if len(start_posList) == 0: | |
59 print("Error: start_positions is empty") | |
60 exit(2) | |
61 | |
62 if len(stop_posList) == 0: | |
63 print("Error: end_positions is empty") | |
64 exit(3) | |
65 | |
66 if len(start_posList) != len(stop_posList): | |
67 print("start_positions and end_positions do not have the same length") | |
68 exit(3) | |
69 | |
70 with open(inputFile, 'r') as sam: | |
71 data_array = np.genfromtxt(sam, skip_header=0, delimiter='\t', usecols=range(11), comments='#', dtype='string') | |
72 | |
73 tags = np.array(data_array[:, 0]) | |
74 ref_pos = np.array(data_array[:, 3]).astype(int) | |
75 cigar = np.array(data_array[:, 5]) | |
76 | |
77 lst = [] | |
78 ind = [] | |
79 start_posList = np.array(start_posList).astype(int) | |
80 stop_posList = np.array(stop_posList).astype(int) | |
81 | |
82 for start_pos, stop_pos in zip(start_posList, stop_posList): | |
83 start_pos = start_pos - 3 | |
84 stop_pos = stop_pos + 3 | |
85 mut_tags = None | |
86 for t in range(0, len(tags)): | |
87 c_split = re.split('([A-Z])', cigar[t]) | |
88 cigar_long = None | |
89 | |
90 for i in range(1, len(c_split), 2): | |
91 if cigar_long is None: | |
92 cigar_long = np.repeat(c_split[i], c_split[i - 1]) | |
93 else: | |
94 cigar_long = np.concatenate((cigar_long, np.repeat(c_split[i], c_split[i - 1])), axis=0) | |
95 | |
96 pos = ref_pos[t] | |
97 seq_pos = 0 | |
98 # print(pos) | |
99 if pos < stop_pos: | |
100 for j in range(0, len(cigar_long)): | |
101 if pos >= stop_pos: | |
102 break | |
103 if cigar_long[j] in ("M", "D", "N"): | |
104 pos += 1 | |
105 # print(pos) | |
106 if pos > start_pos: | |
107 if mut_tags is None: | |
108 mut_tags = np.array((tags[t])) | |
109 else: | |
110 mut_tags = np.vstack((mut_tags, np.array(tags[t]))) | |
111 | |
112 index = np.repeat("{}_{}".format(start_pos, stop_pos), len(mut_tags)) | |
113 ind.append(index) | |
114 lst.append(mut_tags) | |
115 | |
116 index = np.concatenate((ind)) | |
117 tags = np.concatenate((lst)) | |
118 mut_tags = np.column_stack((index, tags)) | |
119 | |
120 np.savetxt(outputFile, mut_tags, fmt="%s") | |
121 print("File saved under {} in {}!".format(outputFile, os.getcwd())) | |
122 | |
123 | |
124 if __name__ == '__main__': | |
125 sys.exit(range2tag(sys.argv)) |