comparison range2tag.py @ 0:0b486eaca66e draft

planemo upload for repository https://github.com/gpovysil/galaxy/tools/range2tag commit 16b1092dfe1052f9560188e67f39009f1e4b32bc
author gpovysil
date Wed, 16 May 2018 04:53:11 -0400
parents
children 7d4ec8f51253
comparison
equal deleted inserted replaced
-1:000000000000 0:0b486eaca66e
1 """range2tag.py
2
3 Author -- Gundula Povysil
4 Contact -- povysil@bioinf.jku.at
5
6 Takes a SAM file, start and stop positions as input and prints all tags
7 of reads that overlap with regions to user specified output file.
8 ======= ========== ================= ================================
9 Version Date Author Description
10 0.0.2 2018-05-15 Gundula Povysil -
11 ======= ========== ================= ================================
12
13 USAGE: python range2tag.py inputFile.sam ranges.txt outputFile.txt
14 """
15
16
17 import numpy as np
18 import re
19 import argparse
20 import sys
21 import os
22
23 def make_argparser():
24 parser = argparse.ArgumentParser(description='Takes a SAM file, start and stop positions as input and prints all tags of reads that overlap with regions to user specified output file.')
25 parser.add_argument('inputFile',
26 help='SAM file with aligned reads.')
27 parser.add_argument('rangesFile',
28 help='TXT file with start and stop positions.')
29 parser.add_argument('outputFile',
30 help='Output TXT file with tags that are within specified regions.')
31 return parser
32
33 def range2tag(argv):
34 parser = make_argparser()
35 args=parser.parse_args(argv[1:])
36
37 inputFile = args.inputFile
38 rangesFile = args.rangesFile
39 outputFile = args.outputFile
40
41 if os.path.isfile(inputFile) is False:
42 print("Error: Could not find '{}'".format(inputFile))
43 exit(0)
44
45 if os.path.isfile(rangesFile) is False:
46 print("Error: Could not find '{}'".format(rangesFile))
47 exit(0)
48
49 with open(rangesFile, 'r') as regs:
50 range_array = np.genfromtxt(regs, skip_header=0, delimiter='\t', comments='#')
51
52 start_posList = range_array[:,0].astype(int)
53 stop_posList = range_array[:,1].astype(int)
54
55 print(start_posList)
56 print(stop_posList)
57
58 if len(start_posList) == 0:
59 print("Error: start_positions is empty")
60 exit(2)
61
62 if len(stop_posList) == 0:
63 print("Error: end_positions is empty")
64 exit(3)
65
66 if len(start_posList) != len(stop_posList):
67 print("start_positions and end_positions do not have the same length")
68 exit(3)
69
70 with open(inputFile, 'r') as sam:
71 data_array = np.genfromtxt(sam, skip_header=0, delimiter='\t', usecols=range(11), comments='#', dtype='string')
72
73 tags = np.array(data_array[:, 0])
74 ref_pos = np.array(data_array[:, 3]).astype(int)
75 cigar = np.array(data_array[:, 5])
76
77 lst = []
78 ind = []
79 start_posList = np.array(start_posList).astype(int)
80 stop_posList = np.array(stop_posList).astype(int)
81
82 for start_pos, stop_pos in zip(start_posList, stop_posList):
83 start_pos = start_pos - 3
84 stop_pos = stop_pos + 3
85 mut_tags = None
86 for t in range(0, len(tags)):
87 c_split = re.split('([A-Z])', cigar[t])
88 cigar_long = None
89
90 for i in range(1, len(c_split), 2):
91 if cigar_long is None:
92 cigar_long = np.repeat(c_split[i], c_split[i - 1])
93 else:
94 cigar_long = np.concatenate((cigar_long, np.repeat(c_split[i], c_split[i - 1])), axis=0)
95
96 pos = ref_pos[t]
97 seq_pos = 0
98 # print(pos)
99 if pos < stop_pos:
100 for j in range(0, len(cigar_long)):
101 if pos >= stop_pos:
102 break
103 if cigar_long[j] in ("M", "D", "N"):
104 pos += 1
105 # print(pos)
106 if pos > start_pos:
107 if mut_tags is None:
108 mut_tags = np.array((tags[t]))
109 else:
110 mut_tags = np.vstack((mut_tags, np.array(tags[t])))
111
112 index = np.repeat("{}_{}".format(start_pos, stop_pos), len(mut_tags))
113 ind.append(index)
114 lst.append(mut_tags)
115
116 index = np.concatenate((ind))
117 tags = np.concatenate((lst))
118 mut_tags = np.column_stack((index, tags))
119
120 np.savetxt(outputFile, mut_tags, fmt="%s")
121 print("File saved under {} in {}!".format(outputFile, os.getcwd()))
122
123
124 if __name__ == '__main__':
125 sys.exit(range2tag(sys.argv))