Mercurial > repos > bgruening > rnaz_select_seqs
comparison AnnotateRNAz.py @ 0:273a961d332f draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_team/rnaz commit d261ddb93500e1ea309845fa3989c87c6312583d-dirty
author | bgruening |
---|---|
date | Wed, 30 Jan 2019 04:13:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:273a961d332f |
---|---|
1 # AnnotateRnaz.py --- | |
2 # | |
3 # Filename: AnnotateRnaz.py | |
4 # Description: | |
5 # Author: Joerg Fallmann | |
6 # Maintainer: | |
7 # Created: Sat Jan 26 12:45:25 2019 (+0100) | |
8 # Version: | |
9 # Package-Requires: () | |
10 # Last-Updated: Tue Jan 29 13:52:57 2019 (+0100) | |
11 # By: Joerg Fallmann | |
12 # Update #: 188 | |
13 # URL: | |
14 # Doc URL: | |
15 # Keywords: | |
16 # Compatibility: | |
17 # | |
18 # | |
19 | |
20 # Commentary: | |
21 # This script is a replacement for rnazAnnotate.pl | |
22 # rnazAnnotate cannot handle the output from version 2 adequately | |
23 # This script uses the bedtools API to quickly intersect an annotation BED | |
24 # with the output from RNAz | |
25 | |
26 # Change Log: | |
27 # | |
28 # | |
29 # | |
30 # | |
31 # This program is free software: you can redistribute it and/or modify | |
32 # it under the terms of the GNU General Public License as published by | |
33 # the Free Software Foundation, either version 3 of the License, or (at | |
34 # your option) any later version. | |
35 # | |
36 # This program is distributed in the hope that it will be useful, but | |
37 # WITHOUT ANY WARRANTY; without even the implied warranty of | |
38 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
39 # General Public License for more details. | |
40 # | |
41 # You should have received a copy of the GNU General Public License | |
42 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
43 # | |
44 # | |
45 | |
46 # Code: | |
47 | |
48 #!/usr/bin/env python3 | |
49 | |
50 import sys | |
51 import glob | |
52 import argparse | |
53 from io import StringIO | |
54 import gzip | |
55 import traceback as tb | |
56 import pybedtools | |
57 import re | |
58 import tempfile | |
59 | |
def parseargs():
    """Parse the command line of the RNAz annotation script.

    All four options are mandatory: the annotation step uses each of them
    unconditionally, so a missing option is rejected here with a usage
    message instead of failing later with an unrelated traceback.

    Returns:
        argparse.Namespace: carries the string attributes ``bed``,
        ``input``, ``bedout`` and ``rnazout``.

    Raises:
        SystemExit: raised by argparse when an option is missing or unknown.
    """
    parser = argparse.ArgumentParser(description='Intersect RNAz output with Annotation from BED')
    parser.add_argument("-b", "--bed", type=str, required=True, help='Annotation BED file')
    parser.add_argument("-i", "--input", type=str, required=True, help='RNAz output')
    parser.add_argument("-o", "--bedout", type=str, required=True, help='Annotation BED output')
    parser.add_argument("-r", "--rnazout", type=str, required=True, help='Annotation rnaz output')
    return parser.parse_args()
67 | |
def annotate(bed, input, bedout, rnazout):
    """Intersect RNAz results with a BED annotation and write both outputs.

    The RNAz table is read with ``readrnaz``, converted to BED text with
    ``rnaztobed``, intersected with the annotation through pybedtools, and
    the hits are handed to ``bedtornaz`` for writing.

    Args:
        bed: path of the annotation BED file.
        input: path of the RNAz output to annotate.
        bedout: path of the annotated BED file to write.
        rnazout: path of the annotated RNAz table to write.

    Returns:
        1 when every step ran without raising; None after an exception was
        caught here and its traceback printed to stderr.
    """
    try:
        # Make sure we do not write somewhere we are not supposed to
        pybedtools.set_tempdir('.')

        annotation = pybedtools.BedTool(bed)
        rnaz_records = readrnaz(input)
        rnaz_as_bed = pybedtools.BedTool(rnaztobed(rnaz_records), from_string=True)

        # Strand-specific intersection that keeps all columns of both the
        # RNAz-derived entry (wa) and the matching annotation entry (wb).
        hits = rnaz_as_bed.intersect(annotation, wa=True, wb=True, s=True)

        bedtornaz(hits, rnaz_records, bedout, rnazout)
    except Exception as err:
        report = tb.TracebackException.from_exception(err)
        print(''.join(report.format()), file=sys.stderr)
    else:
        return 1
88 | |
def readin(file):
    """Open *file* for reading in text mode, decompressing gzip on the fly.

    Args:
        file: path of the file to open. A path ending in ``.gz`` is opened
            through ``gzip``; anything else is opened as plain text.

    Returns:
        An open text-mode file object, or None when opening failed (the
        traceback is then printed to stderr). The caller is responsible
        for closing the returned object.
    """
    try:
        # Decide by the file extension only: a '.gz' elsewhere in the path
        # (e.g. 'run.gz.d/hits.tsv' or 'hits.gz.txt') must not trigger
        # gzip decoding of a plain-text file.
        if file.endswith('.gz'):
            return gzip.open(file, 'rt')
        return open(file, 'rt')

    except Exception as err:
        tbe = tb.TracebackException.from_exception(err)
        print(''.join(tbe.format()), file=sys.stderr)
        return None
103 | |
def readrnaz(rnaz):
    """Parse a tab-separated RNAz result table into a lookup dictionary.

    A line starting with ``#`` is the column header; it must name the
    columns ``seqID``, ``start`` and ``end`` and may name ``strand``.
    Every other non-blank line is a result row.

    Args:
        rnaz: path of the RNAz output (plain or ``.gz``), opened via
            ``readin``.

    Returns:
        dict: ``'header'`` maps to the stripped header line. Each row is
        stored as its list of columns under the key
        ``<chrom>_<start>_<end>_RNAzresult_0_<strand>``, where ``<chrom>``
        is the second component of the seqID. Without a strand column the
        row is stored twice, once for ``+`` and once for ``-``.
        None is returned when reading or parsing failed (the traceback is
        printed to stderr).
    """
    try:
        toparse = readin(rnaz)
        if toparse is None:
            # readin already reported why the file could not be opened.
            return None

        # seqID separators: '.', ',', whitespace, backslash, slash and '_'.
        # The raw string keeps the backslash alternative intact; in a plain
        # string literal '\\|\/' collapses to the two-character text '|/'.
        separators = re.compile(r'[.,\s\\/_]')

        tointersect = {}
        foi = sf = ef = df = None
        with toparse:  # make sure the input handle is closed again
            for line in toparse:
                if not line.strip():
                    continue  # tolerate blank (e.g. trailing) lines
                if line.startswith('#'):
                    tointersect['header'] = line.strip()
                    cont = line[1:].strip().split('\t')
                    foi = cont.index('seqID')  # column holding the seqID
                    sf = cont.index('start')  # column holding the start
                    ef = cont.index('end')  # column holding the end
                    # The strand column is optional.
                    df = cont.index('strand') if 'strand' in cont else None
                    continue

                if foi is None:
                    raise ValueError('RNAz data line found before the "#" column header')

                content = line.strip().split('\t')
                # The seqID is expected to look like species.chromosome.whatever
                # (as in aln/maf input); the second component is the chromosome.
                newid = separators.split(content[foi])[1]
                # Compare against None: a strand column at index 0 is valid.
                strands = [content[df]] if df is not None else ['+', '-']
                for strand in strands:
                    longid = '_'.join([newid, content[sf], content[ef], 'RNAzresult', '0', strand])
                    tointersect[longid] = content

        return tointersect

    except Exception as err:
        tbe = tb.TracebackException.from_exception(err)
        print(''.join(tbe.format()), file=sys.stderr)
        return None
141 | |
142 | |
def rnaztobed(rnaz):
    """Render the keys of a parsed RNAz table as BED text.

    Args:
        rnaz: mapping whose keys, apart from ``'header'``, are
            underscore-joined BED fields.

    Returns:
        str: one line per key with the underscores turned into tabs,
        joined by newlines without a trailing newline. None is returned
        when an exception was caught (its traceback goes to stderr).
    """
    try:
        bed_lines = [
            record_id.replace('_', '\t')
            for record_id in rnaz
            if record_id != 'header'
        ]
        return '\n'.join(bed_lines)

    except Exception as err:
        report = tb.TracebackException.from_exception(err)
        print(''.join(report.format()), file=sys.stderr)
158 | |
def bedtornaz(bed, rnaz, bedout, rnazout):
    """Write the annotated BED file and the annotated RNAz table.

    Each line of the intersection file is expected to hold the six
    RNAz-derived BED columns followed by the annotation columns; column 10
    (index 9) is used as the annotation label.

    Args:
        bed: object whose ``fn`` attribute is the path of the intersection
            file (a pybedtools ``BedTool`` in this script).
        rnaz: parsed RNAz table; ``rnaz['header']`` is the header line and
            the first six intersection columns joined by ``_`` are the key
            of the matching row.
        bedout: path of the annotated BED file to write.
        rnazout: path of the annotated RNAz table to write.

    Returns:
        None. An exception is caught and its traceback printed to stderr;
        the output files may then be left empty.
    """
    try:
        # Context managers guarantee all three handles are flushed and
        # closed, also when a lookup fails halfway through.
        with open(bedout, 'w') as b, open(rnazout, 'w') as r:
            annotatedbed = []
            annotatedrnaz = ['\t'.join([rnaz['header'], 'Annotation'])]

            with open(bed.fn) as hits:
                for line in hits:
                    out = line.strip().split("\t")
                    label = out[9:10]  # annotation name, [] if absent
                    annotatedbed.append('\t'.join(out[0:3] + label + out[4:6]))
                    key = '_'.join(out[0:6])
                    annotatedrnaz.append('\t'.join(rnaz[key] + label))

            # Written only after every line was processed successfully.
            print('\n'.join(annotatedbed), file=b)
            print('\n'.join(annotatedrnaz), file=r)

    except Exception as err:
        tbe = tb.TracebackException.from_exception(err)
        print(''.join(tbe.format()), file=sys.stderr)
182 | |
183 | |
def closefile(file):
    """Close *file*, reporting instead of raising when that fails.

    Args:
        file: object providing a ``close()`` method.

    Returns:
        None. Any exception raised by ``close()`` is caught and its
        traceback printed to stderr.
    """
    try:
        file.close()

    except Exception as err:
        report = tb.TracebackException.from_exception(err)
        print(''.join(report.format()), file=sys.stderr)
194 | |
195 | |
196 | |
197 | |
198 #################### | |
199 #### MAIN #### | |
200 #################### | |
if __name__ == '__main__':
    args = parseargs()
    # annotate() returns 1 on success and None after it caught and reported
    # an error; pass that on as the exit status so a calling workflow can
    # detect a failed run instead of always seeing status 0.
    succeeded = annotate(args.bed, args.input, args.bedout, args.rnazout)
    sys.exit(0 if succeeded else 1)
204 ###################################################################### | |
205 # AnnotateRnaz.py ends here |