comparison AnnotateRNAz.py @ 0:273a961d332f draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_team/rnaz commit d261ddb93500e1ea309845fa3989c87c6312583d-dirty
author bgruening
date Wed, 30 Jan 2019 04:13:47 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:273a961d332f
1 # AnnotateRnaz.py ---
2 #
3 # Filename: AnnotateRnaz.py
4 # Description:
5 # Author: Joerg Fallmann
6 # Maintainer:
7 # Created: Sat Jan 26 12:45:25 2019 (+0100)
8 # Version:
9 # Package-Requires: ()
10 # Last-Updated: Tue Jan 29 13:52:57 2019 (+0100)
11 # By: Joerg Fallmann
12 # Update #: 188
13 # URL:
14 # Doc URL:
15 # Keywords:
16 # Compatibility:
17 #
18 #
19
20 # Commentary:
21 # This script is a replacement for rnazAnnotate.pl
22 # rnazAnnotate can not handle the output from version 2 adequatly
23 # This script uses the bedtools API to fast intersect an annotation Bed
24 # with output from RNAz
25
26 # Change Log:
27 #
28 #
29 #
30 #
31 # This program is free software: you can redistribute it and/or modify
32 # it under the terms of the GNU General Public License as published by
33 # the Free Software Foundation, either version 3 of the License, or (at
34 # your option) any later version.
35 #
36 # This program is distributed in the hope that it will be useful, but
37 # WITHOUT ANY WARRANTY; without even the implied warranty of
38 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
39 # General Public License for more details.
40 #
41 # You should have received a copy of the GNU General Public License
42 # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
43 #
44 #
45
46 # Code:
47
48 #!/usr/bin/env python3
49
50 import sys
51 import glob
52 import argparse
53 from io import StringIO
54 import gzip
55 import traceback as tb
56 import pybedtools
57 import re
58 import tempfile
59
60 def parseargs():
61 parser = argparse.ArgumentParser(description='Intersect RNAz output with Annotation from BED')
62 parser.add_argument("-b", "--bed", type=str, help='Annotation BED file')
63 parser.add_argument("-i", "--input", type=str, help='RNAz output')
64 parser.add_argument("-o", "--bedout", type=str, help='Annotation BED output')
65 parser.add_argument("-r", "--rnazout", type=str, help='Annotation rnaz output')
66 return parser.parse_args()
67
68 def annotate(bed, input, bedout, rnazout):
69 try:
70
71 pybedtools.set_tempdir('.') # Make sure we do not write somewhere we are not supposed to
72 anno = pybedtools.BedTool(bed)
73 rnaz=readrnaz(input)
74 tmpbed = pybedtools.BedTool(rnaztobed(rnaz), from_string=True)
75
76 intersection = tmpbed.intersect(anno,wa=True,wb=True,s=True) # intersect strand specific, keep all info on a and b files
77
78 bedtornaz(intersection, rnaz, bedout, rnazout)
79
80 return 1
81
82 except Exception as err:
83 exc_type, exc_value, exc_tb = sys.exc_info()
84 tbe = tb.TracebackException(
85 exc_type, exc_value, exc_tb,
86 )
87 print(''.join(tbe.format()),file=sys.stderr)
88
89 def readin(file):
90 try:
91 if '.gz' in file:
92 f = gzip.open(file,'rt')
93 else:
94 f = open(file,'rt')
95 return f
96
97 except Exception as err:
98 exc_type, exc_value, exc_tb = sys.exc_info()
99 tbe = tb.TracebackException(
100 exc_type, exc_value, exc_tb,
101 )
102 print(''.join(tbe.format()),file=sys.stderr)
103
104 def readrnaz(rnaz):
105 try:
106 toparse = readin(rnaz)
107 tointersect = {}
108 header = []
109 for line in toparse:
110 if '#' in line[0]:
111 tointersect['header']=line.strip()
112 line = re.sub('^#','',line)
113 cont = line.strip().split('\t')
114 foi = cont.index('seqID') # need to find which column contains seqID
115 sf = cont.index('start') # need to find which column contains start
116 ef = cont.index('end') # need to find which column contains end
117 if 'strand' in cont:# need to find which column contains strand
118 df = cont.index('strand')
119 else:
120 df = None
121 else:
122 content = line.strip().split('\t')
123 newid=re.split('\.|\,|\s|\\|\/|\_', content[foi])[1] # I can only hope that we have species.chromosome.whatever as annotation in aln or maf, if not this is hardly parseable
124 if df:
125 longid = '_'.join([newid, content[sf], content[ef], 'RNAzresult', '0', content[df]])
126 tointersect[longid] = content
127 else:
128 longid = '_'.join([newid, content[sf], content[ef], 'RNAzresult', '0', '+'])
129 tointersect[longid] = content
130 longid = '_'.join([newid, content[sf], content[ef], 'RNAzresult', '0', '-'])
131 tointersect[longid] = content
132
133 return tointersect
134
135 except Exception as err:
136 exc_type, exc_value, exc_tb = sys.exc_info()
137 tbe = tb.TracebackException(
138 exc_type, exc_value, exc_tb,
139 )
140 print(''.join(tbe.format()),file=sys.stderr)
141
142
143 def rnaztobed(rnaz):
144 try:
145 tmpbed = []
146 for key in rnaz:
147 if key != 'header':
148 tmpbed.append('\t'.join(key.split('_')))
149
150 return '\n'.join(tmpbed)
151
152 except Exception as err:
153 exc_type, exc_value, exc_tb = sys.exc_info()
154 tbe = tb.TracebackException(
155 exc_type, exc_value, exc_tb,
156 )
157 print(''.join(tbe.format()),file=sys.stderr)
158
159 def bedtornaz(bed, rnaz, bedout, rnazout):
160 try:
161 b = open(bedout,'w')
162 r = open(rnazout,'w')
163
164 annotatedbed=[]
165 annotatedrnaz=[]
166 annotatedrnaz.append(str.join('\t',[rnaz['header'],'Annotation']))
167 for line in open(bed.fn):
168 out = line.strip().split("\t")
169 annotatedbed.append(str.join('\t',out[0:3]+out[9:10]+out[4:6]))
170 key = str.join('_',out[0:6])
171 annotatedrnaz.append(str.join('\t',rnaz[key]+out[9:10]))
172
173 print(str.join('\n', annotatedbed),file=b)
174 print(str.join('\n', annotatedrnaz),file=r)
175
176 except Exception as err:
177 exc_type, exc_value, exc_tb = sys.exc_info()
178 tbe = tb.TracebackException(
179 exc_type, exc_value, exc_tb,
180 )
181 print(''.join(tbe.format()),file=sys.stderr)
182
183
184 def closefile(file):
185 try:
186 file.close()
187
188 except Exception as err:
189 exc_type, exc_value, exc_tb = sys.exc_info()
190 tbe = tb.TracebackException(
191 exc_type, exc_value, exc_tb,
192 )
193 print(''.join(tbe.format()),file=sys.stderr)
194
195
196
197
198 ####################
199 #### MAIN ####
200 ####################
201 if __name__ == '__main__':
202 args=parseargs()
203 annotate(args.bed, args.input, args.bedout, args.rnazout)
204 ######################################################################
205 # AnnotateRnaz.py ends here