Mercurial > repos > cpt > cpt_linear_genome_plot
comparison linear_genome_plot.py @ 1:e923c686ead9 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:45:31 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:621754dd31f8 | 1:e923c686ead9 |
---|---|
1 #!/usr/bin/env python | |
2 from Bio import SeqIO | |
3 from dna_features_viewer import BiopythonTranslator, GraphicRecord | |
4 from matplotlib import rc_context | |
5 import matplotlib | |
6 import matplotlib.pyplot as plt | |
7 from itertools import cycle | |
8 import re | |
9 import sys | |
10 import argparse | |
11 | |
12 | |
13 class CPTTranslator(BiopythonTranslator): | |
14 """ | |
15 This is a customized translator from the dna_features_viewer module to fit Galaxy | |
16 """ | |
17 | |
18 global custom_feature_colors | |
19 global box_status | |
20 global label_fields | |
21 global custom_name_colors | |
22 global ignored_features_types | |
23 global ignored_gene_labels | |
24 global ignored_feature_labels | |
25 | |
26 def compute_feature_color(self, feature): | |
27 if feature.type == "CDS": | |
28 if "product" in feature.qualifiers: | |
29 color_specific = any( | |
30 re.search( | |
31 ("(\\b" + str(item) + "\\b)"), feature.qualifiers["product"][0] | |
32 ) | |
33 for item in custom_name_colors.keys() | |
34 ) or any( | |
35 re.search((item), feature.qualifiers["product"][0]) | |
36 for item in custom_name_colors.keys() | |
37 ) | |
38 if color_specific: | |
39 try: | |
40 return custom_name_colors[feature.qualifiers["product"][0]] | |
41 except KeyError: | |
42 for item in custom_name_colors.keys(): | |
43 if item in feature.qualifiers["product"][0]: | |
44 custom_name_colors[ | |
45 feature.qualifiers["product"][0] | |
46 ] = custom_name_colors[item] | |
47 return custom_name_colors[ | |
48 feature.qualifiers["product"][0] | |
49 ] | |
50 # print(feature.qualifiers["product"][0]) | |
51 else: | |
52 try: | |
53 return custom_feature_colors[feature.type] | |
54 except KeyError: | |
55 return BiopythonTranslator.compute_feature_color(self, feature) | |
56 else: | |
57 if feature.type not in ignored_features_types: | |
58 try: | |
59 return custom_feature_colors[feature.type] | |
60 except KeyError: | |
61 return BiopythonTranslator.compute_feature_color(self, feature) | |
62 | |
63 def compute_feature_label(self, feature): # remove the chop_blocks | |
64 self.label_fields = label_fields | |
65 if feature.type == "CDS": | |
66 if "product" in feature.qualifiers: | |
67 if ignored_gene_labels: # product name drop | |
68 verify_chops = any( | |
69 re.search( | |
70 ("(\\b" + str(item) + "\\b)"), | |
71 feature.qualifiers["product"][0], | |
72 ) | |
73 for item in ignored_gene_labels | |
74 ) or any( | |
75 re.search((item), feature.qualifiers["product"][0]) | |
76 for item in ignored_gene_labels | |
77 ) | |
78 if verify_chops: | |
79 return None | |
80 else: | |
81 return BiopythonTranslator.compute_feature_label(self, feature) | |
82 else: | |
83 return BiopythonTranslator.compute_feature_label(self, feature) | |
84 elif feature.type in ignored_feature_labels: | |
85 return None | |
86 else: | |
87 return BiopythonTranslator.compute_feature_label(self, feature) | |
88 | |
89 def compute_filtered_features(self, features): | |
90 return [ | |
91 feature | |
92 for feature in features | |
93 if feature.type not in ignored_features_types | |
94 ] | |
95 | |
96 def compute_feature_legend_text(self, feature): | |
97 return feature.type | |
98 | |
99 def compute_feature_box_color(self, feature): | |
100 if feature.type == "CDS": | |
101 return "white" | |
102 | |
103 def compute_feature_label_link_color(self, feature): | |
104 return "black" | |
105 | |
106 def compute_feature_box_linewidth(self, feature): | |
107 if box_status: | |
108 return 0.5 | |
109 else: | |
110 return 0 | |
111 | |
112 | |
113 def parse_gbk(file): | |
114 """simple function to parse out the feature information AND products""" | |
115 | |
116 record = SeqIO.read(file, "genbank") | |
117 count = 0 | |
118 feature_types = {} | |
119 product_names = [] | |
120 for feat in record.features: | |
121 if feat.type not in feature_types: | |
122 feature_types[feat.type] = 1 | |
123 else: | |
124 feature_types[feat.type] += 1 | |
125 if "product" in feat.qualifiers: | |
126 product_names.append(feat.qualifiers["product"][0]) | |
127 | |
128 return feature_types, product_names, record | |
129 | |
130 | |
131 if __name__ == "__main__": | |
132 parser = argparse.ArgumentParser(description="Linear Genome Plot") | |
133 # Input and Parameters | |
134 parser.add_argument( | |
135 "input_file", type=argparse.FileType("r"), help="genbank or gff3 file" | |
136 ) | |
137 parser.add_argument("--plot_width", type=int, default=20) | |
138 # parser.add_argument("--plot_height",type=int,default=4) | |
139 parser.add_argument( | |
140 "--title", type=str, default="genome plot" | |
141 ) # NEED TO ADD TO XML | |
142 parser.add_argument( | |
143 "--common_features_excluded", default="", help="common features to be excluded" | |
144 ) | |
145 parser.add_argument( | |
146 "--features_excluded", | |
147 default="", | |
148 help="features to be excluded from plot, separate by commas", | |
149 ) | |
150 parser.add_argument( | |
151 "--common_ignore_feature_labels", | |
152 default="", | |
153 help="common feature labels to be excluded", | |
154 ) | |
155 parser.add_argument( | |
156 "--ignored_feature_labels", | |
157 default="", | |
158 help="ignore labeling of specific features", | |
159 ) | |
160 parser.add_argument( | |
161 "--common_ignore_product_labels", | |
162 default="", | |
163 help="common product names to not label", | |
164 ) | |
165 parser.add_argument( | |
166 "--ignore_labeling", | |
167 default="", | |
168 help="labeling for specific products to ignore, separate by commas", | |
169 ) | |
170 parser.add_argument( | |
171 "--feature_label_order", | |
172 default="locus_tag", | |
173 help="label order, where the first choice is the first feature listed to pull name labels from", | |
174 ) # NEED TO ADD TO XML | |
175 parser.add_argument( | |
176 "--label_box", | |
177 action="store_true", | |
178 help="Use to have label box around feature labels", | |
179 ) | |
180 parser.add_argument( | |
181 "--label_algo", | |
182 action="store_true", | |
183 help="use dna features spacing algo for label placement (in or above feature)", | |
184 ) | |
185 # parser.add_argument("--level_offset",type=int,default=0,help="All features and annotations will be pushed up by the input amount. Useful for when plotting several sets of features successively on the same axis.") # Will exclude for now | |
186 # parser.add_argument("--custom_region",action="store_true",help="cropped region for plot") | |
187 parser.add_argument("--sz", type=int, help="beginning location for crop") | |
188 parser.add_argument("--ez", type=int, help="end location for crop") | |
189 parser.add_argument("--st", type=int, help="start site of translation") | |
190 parser.add_argument("--et", type=int, help="end site of translation") | |
191 parser.add_argument( | |
192 "--translation_on", action="store_true", help="plot the translation sub-axis" | |
193 ) | |
194 parser.add_argument( | |
195 "--feature_id", | |
196 nargs="*", | |
197 action="append", | |
198 help="feature label to have custom color", | |
199 ) # NEED TO ADD TO XML | |
200 parser.add_argument( | |
201 "--feature_id_color", | |
202 nargs="*", | |
203 action="append", | |
204 help="feature's accompanying color", | |
205 ) | |
206 parser.add_argument( | |
207 "--gene_id", | |
208 nargs="*", | |
209 action="append", | |
210 help="gene/cds label to have custom color", | |
211 ) | |
212 parser.add_argument( | |
213 "--gene_id_color", | |
214 nargs="*", | |
215 action="append", | |
216 help="gene/cds's accompanying color", | |
217 ) | |
218 parser.add_argument("--multiline", action="store_true", help="Plot multiline plot") | |
219 parser.add_argument( | |
220 "--nucl_per_line", type=int, help="nucleotides per line of multiline" | |
221 ) | |
222 # Output | |
223 parser.add_argument( | |
224 "--file_stats", | |
225 type=argparse.FileType("w"), | |
226 default="out_stats.txt", | |
227 help="output stat file", | |
228 ) | |
229 # parser.add_argument("--tmp_img",dest="tmp_img",type=argparse.FileType("wb"),default="out_tmp.svg") | |
230 parser.add_argument( | |
231 "--out_img", | |
232 dest="out_img", | |
233 type=argparse.FileType("wb"), | |
234 default="out_img.svg", | |
235 help="svg genome plot", | |
236 ) | |
237 args = parser.parse_args() | |
238 | |
239 ## Part I ; Parse and send output of features count and the list of product names | |
240 feature_counts, products, genome = parse_gbk(args.input_file) | |
241 with args.file_stats as f: | |
242 f.writelines("---::: FILE BREAKDOWN :::---\n\n") | |
243 f.writelines("------::: Feature Count :::------\n") | |
244 for feature, count in feature_counts.items(): | |
245 f.writelines(f"Feature: {feature} ::::: Count: {count}\n") | |
246 f.writelines("------::: Product Names :::------\n") | |
247 if products != []: | |
248 for product in products: | |
249 f.writelines(f"Product Name: {product}\n") | |
250 else: | |
251 f.writelines("No Annotated Product Names Found") | |
252 | |
253 ## Part II ; Prep Global Variables | |
254 ## Make K:V pairs for Feature Colors | |
255 if args.label_box: | |
256 box_status = True | |
257 else: | |
258 box_status = False | |
259 | |
260 if args.feature_id: | |
261 feature_ids = [f for listed_obj in args.feature_id for f in listed_obj] | |
262 feature_ids_colors = [ | |
263 f for listed_obj in args.feature_id_color for f in listed_obj | |
264 ] | |
265 custom_feature_colors = dict(zip(feature_ids, feature_ids_colors)) | |
266 else: | |
267 custom_feature_colors = {} | |
268 | |
269 ## Make K:V pairs for Name Colors (as above) | |
270 if args.gene_id: | |
271 gene_ids = [g for listed_obj in args.gene_id for g in listed_obj] | |
272 gene_ids_colors = [g for listed_obj in args.gene_id_color for g in listed_obj] | |
273 custom_name_colors = dict(zip(gene_ids, gene_ids_colors)) | |
274 else: | |
275 custom_name_colors = {} | |
276 | |
277 ## Ignored Features | |
278 # ignored_features_types = str.split(args.features_excluded,",") | |
279 if args.common_features_excluded: | |
280 ignored_features_types = str.split(args.common_features_excluded, ",") | |
281 if args.features_excluded: | |
282 ignored_features_types += str.split(args.features_excluded, ",") | |
283 elif args.features_excluded: | |
284 ignored_features_types = str.split(args.features_excluded, ",") | |
285 else: | |
286 ignored_features_types = False | |
287 | |
288 print(ignored_features_types) | |
289 | |
290 ## product labels | |
291 if args.common_ignore_product_labels: | |
292 ignored_gene_labels = str.split(args.common_ignore_product_labels, ",") | |
293 if args.ignore_labeling: | |
294 ignored_gene_labels += str.split(args.ignore_labeling, ",") | |
295 elif args.ignore_labeling: | |
296 ignored_gene_labels = str.split(args.ignore_labeling, ",") | |
297 else: | |
298 ignored_gene_labels = False | |
299 | |
300 print(ignored_gene_labels) | |
301 | |
302 if args.feature_label_order != [""]: | |
303 label_fields = str.split(args.feature_label_order, ",") | |
304 | |
305 # if ignored_gene_labels == ['']: | |
306 # ignored_gene_labels = False | |
307 | |
308 ## Ignored Labeling | |
309 if args.common_ignore_feature_labels: | |
310 ignored_feature_labels = str.split(args.common_ignore_feature_labels, ",") | |
311 if args.ignored_feature_labels: | |
312 ignored_feature_labels += str.split(args.ignored_feature_labels, ",") | |
313 elif args.ignored_feature_labels: | |
314 ignored_feature_labels = str.split(args.ignored_feature_labels, ",") | |
315 else: | |
316 ignored_feature_labels = False | |
317 | |
318 print(ignored_feature_labels) | |
319 ## Print Statements for Debugging | |
320 # print(custom_feature_colors) | |
321 # print(custom_name_colors) | |
322 # print(ignored_features_types) | |
323 # print(ignored_gene_labels) | |
324 # print(label_fields) | |
325 | |
326 ## Part III ; PLOT | |
327 # Housekeeping | |
328 rc_context( | |
329 { | |
330 "font.family": ["monospace"], | |
331 } | |
332 ) # courier-like | |
333 matplotlib.use("Agg") # I think this has to be used... | |
334 | |
335 if args.label_algo: | |
336 lab_algo = True | |
337 else: | |
338 lab_algo = False | |
339 | |
340 translator = CPTTranslator() | |
341 graphic_record = translator.translate_record(genome) | |
342 | |
343 with open("tmp.svg", "wb") as img: | |
344 img.truncate(0) | |
345 img.close() | |
346 | |
347 if ( | |
348 args.sz and not args.multiline | |
349 ): # if user is wanting to look at a subset region of the genome | |
350 zoom_start, zoom_end = args.sz, args.ez | |
351 cropped = graphic_record.crop((zoom_start, zoom_end)) | |
352 ax, _ = cropped.plot( | |
353 figure_width=args.plot_width, annotate_inline=lab_algo, figure_height=None | |
354 ) | |
355 if args.translation_on: | |
356 crop_seq = (args.st - 1, args.et) | |
357 cropped.plot_translation( | |
358 ax, | |
359 location=crop_seq, | |
360 fontdict={"size": 8, "weight": "bold"}, | |
361 y_offset=1, | |
362 ) | |
363 ax.set_title(args.title) | |
364 # Galaxy specific shenanigans | |
365 tmp_fig = "./tmp.svg" | |
366 plt.savefig(tmp_fig) | |
367 plt.close() | |
368 elif args.multiline: | |
369 if args.sz: | |
370 zoom_start, zoom_end = args.sz, args.ez | |
371 else: | |
372 zoom_start, zoom_end = 1, graphic_record.sequence_length | |
373 cropped = graphic_record.crop((zoom_start, zoom_end)) | |
374 ax, _ = cropped.plot_on_multiple_lines( | |
375 figure_width=args.plot_width, | |
376 annotate_inline=lab_algo, | |
377 figure_height=None, | |
378 nucl_per_line=args.nucl_per_line, | |
379 plot_sequence=False, | |
380 ) | |
381 # ax.set_title(args.title) | |
382 tmp_fig = "./tmp.svg" | |
383 plt.savefig(tmp_fig) | |
384 plt.close() | |
385 else: | |
386 ax, _ = graphic_record.plot( | |
387 figure_width=args.plot_width, annotate_inline=lab_algo | |
388 ) | |
389 ax.set_title(args.title) | |
390 tmp_fig = "./tmp.svg" | |
391 # Galaxy specific shenanigans | |
392 plt.savefig(tmp_fig) | |
393 plt.close() | |
394 with open("tmp.svg", "rb") as img: | |
395 for line in img: | |
396 args.out_img.write(line) |