comparison linear_genome_plot.py @ 1:e923c686ead9 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:45:31 +0000
parents
children
comparison
equal deleted inserted replaced
0:621754dd31f8 1:e923c686ead9
1 #!/usr/bin/env python
2 from Bio import SeqIO
3 from dna_features_viewer import BiopythonTranslator, GraphicRecord
4 from matplotlib import rc_context
5 import matplotlib
6 import matplotlib.pyplot as plt
7 from itertools import cycle
8 import re
9 import sys
10 import argparse
11
12
13 class CPTTranslator(BiopythonTranslator):
14 """
15 This is a customized translator from the dna_features_viewer module to fit Galaxy
16 """
17
18 global custom_feature_colors
19 global box_status
20 global label_fields
21 global custom_name_colors
22 global ignored_features_types
23 global ignored_gene_labels
24 global ignored_feature_labels
25
26 def compute_feature_color(self, feature):
27 if feature.type == "CDS":
28 if "product" in feature.qualifiers:
29 color_specific = any(
30 re.search(
31 ("(\\b" + str(item) + "\\b)"), feature.qualifiers["product"][0]
32 )
33 for item in custom_name_colors.keys()
34 ) or any(
35 re.search((item), feature.qualifiers["product"][0])
36 for item in custom_name_colors.keys()
37 )
38 if color_specific:
39 try:
40 return custom_name_colors[feature.qualifiers["product"][0]]
41 except KeyError:
42 for item in custom_name_colors.keys():
43 if item in feature.qualifiers["product"][0]:
44 custom_name_colors[
45 feature.qualifiers["product"][0]
46 ] = custom_name_colors[item]
47 return custom_name_colors[
48 feature.qualifiers["product"][0]
49 ]
50 # print(feature.qualifiers["product"][0])
51 else:
52 try:
53 return custom_feature_colors[feature.type]
54 except KeyError:
55 return BiopythonTranslator.compute_feature_color(self, feature)
56 else:
57 if feature.type not in ignored_features_types:
58 try:
59 return custom_feature_colors[feature.type]
60 except KeyError:
61 return BiopythonTranslator.compute_feature_color(self, feature)
62
63 def compute_feature_label(self, feature): # remove the chop_blocks
64 self.label_fields = label_fields
65 if feature.type == "CDS":
66 if "product" in feature.qualifiers:
67 if ignored_gene_labels: # product name drop
68 verify_chops = any(
69 re.search(
70 ("(\\b" + str(item) + "\\b)"),
71 feature.qualifiers["product"][0],
72 )
73 for item in ignored_gene_labels
74 ) or any(
75 re.search((item), feature.qualifiers["product"][0])
76 for item in ignored_gene_labels
77 )
78 if verify_chops:
79 return None
80 else:
81 return BiopythonTranslator.compute_feature_label(self, feature)
82 else:
83 return BiopythonTranslator.compute_feature_label(self, feature)
84 elif feature.type in ignored_feature_labels:
85 return None
86 else:
87 return BiopythonTranslator.compute_feature_label(self, feature)
88
89 def compute_filtered_features(self, features):
90 return [
91 feature
92 for feature in features
93 if feature.type not in ignored_features_types
94 ]
95
96 def compute_feature_legend_text(self, feature):
97 return feature.type
98
99 def compute_feature_box_color(self, feature):
100 if feature.type == "CDS":
101 return "white"
102
103 def compute_feature_label_link_color(self, feature):
104 return "black"
105
106 def compute_feature_box_linewidth(self, feature):
107 if box_status:
108 return 0.5
109 else:
110 return 0
111
112
113 def parse_gbk(file):
114 """simple function to parse out the feature information AND products"""
115
116 record = SeqIO.read(file, "genbank")
117 count = 0
118 feature_types = {}
119 product_names = []
120 for feat in record.features:
121 if feat.type not in feature_types:
122 feature_types[feat.type] = 1
123 else:
124 feature_types[feat.type] += 1
125 if "product" in feat.qualifiers:
126 product_names.append(feat.qualifiers["product"][0])
127
128 return feature_types, product_names, record
129
130
131 if __name__ == "__main__":
132 parser = argparse.ArgumentParser(description="Linear Genome Plot")
133 # Input and Parameters
134 parser.add_argument(
135 "input_file", type=argparse.FileType("r"), help="genbank or gff3 file"
136 )
137 parser.add_argument("--plot_width", type=int, default=20)
138 # parser.add_argument("--plot_height",type=int,default=4)
139 parser.add_argument(
140 "--title", type=str, default="genome plot"
141 ) # NEED TO ADD TO XML
142 parser.add_argument(
143 "--common_features_excluded", default="", help="common features to be excluded"
144 )
145 parser.add_argument(
146 "--features_excluded",
147 default="",
148 help="features to be excluded from plot, separate by commas",
149 )
150 parser.add_argument(
151 "--common_ignore_feature_labels",
152 default="",
153 help="common feature labels to be excluded",
154 )
155 parser.add_argument(
156 "--ignored_feature_labels",
157 default="",
158 help="ignore labeling of specific features",
159 )
160 parser.add_argument(
161 "--common_ignore_product_labels",
162 default="",
163 help="common product names to not label",
164 )
165 parser.add_argument(
166 "--ignore_labeling",
167 default="",
168 help="labeling for specific products to ignore, separate by commas",
169 )
170 parser.add_argument(
171 "--feature_label_order",
172 default="locus_tag",
173 help="label order, where the first choice is the first feature listed to pull name labels from",
174 ) # NEED TO ADD TO XML
175 parser.add_argument(
176 "--label_box",
177 action="store_true",
178 help="Use to have label box around feature labels",
179 )
180 parser.add_argument(
181 "--label_algo",
182 action="store_true",
183 help="use dna features spacing algo for label placement (in or above feature)",
184 )
185 # parser.add_argument("--level_offset",type=int,default=0,help="All features and annotations will be pushed up by the input amount. Useful for when plotting several sets of features successively on the same axis.") # Will exclude for now
186 # parser.add_argument("--custom_region",action="store_true",help="cropped region for plot")
187 parser.add_argument("--sz", type=int, help="beginning location for crop")
188 parser.add_argument("--ez", type=int, help="end location for crop")
189 parser.add_argument("--st", type=int, help="start site of translation")
190 parser.add_argument("--et", type=int, help="end site of translation")
191 parser.add_argument(
192 "--translation_on", action="store_true", help="plot the translation sub-axis"
193 )
194 parser.add_argument(
195 "--feature_id",
196 nargs="*",
197 action="append",
198 help="feature label to have custom color",
199 ) # NEED TO ADD TO XML
200 parser.add_argument(
201 "--feature_id_color",
202 nargs="*",
203 action="append",
204 help="feature's accompanying color",
205 )
206 parser.add_argument(
207 "--gene_id",
208 nargs="*",
209 action="append",
210 help="gene/cds label to have custom color",
211 )
212 parser.add_argument(
213 "--gene_id_color",
214 nargs="*",
215 action="append",
216 help="gene/cds's accompanying color",
217 )
218 parser.add_argument("--multiline", action="store_true", help="Plot multiline plot")
219 parser.add_argument(
220 "--nucl_per_line", type=int, help="nucleotides per line of multiline"
221 )
222 # Output
223 parser.add_argument(
224 "--file_stats",
225 type=argparse.FileType("w"),
226 default="out_stats.txt",
227 help="output stat file",
228 )
229 # parser.add_argument("--tmp_img",dest="tmp_img",type=argparse.FileType("wb"),default="out_tmp.svg")
230 parser.add_argument(
231 "--out_img",
232 dest="out_img",
233 type=argparse.FileType("wb"),
234 default="out_img.svg",
235 help="svg genome plot",
236 )
237 args = parser.parse_args()
238
239 ## Part I ; Parse and send output of features count and the list of product names
240 feature_counts, products, genome = parse_gbk(args.input_file)
241 with args.file_stats as f:
242 f.writelines("---::: FILE BREAKDOWN :::---\n\n")
243 f.writelines("------::: Feature Count :::------\n")
244 for feature, count in feature_counts.items():
245 f.writelines(f"Feature: {feature} ::::: Count: {count}\n")
246 f.writelines("------::: Product Names :::------\n")
247 if products != []:
248 for product in products:
249 f.writelines(f"Product Name: {product}\n")
250 else:
251 f.writelines("No Annotated Product Names Found")
252
253 ## Part II ; Prep Global Variables
254 ## Make K:V pairs for Feature Colors
255 if args.label_box:
256 box_status = True
257 else:
258 box_status = False
259
260 if args.feature_id:
261 feature_ids = [f for listed_obj in args.feature_id for f in listed_obj]
262 feature_ids_colors = [
263 f for listed_obj in args.feature_id_color for f in listed_obj
264 ]
265 custom_feature_colors = dict(zip(feature_ids, feature_ids_colors))
266 else:
267 custom_feature_colors = {}
268
269 ## Make K:V pairs for Name Colors (as above)
270 if args.gene_id:
271 gene_ids = [g for listed_obj in args.gene_id for g in listed_obj]
272 gene_ids_colors = [g for listed_obj in args.gene_id_color for g in listed_obj]
273 custom_name_colors = dict(zip(gene_ids, gene_ids_colors))
274 else:
275 custom_name_colors = {}
276
277 ## Ignored Features
278 # ignored_features_types = str.split(args.features_excluded,",")
279 if args.common_features_excluded:
280 ignored_features_types = str.split(args.common_features_excluded, ",")
281 if args.features_excluded:
282 ignored_features_types += str.split(args.features_excluded, ",")
283 elif args.features_excluded:
284 ignored_features_types = str.split(args.features_excluded, ",")
285 else:
286 ignored_features_types = False
287
288 print(ignored_features_types)
289
290 ## product labels
291 if args.common_ignore_product_labels:
292 ignored_gene_labels = str.split(args.common_ignore_product_labels, ",")
293 if args.ignore_labeling:
294 ignored_gene_labels += str.split(args.ignore_labeling, ",")
295 elif args.ignore_labeling:
296 ignored_gene_labels = str.split(args.ignore_labeling, ",")
297 else:
298 ignored_gene_labels = False
299
300 print(ignored_gene_labels)
301
302 if args.feature_label_order != [""]:
303 label_fields = str.split(args.feature_label_order, ",")
304
305 # if ignored_gene_labels == ['']:
306 # ignored_gene_labels = False
307
308 ## Ignored Labeling
309 if args.common_ignore_feature_labels:
310 ignored_feature_labels = str.split(args.common_ignore_feature_labels, ",")
311 if args.ignored_feature_labels:
312 ignored_feature_labels += str.split(args.ignored_feature_labels, ",")
313 elif args.ignored_feature_labels:
314 ignored_feature_labels = str.split(args.ignored_feature_labels, ",")
315 else:
316 ignored_feature_labels = False
317
318 print(ignored_feature_labels)
319 ## Print Statements for Debugging
320 # print(custom_feature_colors)
321 # print(custom_name_colors)
322 # print(ignored_features_types)
323 # print(ignored_gene_labels)
324 # print(label_fields)
325
326 ## Part III ; PLOT
327 # Housekeeping
328 rc_context(
329 {
330 "font.family": ["monospace"],
331 }
332 ) # courier-like
333 matplotlib.use("Agg") # I think this has to be used...
334
335 if args.label_algo:
336 lab_algo = True
337 else:
338 lab_algo = False
339
340 translator = CPTTranslator()
341 graphic_record = translator.translate_record(genome)
342
343 with open("tmp.svg", "wb") as img:
344 img.truncate(0)
345 img.close()
346
347 if (
348 args.sz and not args.multiline
349 ): # if user is wanting to look at a subset region of the genome
350 zoom_start, zoom_end = args.sz, args.ez
351 cropped = graphic_record.crop((zoom_start, zoom_end))
352 ax, _ = cropped.plot(
353 figure_width=args.plot_width, annotate_inline=lab_algo, figure_height=None
354 )
355 if args.translation_on:
356 crop_seq = (args.st - 1, args.et)
357 cropped.plot_translation(
358 ax,
359 location=crop_seq,
360 fontdict={"size": 8, "weight": "bold"},
361 y_offset=1,
362 )
363 ax.set_title(args.title)
364 # Galaxy specific shenanigans
365 tmp_fig = "./tmp.svg"
366 plt.savefig(tmp_fig)
367 plt.close()
368 elif args.multiline:
369 if args.sz:
370 zoom_start, zoom_end = args.sz, args.ez
371 else:
372 zoom_start, zoom_end = 1, graphic_record.sequence_length
373 cropped = graphic_record.crop((zoom_start, zoom_end))
374 ax, _ = cropped.plot_on_multiple_lines(
375 figure_width=args.plot_width,
376 annotate_inline=lab_algo,
377 figure_height=None,
378 nucl_per_line=args.nucl_per_line,
379 plot_sequence=False,
380 )
381 # ax.set_title(args.title)
382 tmp_fig = "./tmp.svg"
383 plt.savefig(tmp_fig)
384 plt.close()
385 else:
386 ax, _ = graphic_record.plot(
387 figure_width=args.plot_width, annotate_inline=lab_algo
388 )
389 ax.set_title(args.title)
390 tmp_fig = "./tmp.svg"
391 # Galaxy specific shenanigans
392 plt.savefig(tmp_fig)
393 plt.close()
394 with open("tmp.svg", "rb") as img:
395 for line in img:
396 args.out_img.write(line)