Mercurial > repos > cpt > cpt_linear_genome_plot
comparison linear_genome_plot.py @ 1:e923c686ead9 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
| author | cpt |
|---|---|
| date | Mon, 05 Jun 2023 02:45:31 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:621754dd31f8 | 1:e923c686ead9 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 from Bio import SeqIO | |
| 3 from dna_features_viewer import BiopythonTranslator, GraphicRecord | |
| 4 from matplotlib import rc_context | |
| 5 import matplotlib | |
| 6 import matplotlib.pyplot as plt | |
| 7 from itertools import cycle | |
| 8 import re | |
| 9 import sys | |
| 10 import argparse | |
| 11 | |
| 12 | |
| 13 class CPTTranslator(BiopythonTranslator): | |
| 14 """ | |
| 15 This is a customized translator from the dna_features_viewer module to fit Galaxy | |
| 16 """ | |
| 17 | |
| 18 global custom_feature_colors | |
| 19 global box_status | |
| 20 global label_fields | |
| 21 global custom_name_colors | |
| 22 global ignored_features_types | |
| 23 global ignored_gene_labels | |
| 24 global ignored_feature_labels | |
| 25 | |
| 26 def compute_feature_color(self, feature): | |
| 27 if feature.type == "CDS": | |
| 28 if "product" in feature.qualifiers: | |
| 29 color_specific = any( | |
| 30 re.search( | |
| 31 ("(\\b" + str(item) + "\\b)"), feature.qualifiers["product"][0] | |
| 32 ) | |
| 33 for item in custom_name_colors.keys() | |
| 34 ) or any( | |
| 35 re.search((item), feature.qualifiers["product"][0]) | |
| 36 for item in custom_name_colors.keys() | |
| 37 ) | |
| 38 if color_specific: | |
| 39 try: | |
| 40 return custom_name_colors[feature.qualifiers["product"][0]] | |
| 41 except KeyError: | |
| 42 for item in custom_name_colors.keys(): | |
| 43 if item in feature.qualifiers["product"][0]: | |
| 44 custom_name_colors[ | |
| 45 feature.qualifiers["product"][0] | |
| 46 ] = custom_name_colors[item] | |
| 47 return custom_name_colors[ | |
| 48 feature.qualifiers["product"][0] | |
| 49 ] | |
| 50 # print(feature.qualifiers["product"][0]) | |
| 51 else: | |
| 52 try: | |
| 53 return custom_feature_colors[feature.type] | |
| 54 except KeyError: | |
| 55 return BiopythonTranslator.compute_feature_color(self, feature) | |
| 56 else: | |
| 57 if feature.type not in ignored_features_types: | |
| 58 try: | |
| 59 return custom_feature_colors[feature.type] | |
| 60 except KeyError: | |
| 61 return BiopythonTranslator.compute_feature_color(self, feature) | |
| 62 | |
| 63 def compute_feature_label(self, feature): # remove the chop_blocks | |
| 64 self.label_fields = label_fields | |
| 65 if feature.type == "CDS": | |
| 66 if "product" in feature.qualifiers: | |
| 67 if ignored_gene_labels: # product name drop | |
| 68 verify_chops = any( | |
| 69 re.search( | |
| 70 ("(\\b" + str(item) + "\\b)"), | |
| 71 feature.qualifiers["product"][0], | |
| 72 ) | |
| 73 for item in ignored_gene_labels | |
| 74 ) or any( | |
| 75 re.search((item), feature.qualifiers["product"][0]) | |
| 76 for item in ignored_gene_labels | |
| 77 ) | |
| 78 if verify_chops: | |
| 79 return None | |
| 80 else: | |
| 81 return BiopythonTranslator.compute_feature_label(self, feature) | |
| 82 else: | |
| 83 return BiopythonTranslator.compute_feature_label(self, feature) | |
| 84 elif feature.type in ignored_feature_labels: | |
| 85 return None | |
| 86 else: | |
| 87 return BiopythonTranslator.compute_feature_label(self, feature) | |
| 88 | |
| 89 def compute_filtered_features(self, features): | |
| 90 return [ | |
| 91 feature | |
| 92 for feature in features | |
| 93 if feature.type not in ignored_features_types | |
| 94 ] | |
| 95 | |
| 96 def compute_feature_legend_text(self, feature): | |
| 97 return feature.type | |
| 98 | |
| 99 def compute_feature_box_color(self, feature): | |
| 100 if feature.type == "CDS": | |
| 101 return "white" | |
| 102 | |
| 103 def compute_feature_label_link_color(self, feature): | |
| 104 return "black" | |
| 105 | |
| 106 def compute_feature_box_linewidth(self, feature): | |
| 107 if box_status: | |
| 108 return 0.5 | |
| 109 else: | |
| 110 return 0 | |
| 111 | |
| 112 | |
| 113 def parse_gbk(file): | |
| 114 """simple function to parse out the feature information AND products""" | |
| 115 | |
| 116 record = SeqIO.read(file, "genbank") | |
| 117 count = 0 | |
| 118 feature_types = {} | |
| 119 product_names = [] | |
| 120 for feat in record.features: | |
| 121 if feat.type not in feature_types: | |
| 122 feature_types[feat.type] = 1 | |
| 123 else: | |
| 124 feature_types[feat.type] += 1 | |
| 125 if "product" in feat.qualifiers: | |
| 126 product_names.append(feat.qualifiers["product"][0]) | |
| 127 | |
| 128 return feature_types, product_names, record | |
| 129 | |
| 130 | |
| 131 if __name__ == "__main__": | |
| 132 parser = argparse.ArgumentParser(description="Linear Genome Plot") | |
| 133 # Input and Parameters | |
| 134 parser.add_argument( | |
| 135 "input_file", type=argparse.FileType("r"), help="genbank or gff3 file" | |
| 136 ) | |
| 137 parser.add_argument("--plot_width", type=int, default=20) | |
| 138 # parser.add_argument("--plot_height",type=int,default=4) | |
| 139 parser.add_argument( | |
| 140 "--title", type=str, default="genome plot" | |
| 141 ) # NEED TO ADD TO XML | |
| 142 parser.add_argument( | |
| 143 "--common_features_excluded", default="", help="common features to be excluded" | |
| 144 ) | |
| 145 parser.add_argument( | |
| 146 "--features_excluded", | |
| 147 default="", | |
| 148 help="features to be excluded from plot, separate by commas", | |
| 149 ) | |
| 150 parser.add_argument( | |
| 151 "--common_ignore_feature_labels", | |
| 152 default="", | |
| 153 help="common feature labels to be excluded", | |
| 154 ) | |
| 155 parser.add_argument( | |
| 156 "--ignored_feature_labels", | |
| 157 default="", | |
| 158 help="ignore labeling of specific features", | |
| 159 ) | |
| 160 parser.add_argument( | |
| 161 "--common_ignore_product_labels", | |
| 162 default="", | |
| 163 help="common product names to not label", | |
| 164 ) | |
| 165 parser.add_argument( | |
| 166 "--ignore_labeling", | |
| 167 default="", | |
| 168 help="labeling for specific products to ignore, separate by commas", | |
| 169 ) | |
| 170 parser.add_argument( | |
| 171 "--feature_label_order", | |
| 172 default="locus_tag", | |
| 173 help="label order, where the first choice is the first feature listed to pull name labels from", | |
| 174 ) # NEED TO ADD TO XML | |
| 175 parser.add_argument( | |
| 176 "--label_box", | |
| 177 action="store_true", | |
| 178 help="Use to have label box around feature labels", | |
| 179 ) | |
| 180 parser.add_argument( | |
| 181 "--label_algo", | |
| 182 action="store_true", | |
| 183 help="use dna features spacing algo for label placement (in or above feature)", | |
| 184 ) | |
| 185 # parser.add_argument("--level_offset",type=int,default=0,help="All features and annotations will be pushed up by the input amount. Useful for when plotting several sets of features successively on the same axis.") # Will exclude for now | |
| 186 # parser.add_argument("--custom_region",action="store_true",help="cropped region for plot") | |
| 187 parser.add_argument("--sz", type=int, help="beginning location for crop") | |
| 188 parser.add_argument("--ez", type=int, help="end location for crop") | |
| 189 parser.add_argument("--st", type=int, help="start site of translation") | |
| 190 parser.add_argument("--et", type=int, help="end site of translation") | |
| 191 parser.add_argument( | |
| 192 "--translation_on", action="store_true", help="plot the translation sub-axis" | |
| 193 ) | |
| 194 parser.add_argument( | |
| 195 "--feature_id", | |
| 196 nargs="*", | |
| 197 action="append", | |
| 198 help="feature label to have custom color", | |
| 199 ) # NEED TO ADD TO XML | |
| 200 parser.add_argument( | |
| 201 "--feature_id_color", | |
| 202 nargs="*", | |
| 203 action="append", | |
| 204 help="feature's accompanying color", | |
| 205 ) | |
| 206 parser.add_argument( | |
| 207 "--gene_id", | |
| 208 nargs="*", | |
| 209 action="append", | |
| 210 help="gene/cds label to have custom color", | |
| 211 ) | |
| 212 parser.add_argument( | |
| 213 "--gene_id_color", | |
| 214 nargs="*", | |
| 215 action="append", | |
| 216 help="gene/cds's accompanying color", | |
| 217 ) | |
| 218 parser.add_argument("--multiline", action="store_true", help="Plot multiline plot") | |
| 219 parser.add_argument( | |
| 220 "--nucl_per_line", type=int, help="nucleotides per line of multiline" | |
| 221 ) | |
| 222 # Output | |
| 223 parser.add_argument( | |
| 224 "--file_stats", | |
| 225 type=argparse.FileType("w"), | |
| 226 default="out_stats.txt", | |
| 227 help="output stat file", | |
| 228 ) | |
| 229 # parser.add_argument("--tmp_img",dest="tmp_img",type=argparse.FileType("wb"),default="out_tmp.svg") | |
| 230 parser.add_argument( | |
| 231 "--out_img", | |
| 232 dest="out_img", | |
| 233 type=argparse.FileType("wb"), | |
| 234 default="out_img.svg", | |
| 235 help="svg genome plot", | |
| 236 ) | |
| 237 args = parser.parse_args() | |
| 238 | |
| 239 ## Part I ; Parse and send output of features count and the list of product names | |
| 240 feature_counts, products, genome = parse_gbk(args.input_file) | |
| 241 with args.file_stats as f: | |
| 242 f.writelines("---::: FILE BREAKDOWN :::---\n\n") | |
| 243 f.writelines("------::: Feature Count :::------\n") | |
| 244 for feature, count in feature_counts.items(): | |
| 245 f.writelines(f"Feature: {feature} ::::: Count: {count}\n") | |
| 246 f.writelines("------::: Product Names :::------\n") | |
| 247 if products != []: | |
| 248 for product in products: | |
| 249 f.writelines(f"Product Name: {product}\n") | |
| 250 else: | |
| 251 f.writelines("No Annotated Product Names Found") | |
| 252 | |
| 253 ## Part II ; Prep Global Variables | |
| 254 ## Make K:V pairs for Feature Colors | |
| 255 if args.label_box: | |
| 256 box_status = True | |
| 257 else: | |
| 258 box_status = False | |
| 259 | |
| 260 if args.feature_id: | |
| 261 feature_ids = [f for listed_obj in args.feature_id for f in listed_obj] | |
| 262 feature_ids_colors = [ | |
| 263 f for listed_obj in args.feature_id_color for f in listed_obj | |
| 264 ] | |
| 265 custom_feature_colors = dict(zip(feature_ids, feature_ids_colors)) | |
| 266 else: | |
| 267 custom_feature_colors = {} | |
| 268 | |
| 269 ## Make K:V pairs for Name Colors (as above) | |
| 270 if args.gene_id: | |
| 271 gene_ids = [g for listed_obj in args.gene_id for g in listed_obj] | |
| 272 gene_ids_colors = [g for listed_obj in args.gene_id_color for g in listed_obj] | |
| 273 custom_name_colors = dict(zip(gene_ids, gene_ids_colors)) | |
| 274 else: | |
| 275 custom_name_colors = {} | |
| 276 | |
| 277 ## Ignored Features | |
| 278 # ignored_features_types = str.split(args.features_excluded,",") | |
| 279 if args.common_features_excluded: | |
| 280 ignored_features_types = str.split(args.common_features_excluded, ",") | |
| 281 if args.features_excluded: | |
| 282 ignored_features_types += str.split(args.features_excluded, ",") | |
| 283 elif args.features_excluded: | |
| 284 ignored_features_types = str.split(args.features_excluded, ",") | |
| 285 else: | |
| 286 ignored_features_types = False | |
| 287 | |
| 288 print(ignored_features_types) | |
| 289 | |
| 290 ## product labels | |
| 291 if args.common_ignore_product_labels: | |
| 292 ignored_gene_labels = str.split(args.common_ignore_product_labels, ",") | |
| 293 if args.ignore_labeling: | |
| 294 ignored_gene_labels += str.split(args.ignore_labeling, ",") | |
| 295 elif args.ignore_labeling: | |
| 296 ignored_gene_labels = str.split(args.ignore_labeling, ",") | |
| 297 else: | |
| 298 ignored_gene_labels = False | |
| 299 | |
| 300 print(ignored_gene_labels) | |
| 301 | |
| 302 if args.feature_label_order != [""]: | |
| 303 label_fields = str.split(args.feature_label_order, ",") | |
| 304 | |
| 305 # if ignored_gene_labels == ['']: | |
| 306 # ignored_gene_labels = False | |
| 307 | |
| 308 ## Ignored Labeling | |
| 309 if args.common_ignore_feature_labels: | |
| 310 ignored_feature_labels = str.split(args.common_ignore_feature_labels, ",") | |
| 311 if args.ignored_feature_labels: | |
| 312 ignored_feature_labels += str.split(args.ignored_feature_labels, ",") | |
| 313 elif args.ignored_feature_labels: | |
| 314 ignored_feature_labels = str.split(args.ignored_feature_labels, ",") | |
| 315 else: | |
| 316 ignored_feature_labels = False | |
| 317 | |
| 318 print(ignored_feature_labels) | |
| 319 ## Print Statements for Debugging | |
| 320 # print(custom_feature_colors) | |
| 321 # print(custom_name_colors) | |
| 322 # print(ignored_features_types) | |
| 323 # print(ignored_gene_labels) | |
| 324 # print(label_fields) | |
| 325 | |
| 326 ## Part III ; PLOT | |
| 327 # Housekeeping | |
| 328 rc_context( | |
| 329 { | |
| 330 "font.family": ["monospace"], | |
| 331 } | |
| 332 ) # courier-like | |
| 333 matplotlib.use("Agg") # I think this has to be used... | |
| 334 | |
| 335 if args.label_algo: | |
| 336 lab_algo = True | |
| 337 else: | |
| 338 lab_algo = False | |
| 339 | |
| 340 translator = CPTTranslator() | |
| 341 graphic_record = translator.translate_record(genome) | |
| 342 | |
| 343 with open("tmp.svg", "wb") as img: | |
| 344 img.truncate(0) | |
| 345 img.close() | |
| 346 | |
| 347 if ( | |
| 348 args.sz and not args.multiline | |
| 349 ): # if user is wanting to look at a subset region of the genome | |
| 350 zoom_start, zoom_end = args.sz, args.ez | |
| 351 cropped = graphic_record.crop((zoom_start, zoom_end)) | |
| 352 ax, _ = cropped.plot( | |
| 353 figure_width=args.plot_width, annotate_inline=lab_algo, figure_height=None | |
| 354 ) | |
| 355 if args.translation_on: | |
| 356 crop_seq = (args.st - 1, args.et) | |
| 357 cropped.plot_translation( | |
| 358 ax, | |
| 359 location=crop_seq, | |
| 360 fontdict={"size": 8, "weight": "bold"}, | |
| 361 y_offset=1, | |
| 362 ) | |
| 363 ax.set_title(args.title) | |
| 364 # Galaxy specific shenanigans | |
| 365 tmp_fig = "./tmp.svg" | |
| 366 plt.savefig(tmp_fig) | |
| 367 plt.close() | |
| 368 elif args.multiline: | |
| 369 if args.sz: | |
| 370 zoom_start, zoom_end = args.sz, args.ez | |
| 371 else: | |
| 372 zoom_start, zoom_end = 1, graphic_record.sequence_length | |
| 373 cropped = graphic_record.crop((zoom_start, zoom_end)) | |
| 374 ax, _ = cropped.plot_on_multiple_lines( | |
| 375 figure_width=args.plot_width, | |
| 376 annotate_inline=lab_algo, | |
| 377 figure_height=None, | |
| 378 nucl_per_line=args.nucl_per_line, | |
| 379 plot_sequence=False, | |
| 380 ) | |
| 381 # ax.set_title(args.title) | |
| 382 tmp_fig = "./tmp.svg" | |
| 383 plt.savefig(tmp_fig) | |
| 384 plt.close() | |
| 385 else: | |
| 386 ax, _ = graphic_record.plot( | |
| 387 figure_width=args.plot_width, annotate_inline=lab_algo | |
| 388 ) | |
| 389 ax.set_title(args.title) | |
| 390 tmp_fig = "./tmp.svg" | |
| 391 # Galaxy specific shenanigans | |
| 392 plt.savefig(tmp_fig) | |
| 393 plt.close() | |
| 394 with open("tmp.svg", "rb") as img: | |
| 395 for line in img: | |
| 396 args.out_img.write(line) |
