cpt_linear_genome_plot: linear_genome

comparison linear_genome_plot.py @ 1:e923c686ead9 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c

author	cpt
date	Mon, 05 Jun 2023 02:45:31 +0000
parents
children

comparison

equal deleted inserted replaced

-:621754dd31f8
+:e923c686ead9
+#!/usr/bin/env python
+from Bio import SeqIO
+from dna_features_viewer import BiopythonTranslator, GraphicRecord
+from matplotlib import rc_context
+import matplotlib
+import matplotlib.pyplot as plt
+from itertools import cycle
+import re
+import sys
+import argparse
class CPTTranslator(BiopythonTranslator):
    """
    Customized dna_features_viewer translator used for the Galaxy genome plot.

    The methods read their configuration from module-level globals that must
    be assigned before a record is translated: ``custom_feature_colors``
    (feature type -> color), ``custom_name_colors`` (product name/pattern ->
    color), ``box_status``, ``label_fields``, ``ignored_features_types``,
    ``ignored_gene_labels`` and ``ignored_feature_labels``.

    The three ``ignored_*`` settings may be a list of strings or any falsy
    placeholder (for example ``False``) meaning "nothing ignored"; every
    method treats a falsy value as an empty collection.
    """

    def _pattern_hits(self, pattern, text):
        """Return True when *pattern* occurs in *text* literally or as a regex."""
        pattern = str(pattern)
        if pattern in text:
            return True
        try:
            return re.search(pattern, text) is not None
        except re.error:
            # Not a valid regular expression: only the literal test applies.
            return False

    def _product_color(self, product):
        """Return the custom color configured for *product*, or None.

        Priority: exact name, then the first configured name contained in the
        product, then the first configured name matching as a regex.  The
        shared ``custom_name_colors`` mapping is only read, never modified.
        """
        if product in custom_name_colors:
            return custom_name_colors[product]
        for name, color in custom_name_colors.items():
            if name in product:
                return color
        for name, color in custom_name_colors.items():
            if self._pattern_hits(name, product):
                return color
        return None

    def compute_feature_color(self, feature):
        """Return the fill color for *feature*.

        A CDS whose product matches an entry of ``custom_name_colors`` gets
        that color.  Otherwise the color configured for the feature type in
        ``custom_feature_colors`` is used, falling back to the base class.
        Non-CDS features whose type is ignored yield ``None``.
        """
        if feature.type == "CDS":
            products = feature.qualifiers.get("product")
            if products:
                color = self._product_color(products[0])
                if color is not None:
                    return color
            # A CDS without a (matching) product falls through to the
            # per-type color below instead of silently returning None.
        elif feature.type in (ignored_features_types or ()):
            return None
        try:
            return custom_feature_colors[feature.type]
        except KeyError:
            return super().compute_feature_color(feature)

    def compute_feature_label(self, feature):  # remove the chop_blocks
        """Return the label for *feature*, or ``None`` to leave it unlabeled.

        A CDS is labeled only when it has a product that matches none of
        ``ignored_gene_labels``; other features are labeled unless their type
        is listed in ``ignored_feature_labels``.  The label text itself comes
        from the base class, using ``label_fields``.
        """
        self.label_fields = label_fields
        if feature.type == "CDS":
            products = feature.qualifiers.get("product")
            if not products:
                # Kept from the original: a CDS without a product gets no label.
                return None
            product = products[0]
            if any(
                self._pattern_hits(item, product)
                for item in (ignored_gene_labels or ())
            ):
                return None  # product name drop
            return super().compute_feature_label(feature)
        if feature.type in (ignored_feature_labels or ()):
            return None
        return super().compute_feature_label(feature)

    def compute_filtered_features(self, features):
        """Return *features* without those whose type is ignored."""
        excluded = ignored_features_types or ()
        return [feature for feature in features if feature.type not in excluded]

    def compute_feature_legend_text(self, feature):
        """Use the feature type as legend text."""
        return feature.type

    def compute_feature_box_color(self, feature):
        """Return "white" for CDS label boxes and ``None`` for everything else."""
        if feature.type == "CDS":
            return "white"
        return None

    def compute_feature_label_link_color(self, feature):
        """Always draw label links in black."""
        return "black"

    def compute_feature_box_linewidth(self, feature):
        """Return 0.5 when label boxes are enabled (``box_status``), else 0."""
        return 0.5 if box_status else 0
def parse_gbk(file):
    """Read one GenBank record and summarize its features.

    Returns a tuple ``(feature_types, product_names, record)``: a dict mapping
    each feature type to its number of occurrences, the list of the first
    ``product`` qualifier of every feature that has one (in feature order),
    and the record returned by ``SeqIO.read``.
    """
    record = SeqIO.read(file, "genbank")

    feature_types = {}
    for entry in record.features:
        feature_types[entry.type] = feature_types.get(entry.type, 0) + 1

    product_names = [
        entry.qualifiers["product"][0]
        for entry in record.features
        if "product" in entry.qualifiers
    ]
    return feature_types, product_names, record
def _split_csv(*values):
    """Merge comma-separated option strings into one list, dropping empty entries."""
    merged = []
    for value in values:
        if value:
            # An empty entry would later act as a pattern that matches everything.
            merged.extend(item for item in value.split(",") if item)
    return merged


def _flatten(groups):
    """Flatten the list-of-lists built by ``nargs="*", action="append"``; None -> []."""
    return [item for group in (groups or []) for item in group]


def _color_map(ids, colors):
    """Pair each id with the color given at the same position (extras are ignored)."""
    return dict(zip(_flatten(ids), _flatten(colors)))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Linear Genome Plot")
    #  Input and Parameters
    parser.add_argument(
        "input_file", type=argparse.FileType("r"), help="genbank or gff3 file"
    )
    parser.add_argument("--plot_width", type=int, default=20)
    parser.add_argument(
        "--title", type=str, default="genome plot"
    )  # NEED TO ADD TO XML
    parser.add_argument(
        "--common_features_excluded", default="", help="common features to be excluded"
    )
    parser.add_argument(
        "--features_excluded",
        default="",
        help="features to be excluded from plot, separate by commas",
    )
    parser.add_argument(
        "--common_ignore_feature_labels",
        default="",
        help="common feature labels to be excluded",
    )
    parser.add_argument(
        "--ignored_feature_labels",
        default="",
        help="ignore labeling of specific features",
    )
    parser.add_argument(
        "--common_ignore_product_labels",
        default="",
        help="common product names to not label",
    )
    parser.add_argument(
        "--ignore_labeling",
        default="",
        help="labeling for specific products to ignore, separate by commas",
    )
    parser.add_argument(
        "--feature_label_order",
        default="locus_tag",
        help="label order, where the first choice is the first feature listed to pull name labels from",
    )  # NEED TO ADD TO XML
    parser.add_argument(
        "--label_box",
        action="store_true",
        help="Use to have label box around feature labels",
    )
    parser.add_argument(
        "--label_algo",
        action="store_true",
        help="use dna features spacing algo for label placement (in or above feature)",
    )
    parser.add_argument("--sz", type=int, help="beginning location for crop")
    parser.add_argument("--ez", type=int, help="end location for crop")
    parser.add_argument("--st", type=int, help="start site of translation")
    parser.add_argument("--et", type=int, help="end site of translation")
    parser.add_argument(
        "--translation_on", action="store_true", help="plot the translation sub-axis"
    )
    parser.add_argument(
        "--feature_id",
        nargs="*",
        action="append",
        help="feature label to have custom color",
    )  # NEED TO ADD TO XML
    parser.add_argument(
        "--feature_id_color",
        nargs="*",
        action="append",
        help="feature's accompanying color",
    )
    parser.add_argument(
        "--gene_id",
        nargs="*",
        action="append",
        help="gene/cds label to have custom color",
    )
    parser.add_argument(
        "--gene_id_color",
        nargs="*",
        action="append",
        help="gene/cds's accompanying color",
    )
    parser.add_argument("--multiline", action="store_true", help="Plot multiline plot")
    parser.add_argument(
        "--nucl_per_line", type=int, help="nucleotides per line of multiline"
    )
    #  Output
    parser.add_argument(
        "--file_stats",
        type=argparse.FileType("w"),
        default="out_stats.txt",
        help="output stat file",
    )
    parser.add_argument(
        "--out_img",
        dest="out_img",
        type=argparse.FileType("wb"),
        default="out_img.svg",
        help="svg genome plot",
    )
    args = parser.parse_args()

    # A single-line crop is requested by a non-zero --sz without --multiline
    # (truthiness kept from the original, so "--sz 0" means "no crop").
    crop_single = bool(args.sz) and not args.multiline
    if crop_single and args.translation_on and (args.st is None or args.et is None):
        # Fail with a usage message instead of a TypeError on "None - 1" below.
        parser.error("--translation_on requires both --st and --et")

    ##  Part I ; Parse and send output of features count and the list of product names
    with args.input_file as handle:
        feature_counts, products, genome = parse_gbk(handle)
    with args.file_stats as f:
        f.write("---::: FILE BREAKDOWN :::---\n\n")
        f.write("------::: Feature Count :::------\n")
        for feature, count in feature_counts.items():
            f.write(f"Feature: {feature} ::::: Count: {count}\n")
        f.write("------::: Product Names :::------\n")
        if products:
            for product in products:
                f.write(f"Product Name: {product}\n")
        else:
            f.write("No Annotated Product Names Found")

    ##  Part II ; Prep Global Variables (read by CPTTranslator)
    box_status = args.label_box
    ##  K:V pairs for feature-type colors and for product-name colors.
    ##  A missing color option yields an empty mapping instead of a TypeError.
    custom_feature_colors = _color_map(args.feature_id, args.feature_id_color)
    custom_name_colors = _color_map(args.gene_id, args.gene_id_color)
    ##  Ignored features: always lists, so membership tests never see a bool.
    ignored_features_types = _split_csv(
        args.common_features_excluded, args.features_excluded
    )
    print(ignored_features_types)
    ##  product labels
    ignored_gene_labels = _split_csv(
        args.common_ignore_product_labels, args.ignore_labeling
    )
    print(ignored_gene_labels)
    label_fields = args.feature_label_order.split(",")
    ##  Ignored Labeling
    ignored_feature_labels = _split_csv(
        args.common_ignore_feature_labels, args.ignored_feature_labels
    )
    print(ignored_feature_labels)

    ## Part III ; PLOT
    # Housekeeping
    matplotlib.use("Agg")  # non-interactive backend
    # Set the font through rcParams: a bare rc_context(...) call without
    # "with" does not apply anything on current matplotlib versions.
    matplotlib.rcParams.update({"font.family": ["monospace"]})  # courier-like
    lab_algo = args.label_algo
    translator = CPTTranslator()
    graphic_record = translator.translate_record(genome)
    # Galaxy specific shenanigans: plot into a file with an .svg name first,
    # then copy the bytes into the requested output handle.
    tmp_fig = "./tmp.svg"

    if crop_single:  #  user wants to look at a subset region of the genome
        # Without --ez the crop runs to the end of the sequence.
        zoom_end = args.ez or graphic_record.sequence_length
        cropped = graphic_record.crop((args.sz, zoom_end))
        ax, _ = cropped.plot(
            figure_width=args.plot_width, annotate_inline=lab_algo, figure_height=None
        )
        if args.translation_on:
            cropped.plot_translation(
                ax,
                location=(args.st - 1, args.et),
                fontdict={"size": 8, "weight": "bold"},
                y_offset=1,
            )
        ax.set_title(args.title)
    elif args.multiline:
        if args.sz:
            zoom_start = args.sz
            zoom_end = args.ez or graphic_record.sequence_length
        else:
            zoom_start, zoom_end = 1, graphic_record.sequence_length
        cropped = graphic_record.crop((zoom_start, zoom_end))
        ax, _ = cropped.plot_on_multiple_lines(
            figure_width=args.plot_width,
            annotate_inline=lab_algo,
            figure_height=None,
            nucl_per_line=args.nucl_per_line,
            plot_sequence=False,
        )
    else:
        ax, _ = graphic_record.plot(
            figure_width=args.plot_width, annotate_inline=lab_algo
        )
        ax.set_title(args.title)

    plt.savefig(tmp_fig)
    plt.close()

    with open(tmp_fig, "rb") as img, args.out_img as out:
        for line in img:
            out.write(line)

Mercurial > repos > cpt > cpt_linear_genome_plot

comparison linear_genome_plot.py @ 1:e923c686ead9 draft