# HG changeset patch
# User greg
# Date 1674679101 0
# Node ID 636eeb4fb9ac488feb2a225b490f545e4728f1dc
Uploaded
diff -r 000000000000 -r 636eeb4fb9ac .shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Wed Jan 25 20:38:21 2023 +0000
@@ -0,0 +1,9 @@
+name: draw_features
+owner: greg
+description: Plots contigs and features of high-quality annotated assemblies
+long_description: Plots contigs and features of high-quality annotated assemblies
+categories:
+- Visualization
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/pima/draw_features
+homepage_url: https://github.com/gregvonkuster/galaxy_tools
+type: unrestricted
diff -r 000000000000 -r 636eeb4fb9ac draw_features.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/draw_features.py Wed Jan 25 20:38:21 2023 +0000
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+
+import argparse
+import os
+
+import pandas
+import matplotlib.pyplot as pyplot
+from Bio import SeqIO
+from dna_features_viewer import GraphicFeature, GraphicRecord
+
+
+AMR_COLOR = '#FED976'
+INC_GROUPS_COLOR = '#0570B0'
+FEATURE_COLORS = [AMR_COLOR, INC_GROUPS_COLOR]
+FIGURE_WIDTH = 13
+
+
+def draw_features(feature_hits_files, contigs, output_dir):
+ ofh = open('process_log', 'w')
+ # Read feature_hits_files.
+ feature_hits = pandas.Series(dtype=object)
+ feature_plots = pandas.Series(dtype=object)
+ for feature_hits_file in feature_hits_files:
+ feature_name = os.path.basename(feature_hits_file)
+ # Make sure the file is not empty.
+ if os.path.isfile(feature_hits_file) and os.path.getsize(feature_hits_file) > 0:
+ best_hits = pandas.read_csv(filepath_or_buffer=feature_hits_file, sep='\t', header=None)
+ ofh.write("\nFeature file %s will be processed\n" % str(feature_hits_file))
+ else:
+ ofh.write("\nEmpty feature file %s will NOT be processed\n" % str(feature_hits_file))
+ best_hits = None
+ feature_hits[feature_name] = best_hits
+
+ # Draw one plot per contig for simplicity.
+ ofh.write("\nProcessing contigs file: %s\n" % str(contigs))
+ for contig in SeqIO.parse(contigs, 'fasta'):
+ ofh.write("Processing contig: %s\n" % str(contig))
+ contig_plot_png = os.path.join(output_dir, '%s.png' % str(contig.id))
+ feature_sets_to_plot = pandas.Series(dtype=object)
+ for feature_number in range(len(feature_hits)):
+ feature_name = feature_hits.index.to_list()[feature_number]
+ ofh.write("Processing feature name: %s\n" % str(feature_name))
+ these_features = feature_hits[feature_name]
+ if these_features is None or these_features.shape[0] == 0:
+ # No features.
+ continue
+ contig_features = these_features.loc[these_features.iloc[:, 0] == contig.id, :]
+ if contig_features is None or contig_features.shape[0] == 0:
+ # No features.
+ continue
+ features_to_plot = []
+ for i in range(contig_features.shape[0]):
+ i = contig_features.iloc[i, :]
+ features_to_plot += [GraphicFeature(start=i[1], end=i[2], label=i[3], strand=1 * i[5], color=FEATURE_COLORS[feature_number])]
+ feature_sets_to_plot[feature_name] = features_to_plot
+ ofh.write("Number of features to plot: %d\n" % len(feature_sets_to_plot))
+ if len(feature_sets_to_plot) == 0:
+ # No features.
+ continue
+ # Determine each plot height for later scaling
+ expected_plot_heights = []
+ for i in range(len(feature_sets_to_plot)):
+ record = GraphicRecord(sequence_length=len(contig), features=feature_sets_to_plot[i])
+ if i == len(feature_sets_to_plot) - 1:
+ with_ruler = True
+ else:
+ with_ruler = False
+ plot, _ = record.plot(figure_width=FIGURE_WIDTH, with_ruler=with_ruler)
+ expected_plot_heights += [plot.figure.get_size_inches()[1]]
+ plot_height_sum = sum(expected_plot_heights)
+ # Make a figure with separate plots for each feature class.
+ plots = pyplot.subplots(nrows=len(feature_sets_to_plot),
+ ncols=1,
+ sharex=True,
+ figsize=(FIGURE_WIDTH, plot_height_sum * .66666),
+ gridspec_kw={"height_ratios": expected_plot_heights})
+ figure = plots[0]
+ plots = plots[1]
+ if len(feature_sets_to_plot) == 1:
+ plots = [plots]
+ # Add each feature class's plot with the pre-determined height.
+ for i in range(len(feature_sets_to_plot)):
+ record = GraphicRecord(sequence_length=len(contig), features=feature_sets_to_plot[i])
+ if i == len(feature_sets_to_plot) - 1:
+ with_ruler = True
+ else:
+ with_ruler = False
+ plot, _ = record.plot(ax=plots[i], with_ruler=with_ruler, figure_width=FIGURE_WIDTH)
+ ymin, ymax = plot.figure.axes[0].get_ylim()
+ if i == 0:
+ plot.text(x=0, y=ymax, s=contig.id)
+ figure.tight_layout()
+ ofh.write("Saving PNG plot file: %s\n" % str(contig_plot_png))
+ figure.savefig(contig_plot_png)
+ feature_plots[contig.id] = contig_plot_png
+ ofh.close()
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument('--feature_hits_dir', action='store', dest='feature_hits_dir', help='Directory of tabular files containing feature hits')
+ parser.add_argument('--contigs', action='store', dest='contigs', help='Fasta file of contigs')
+ parser.add_argument('--output_dir', action='store', dest='output_dir', help='Output directory')
+
+ args = parser.parse_args()
+
+ # Get thge collection of feature hits files. The collection
+ # will be sorted alphabetically and will contain 2 files
+ # named something like AMR_CDS_311_2022_12_20.fasta and
+ # Incompatibility_Groups_2023_01_01.fasta.
+ feature_hits_files = []
+ for file_name in sorted(os.listdir(args.feature_hits_dir)):
+ file_path = os.path.abspath(os.path.join(args.feature_hits_dir, file_name))
+ feature_hits_files.append(file_path)
+
+ draw_features(feature_hits_files, args.contigs, args.output_dir)
diff -r 000000000000 -r 636eeb4fb9ac draw_features.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/draw_features.xml Wed Jan 25 20:38:21 2023 +0000
@@ -0,0 +1,74 @@
+
+ of annotated assemblies
+
+ macros.xml
+
+
+ 'contigs.fasta' &&
+#else:
+ ln -s '$contigs' 'contigs.fasta' &&
+#end if
+
+mkdir feature_hits_dir &&
+mkdir output_dir &&
+
+#for $i in $feature_hits:
+ #set file_name = $i.file_name
+ #set identifier = re.sub('[^\s\w\-\\.]', '_', str($i.element_identifier))
+ ln -s '$file_name' 'feature_hits_dir/$identifier' &&
+#end for
+
+python '$__tool_directory__/draw_features.py'
+--contigs 'contigs.fasta'
+--feature_hits_dir 'feature_hits_dir'
+--output_dir 'output_dir'
+#if str($output_process_log) == 'yes':
+ && mv 'process_log' '$process_log'
+#end if
+]]>
+
+
+
+
+
+
+
+
+
+
+ output_process_log == 'yes'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Accepts a FASTA file of assembled contigs and a collection of 2 BED files containing the feature hits and plots
+the features.
+
+
+
+
diff -r 000000000000 -r 636eeb4fb9ac macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Jan 25 20:38:21 2023 +0000
@@ -0,0 +1,19 @@
+
+ 1.0.0
+ 0
+ 21.01
+
+
+ biopython
+ dna_features_viewer
+ matplotlib
+ pandas
+
+
+
+
+ 10.1101/011650
+
+
+
+
diff -r 000000000000 -r 636eeb4fb9ac test-data/PS01519_contigs.fasta.gz
Binary file test-data/PS01519_contigs.fasta.gz has changed
diff -r 000000000000 -r 636eeb4fb9ac test-data/amr_cds.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/amr_cds.bed Wed Jan 25 20:38:21 2023 +0000
@@ -0,0 +1,16 @@
+contig_1 484392 485921 62550724|AAX88798.1|AY818309.1|1|1|alo|alo|anthrolysin_O/cereolysin_O_family_cholesterol-dependent_cytolysin_Alo 0.984 +
+contig_1 4673373 4675772 29894390|AAP07680.1|AE016877.1|1|1|inhA2|inhA2|M6_family_metalloprotease_immune_inhibitor_InhA2 1.000 -
+contig_1 4253739 4254749 29894808|AAP08097.1|AE016877.1|1|1|cytK2|cytK2|beta-channel_forming_cytolysin_CytK2 1.000 +
+contig_1 232577 233434 5305719|AAD41788.1|AF132087.1|1|1|plcR|plcR|transcriptional_regulator_PlcR 1.000 +
+contig_1 4667636 4668636 505581118|AGL98063.1|KC527544.1|1|1|cerB|cerB|sphingomyelinase_CerB 0.983 -
+contig_1 4095082 4097472 296323016|ADH05944.1|CP001903.1|1|1|inhA1|inhA1|M6_family_metalloprotease_immune_inhibitor_InhA1 0.998 -
+contig_1 2421494 2422813 29896770|AAP10048.1|AE016877.1|1|1|hblC|hblC|hemolytic_enterotoxin_HBL_lytic_component_L2 0.977 +
+contig_1 3520286 3521566 29895642|AAP08924.1|AE016877.1|1|1|entFM|entFM|enterotoxin_EntFM 0.988 -
+contig_1 2423992 2425119 29896768|AAP10046.1|AE016877.1|1|1|hblB|hblB|hemolytic_enterotoxin_HBL_binding_subunit_HblB 0.996 +
+contig_1 1750170 1751159 29897410|AAP10686.1|AE016877.1|1|1|plcA|plcA|phosphatidylinositol_diacylglycerol-lyase 1.000 +
+contig_1 3448511 3448927 446866507|WP_000943763.1|NG_050591.1|1|1|fosBx1|fosB_gen|fosfomycin_resistance_bacillithiol_transferase_FosBx1 0.998 -
+contig_1 1994015 1995253 29897180|AAP10457.1|AE016877.1|1|1|hlyII|hlyII|hemolysin_II_HlyII 1.000 +
+contig_1 2968066 2968986 1028083092|WP_063842248.1|NG_047482.1|1|1|bla1|bla1|class_A_beta-lactamase_Bla1 0.976 -
+contig_1 2079369 2080142 1028078998|WP_063839879.1|NG_056058.1|1|1|BcII|BcII|BcII_family_subclass_B1_metallo-beta-lactamase 0.963 +
+contig_1 3591341 3592420 29895498|AAP08785.1|AE016877.1|1|1|nheC|nheC|non-hemolytic_enterotoxin_NHE_subunit_C 0.999 -
+contig_1 2425495 2426895 6686906|CAB64770.1|AJ007794.1|1|1|hblA|hblA|hemolytic_enterotoxin_HBL_binding_subunit_HblA 0.988 +