| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 import argparse | 
|  | 4 import csv | 
|  | 5 import os | 
|  | 6 | 
|  | 7 import numpy | 
|  | 8 import pandas | 
|  | 9 import matplotlib.pyplot as pyplot | 
|  | 10 | 
|  | 11 | 
|  | 12 def get_amr_in_feature_hits(feature_hits): | 
|  | 13     for k in feature_hits.keys(): | 
|  | 14         if k.lower().find('amr') >= 0: | 
|  | 15             return feature_hits[k] | 
|  | 16     return None | 
|  | 17 | 
|  | 18 | 
|  | 19 def draw_amr_matrix(feature_hits_files, amr_gene_drug_file, output_dir): | 
|  | 20     ofh = open('process_log', 'w') | 
|  | 21 | 
|  | 22     # Read feature_hits_files. | 
|  | 23     feature_hits = pandas.Series(dtype=object) | 
|  | 24     for feature_hits_file in feature_hits_files: | 
|  | 25         feature_name = os.path.basename(feature_hits_file) | 
|  | 26         # Make sure the file is not empty. | 
|  | 27         if os.path.isfile(feature_hits_file) and os.path.getsize(feature_hits_file) > 0: | 
|  | 28             best_hits = pandas.read_csv(filepath_or_buffer=feature_hits_file, sep='\t', header=None) | 
|  | 29             ofh.write("\nFeature file %s will be processed\n" % os.path.basename(feature_hits_file)) | 
|  | 30         else: | 
|  | 31             ofh.write("\nEmpty feature file %s will NOT be processed\n" % os.path.basename(feature_hits_file)) | 
|  | 32             best_hits = None | 
|  | 33         feature_hits[feature_name] = best_hits | 
|  | 34 | 
|  | 35     amr_hits = get_amr_in_feature_hits(feature_hits) | 
|  | 36     ofh.write("\namr_hits:\n%s\n" % str(amr_hits)) | 
|  | 37     if amr_hits is not None: | 
|  | 38         amr_to_draw = pandas.DataFrame(columns=['gene', 'drug']) | 
|  | 39         ofh.write("\namr_to_draw:\n%s\n" % str(amr_to_draw)) | 
|  | 40         # Read amr_drug_gene_file. | 
|  | 41         amr_gene_drug = pandas.read_csv(amr_gene_drug_file, index_col=None, sep='\t', quoting=csv.QUOTE_NONE, header=None) | 
|  | 42         ofh.write("\namr_gene_drug:\n%s\n" % str(amr_gene_drug)) | 
|  | 43 | 
|  | 44         # Roll up AMR gene hits. | 
|  | 45         ofh.write("\namr_hits.shape[0]:%s\n" % str(amr_hits.shape[0])) | 
|  | 46         if amr_hits.shape[0] > 0: | 
|  | 47             for gene_idx, gene in amr_hits.iterrows(): | 
|  | 48                 ofh.write("gene_idx:%s\n" % str(gene_idx)) | 
|  | 49                 ofh.write("gene:%s\n" % str(gene)) | 
|  | 50                 gene_name = gene[3] | 
|  | 51                 ofh.write("gene_name: %s\n" % str(gene_name)) | 
|  | 52                 ofh.write("amr_gene_drug[0]: %s\n" % str(amr_gene_drug[0])) | 
|  | 53                 drugs = amr_gene_drug.loc[amr_gene_drug[0] == gene_name, :][1] | 
|  | 54                 ofh.write("drugs:%s\n" % str(drugs)) | 
|  | 55                 for drug in drugs: | 
|  | 56                     amr_to_draw = amr_to_draw.append(pandas.Series([gene_name, drug], name=amr_to_draw.shape[0], index=amr_to_draw.columns)) | 
|  | 57                     ofh.write("\amr_to_draw:%s\n" % str(amr_to_draw)) | 
|  | 58 | 
|  | 59         amr_mutations = pandas.Series(dtype=object) | 
|  | 60         if amr_mutations.shape[0] > 0: | 
|  | 61             # Roll up potentially resistance conferring mutations. | 
|  | 62             # TODO: may need to populate this if we need to use call_amr_mutations. | 
|  | 63             for mutation_region, mutation_hits in amr_mutations.iteritems(): | 
|  | 64                 for mutation_idx, mutation_hit in mutation_hits.iterrows(): | 
|  | 65                     mutation_name = mutation_region + ' ' + mutation_hit['REF'] + '->' + mutation_hit['ALT'] | 
|  | 66                     amr_to_draw = amr_to_draw.append(pandas.Series([mutation_name, mutation_hit['DRUG']], name=amr_to_draw.shape[0], index=amr_to_draw.columns)) | 
|  | 67 | 
|  | 68         amr_deletions = pandas.DataFrame() | 
|  | 69         if amr_deletions.shape[0] > 0: | 
|  | 70             # Roll up deletions that might confer resistance. | 
|  | 71             # TODO: may need to populate this if we need to use call_insertions. | 
|  | 72             for deletion_idx, deleted_gene in amr_deletions.iterrows(): | 
|  | 73                 amr_to_draw = amr_to_draw.append(pandas.Series(['\u0394' + deleted_gene[3], deleted_gene[5]], name=amr_to_draw.shape[0], index=amr_to_draw.columns)) | 
|  | 74 | 
|  | 75         if amr_to_draw.shape[0] > 1: | 
|  | 76             ofh.write("\nDrawing AMR matrix...\n") | 
|  | 77             present_genes = amr_to_draw['gene'].unique() | 
|  | 78             present_drugs = amr_to_draw['drug'].unique() | 
|  | 79             amr_matrix = pandas.DataFrame(0, index=present_genes, columns=present_drugs) | 
|  | 80             for hit_idx, hit in amr_to_draw.iterrows(): | 
|  | 81                 amr_matrix.loc[hit[0], hit[1]] = 1 | 
|  | 82             amr_matrix_png = os.path.join(output_dir, 'amr_matrix.png') | 
|  | 83             int_matrix = amr_matrix[amr_matrix.columns].astype(int) | 
|  | 84             figure, axis = pyplot.subplots() | 
|  | 85             axis.invert_yaxis() | 
|  | 86             axis.set_yticks(numpy.arange(0.5, len(amr_matrix.index)), minor=False) | 
|  | 87             axis.set_yticklabels(int_matrix.index.values) | 
|  | 88             axis.set_xticks(numpy.arange(0.5, len(amr_matrix.columns)), minor=False) | 
|  | 89             axis.set_xticklabels(amr_matrix.columns.values, rotation=90) | 
|  | 90             axis.xaxis.tick_top() | 
|  | 91             axis.xaxis.set_label_position('top') | 
|  | 92             pyplot.tight_layout() | 
|  | 93             pyplot.savefig(amr_matrix_png, dpi=300) | 
|  | 94         else: | 
|  | 95             ofh.write("\nEmpty AMR matrix, nothing to draw...\n") | 
|  | 96     ofh.close() | 
|  | 97 | 
|  | 98 | 
|  | 99 if __name__ == '__main__': | 
|  | 100     parser = argparse.ArgumentParser() | 
|  | 101 | 
|  | 102     parser.add_argument('--amr_gene_drug_file', action='store', dest='amr_gene_drug_file', help='AMR_gene_drugs tsv file') | 
|  | 103     parser.add_argument('--feature_hits_dir', action='store', dest='feature_hits_dir', help='Directory of tabular files containing feature hits') | 
|  | 104     parser.add_argument('--output_dir', action='store', dest='output_dir', help='Output directory') | 
|  | 105 | 
|  | 106     args = parser.parse_args() | 
|  | 107 | 
|  | 108     # Get thge collection of feature hits files.  The collection | 
|  | 109     # will be sorted alphabetically and will contain 2 files | 
|  | 110     # named something like AMR_CDS_311_2022_12_20.fasta and | 
|  | 111     # Incompatibility_Groups_2023_01_01.fasta. | 
|  | 112     feature_hits_files = [] | 
|  | 113     for file_name in sorted(os.listdir(args.feature_hits_dir)): | 
|  | 114         file_path = os.path.abspath(os.path.join(args.feature_hits_dir, file_name)) | 
|  | 115         feature_hits_files.append(file_path) | 
|  | 116 | 
|  | 117     draw_amr_matrix(feature_hits_files, args.amr_gene_drug_file, args.output_dir) |