| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 import argparse | 
|  | 4 import csv | 
|  | 5 import os | 
|  | 6 | 
|  | 7 import numpy | 
|  | 8 import pandas | 
|  | 9 import matplotlib.pyplot as pyplot | 
|  | 10 | 
|  | 11 | 
| 1 | 12 def get_amr_in_feature_hits(amr_feature_hits): | 
|  | 13     for k in amr_feature_hits.keys(): | 
| 0 | 14         if k.lower().find('amr') >= 0: | 
| 1 | 15             return amr_feature_hits[k] | 
| 0 | 16     return None | 
|  | 17 | 
|  | 18 | 
| 1 | 19 def draw_amr_matrix(amr_feature_hits_files, amr_deletions_file, amr_mutations_file, amr_gene_drug_file, output_dir): | 
| 0 | 20     ofh = open('process_log', 'w') | 
|  | 21 | 
| 1 | 22     # Read amr_feature_hits_files. | 
|  | 23     amr_feature_hits = pandas.Series(dtype=object) | 
|  | 24     for amr_feature_hits_file in amr_feature_hits_files: | 
|  | 25         feature_name = os.path.basename(amr_feature_hits_file) | 
| 0 | 26         # Make sure the file is not empty. | 
| 1 | 27         if os.path.isfile(amr_feature_hits_file) and os.path.getsize(amr_feature_hits_file) > 0: | 
|  | 28             best_hits = pandas.read_csv(filepath_or_buffer=amr_feature_hits_file, sep='\t', header=None) | 
|  | 29             ofh.write("\nFeature file %s will be processed\n" % os.path.basename(amr_feature_hits_file)) | 
| 0 | 30         else: | 
| 1 | 31             ofh.write("\nEmpty feature file %s will NOT be processed\n" % os.path.basename(amr_feature_hits_file)) | 
| 0 | 32             best_hits = None | 
| 1 | 33         amr_feature_hits[feature_name] = best_hits | 
| 0 | 34 | 
| 1 | 35     amr_hits = get_amr_in_feature_hits(amr_feature_hits) | 
| 0 | 36     ofh.write("\namr_hits:\n%s\n" % str(amr_hits)) | 
|  | 37     if amr_hits is not None: | 
|  | 38         amr_to_draw = pandas.DataFrame(columns=['gene', 'drug']) | 
|  | 39         ofh.write("\namr_to_draw:\n%s\n" % str(amr_to_draw)) | 
|  | 40         # Read amr_drug_gene_file. | 
|  | 41         amr_gene_drug = pandas.read_csv(amr_gene_drug_file, index_col=None, sep='\t', quoting=csv.QUOTE_NONE, header=None) | 
|  | 42         ofh.write("\namr_gene_drug:\n%s\n" % str(amr_gene_drug)) | 
|  | 43 | 
|  | 44         # Roll up AMR gene hits. | 
|  | 45         ofh.write("\namr_hits.shape[0]:%s\n" % str(amr_hits.shape[0])) | 
|  | 46         if amr_hits.shape[0] > 0: | 
|  | 47             for gene_idx, gene in amr_hits.iterrows(): | 
|  | 48                 ofh.write("gene_idx:%s\n" % str(gene_idx)) | 
|  | 49                 ofh.write("gene:%s\n" % str(gene)) | 
|  | 50                 gene_name = gene[3] | 
|  | 51                 ofh.write("gene_name: %s\n" % str(gene_name)) | 
|  | 52                 ofh.write("amr_gene_drug[0]: %s\n" % str(amr_gene_drug[0])) | 
|  | 53                 drugs = amr_gene_drug.loc[amr_gene_drug[0] == gene_name, :][1] | 
|  | 54                 ofh.write("drugs:%s\n" % str(drugs)) | 
|  | 55                 for drug in drugs: | 
|  | 56                     amr_to_draw = amr_to_draw.append(pandas.Series([gene_name, drug], name=amr_to_draw.shape[0], index=amr_to_draw.columns)) | 
|  | 57                     ofh.write("\amr_to_draw:%s\n" % str(amr_to_draw)) | 
|  | 58 | 
| 1 | 59         if amr_mutations_file is not None: | 
|  | 60             # TODO: So far, no samples have produced mutations, so we haven't been able | 
|  | 61             # to produce a populated VarScan VCF file of mutations - https://github.com/appliedbinf/pima_md/blob/main/pima.py#L2923. | 
|  | 62             # The call_amr_mutations Galaxy tool will currently produce this VarScan VCF file, but we need a sample that | 
|  | 63             # will produce a populated file.  After we find one, we'll need to figure out how to implement this loop | 
|  | 64             # https://github.com/appliedbinf/pima_md/blob/main/pima.py#L2925 in a Galaxy tool so that the VarScan VCF | 
| 3 | 65             # file will be converted to the TSV amr_mutations_file that thsi tool expects. | 
|  | 66             amr_mutations = pandas.DataFrame() | 
| 0 | 67             # Roll up potentially resistance conferring mutations. | 
|  | 68             for mutation_region, mutation_hits in amr_mutations.iteritems(): | 
|  | 69                 for mutation_idx, mutation_hit in mutation_hits.iterrows(): | 
|  | 70                     mutation_name = mutation_region + ' ' + mutation_hit['REF'] + '->' + mutation_hit['ALT'] | 
|  | 71                     amr_to_draw = amr_to_draw.append(pandas.Series([mutation_name, mutation_hit['DRUG']], name=amr_to_draw.shape[0], index=amr_to_draw.columns)) | 
|  | 72 | 
| 1 | 73         if amr_deletions_file is not None: | 
|  | 74             # TODO: So far, no samples have produced deletions, but we do have all the pices in place | 
|  | 75             # within the workflow to receive the amr_deletions_file here, although it is currently | 
|  | 76             # always empty... | 
| 0 | 77             # Roll up deletions that might confer resistance. | 
| 3 | 78             try: | 
|  | 79                 amr_deletions = pandas.read_csv(filepath_or_buffer=amr_deletions_file, sep='\t', header=None) | 
|  | 80             except Exception: | 
|  | 81                 amr_deletions = pandas.DataFrame() | 
| 1 | 82             if amr_deletions.shape[0] > 0: | 
|  | 83                 amr_deletions.columns = ['contig', 'start', 'stop', 'name', 'type', 'drug', 'note'] | 
|  | 84                 amr_deletions = amr_deletions.loc[amr_deletions['type'].isin(['large-deletion', 'any']), :] | 
|  | 85                 for deletion_idx, deleted_gene in amr_deletions.iterrows(): | 
|  | 86                     amr_to_draw = amr_to_draw.append(pandas.Series(['\u0394' + deleted_gene[3], deleted_gene[5]], name=amr_to_draw.shape[0], index=amr_to_draw.columns)) | 
| 0 | 87 | 
|  | 88         if amr_to_draw.shape[0] > 1: | 
|  | 89             ofh.write("\nDrawing AMR matrix...\n") | 
|  | 90             present_genes = amr_to_draw['gene'].unique() | 
|  | 91             present_drugs = amr_to_draw['drug'].unique() | 
|  | 92             amr_matrix = pandas.DataFrame(0, index=present_genes, columns=present_drugs) | 
|  | 93             for hit_idx, hit in amr_to_draw.iterrows(): | 
|  | 94                 amr_matrix.loc[hit[0], hit[1]] = 1 | 
|  | 95             amr_matrix_png = os.path.join(output_dir, 'amr_matrix.png') | 
|  | 96             int_matrix = amr_matrix[amr_matrix.columns].astype(int) | 
|  | 97             figure, axis = pyplot.subplots() | 
|  | 98             axis.invert_yaxis() | 
|  | 99             axis.set_yticks(numpy.arange(0.5, len(amr_matrix.index)), minor=False) | 
|  | 100             axis.set_yticklabels(int_matrix.index.values) | 
|  | 101             axis.set_xticks(numpy.arange(0.5, len(amr_matrix.columns)), minor=False) | 
|  | 102             axis.set_xticklabels(amr_matrix.columns.values, rotation=90) | 
|  | 103             axis.xaxis.tick_top() | 
|  | 104             axis.xaxis.set_label_position('top') | 
|  | 105             pyplot.tight_layout() | 
|  | 106             pyplot.savefig(amr_matrix_png, dpi=300) | 
|  | 107         else: | 
|  | 108             ofh.write("\nEmpty AMR matrix, nothing to draw...\n") | 
|  | 109     ofh.close() | 
|  | 110 | 
|  | 111 | 
|  | 112 if __name__ == '__main__': | 
|  | 113     parser = argparse.ArgumentParser() | 
|  | 114 | 
| 1 | 115     parser.add_argument('--amr_feature_hits_dir', action='store', dest='amr_feature_hits_dir', help='Directory of tabular files containing feature hits') | 
|  | 116     parser.add_argument('--amr_deletions_file', action='store', dest='amr_deletions_file', default=None, help='AMR deletions BED file') | 
|  | 117     parser.add_argument('--amr_mutations_file', action='store', dest='amr_mutations_file', default=None, help='AMR mutations TSV file') | 
| 0 | 118     parser.add_argument('--amr_gene_drug_file', action='store', dest='amr_gene_drug_file', help='AMR_gene_drugs tsv file') | 
|  | 119     parser.add_argument('--output_dir', action='store', dest='output_dir', help='Output directory') | 
|  | 120 | 
|  | 121     args = parser.parse_args() | 
|  | 122 | 
|  | 123     # Get thge collection of feature hits files.  The collection | 
|  | 124     # will be sorted alphabetically and will contain 2 files | 
|  | 125     # named something like AMR_CDS_311_2022_12_20.fasta and | 
|  | 126     # Incompatibility_Groups_2023_01_01.fasta. | 
| 1 | 127     amr_feature_hits_files = [] | 
|  | 128     for file_name in sorted(os.listdir(args.amr_feature_hits_dir)): | 
|  | 129         file_path = os.path.abspath(os.path.join(args.amr_feature_hits_dir, file_name)) | 
|  | 130         amr_feature_hits_files.append(file_path) | 
| 0 | 131 | 
| 1 | 132     draw_amr_matrix(amr_feature_hits_files, args.amr_deletions_file, args.amr_mutations_file, args.amr_gene_drug_file, args.output_dir) |