Mercurial > repos > public-health-bioinformatics > pick_plasmids_containing_genes
comparison pick_plasmids_containing_genes.py @ 0:62019f5116f8 draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/tree/master/tools/pick_plasmids_containing_genes commit af16cdb29dcdc9ad2ff1effb5cf4e23a8c98a764"
| author | public-health-bioinformatics |
|---|---|
| date | Tue, 12 Nov 2019 22:20:54 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:62019f5116f8 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 from __future__ import print_function | |
| 4 | |
| 5 import argparse | |
| 6 import csv | |
| 7 import errno | |
| 8 import os | |
| 9 import re | |
| 10 import shutil | |
| 11 | |
| 12 | |
def parse_screen_file(screen_file):
    """Load a tab-separated screening table as a list of row mappings.

    The first line of the file supplies the column names; each following
    line becomes one mapping from column name to cell text. Double quotes
    are treated as the quoting character.

    :param screen_file: path of the tab-delimited file to read
    :return: list with one mapping per data line, in file order
    """
    with open(screen_file) as handle:
        return list(csv.DictReader(handle, delimiter="\t", quotechar='"'))
| 20 | |
| 21 | |
def _find_contigs_with_genes(report_path, screen):
    """Return the set of SEQUENCE values whose GENE matches a screening regex.

    :param report_path: path of the tab-delimited, concatenated abricate
        report; must provide '#FILE', 'GENE' and 'SEQUENCE' columns
    :param screen: iterable of mappings, each with a 'regex' entry
    :return: set of contig names taken from the 'SEQUENCE' column
    """
    # Compile each pattern once instead of once per report row.
    patterns = [re.compile(gene['regex']) for gene in screen]
    contigs = set()
    with open(report_path, 'r') as f:
        reader = csv.DictReader(f, delimiter="\t", quotechar='"')
        for row in reader:
            # A concatenated report repeats its header line once per
            # original report; those copies are not data rows.
            if row['#FILE'] == '#FILE':
                continue
            if any(pattern.search(row['GENE']) for pattern in patterns):
                contigs.add(row['SEQUENCE'])
    return contigs


def _plasmid_has_contig(plasmid, contigs):
    """Tell whether a fasta file has a header line naming one of ``contigs``.

    A header matches when the whole line, minus the leading '>' and any
    trailing whitespace, equals a contig name.

    :param plasmid: path of the fasta file to scan
    :param contigs: set of contig names to look for
    :return: True as soon as a matching header is found, else False
    """
    with open(plasmid, 'r') as f:
        for line in f:
            if line.startswith('>') and line.rstrip()[1:] in contigs:
                return True
    return False


def main(args):
    """Copy every plasmid fasta that carries a gene of interest into outdir.

    Reads the screening file for gene regexes, collects the contigs in the
    concatenated abricate report whose gene name matches any of them, and
    copies each plasmid file containing such a contig. A tab-separated
    table (header ``file``/``gene_detected``, then one row per copied
    plasmid, in ``args.plasmids`` order) is printed to stdout.

    :param args: namespace with ``outdir``, ``plasmids``,
        ``abricate_report_screening_file`` and
        ``concatenated_abricate_reports`` attributes
    :raises OSError: if ``outdir`` cannot be created and is not already a
        directory
    """
    # create output directory; an already-existing directory is fine
    try:
        os.mkdir(args.outdir)
    except OSError as exc:
        if not (exc.errno == errno.EEXIST and os.path.isdir(args.outdir)):
            raise

    # parse screening file
    screen = parse_screen_file(args.abricate_report_screening_file)

    print("\t".join(["file", "gene_detected"]))

    # A set keeps each contig once, so a contig hit by several report rows
    # or several regexes cannot cause the same plasmid to be reported and
    # copied more than once.
    contigs_with_genes_of_interest = _find_contigs_with_genes(
        args.concatenated_abricate_reports, screen)
    if not contigs_with_genes_of_interest:
        return

    # copy the corresponding plasmid fasta files into outdir; each file is
    # scanned a single time against all contigs of interest
    for plasmid in args.plasmids or []:
        if _plasmid_has_contig(plasmid, contigs_with_genes_of_interest):
            print("\t".join([plasmid, "True"]))
            shutil.copy2(plasmid, args.outdir)
| 60 | |
| 61 | |
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument("--plasmids", nargs='+', help="plasmid assemblies (fasta)")
    # The two files below are always opened by main(), so a missing option
    # is reported as a usage error instead of failing later on open(None).
    parser.add_argument("--concatenated_abricate_reports", required=True,
                        help="abricate reports (tsv)")
    parser.add_argument("--abricate_report_screening_file", required=True,
                        help="screening file (tsv) with a 'regex' column of gene name patterns")
    parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
    args = parser.parse_args()
    main(args)
