Mercurial > repos > nml > promer
view promer_substitutions.py @ 0:bca6b5cb87cd draft default tip
"planemo upload for repository https://github.com/phac-nml/promer commit 09ae227a84e31c9c56f58815b1a0c8c0e0a7900e"
author | nml |
---|---|
date | Tue, 17 Dec 2019 15:55:27 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python import os import subprocess import sys from Bio import SeqIO from Bio.SeqIO import FastaIO """ # ============================================================================= Copyright Government of Canada 2018 Written by: Eric Marinier, Public Health Agency of Canada, National Microbiology Laboratory Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. # ============================================================================= """ __version__ = '0.2.0' FASTA_DIRECTORY = "fasta" REF_START = 2 REF_END = 3 HEADER_ROW = "[P1]\t[SUB]\t[SUB]\t[P2]\t[BUFF]\t[DIST]\ \t[R]\t[Q]\t[FRM]\t[FRM]\t[TAG]\t[TAG]\n" def reorient_file(fasta_location, start, end): record = list(SeqIO.parse(fasta_location, "fasta"))[0] # reversed if start > end: record.seq = record.seq[(end - 1):start].reverse_complement() # same orientation else: record.seq = record.seq[(start - 1):end] SeqIO.write(record, fasta_location, "fasta") def promer(reference_location, query_location): # promer subprocess.check_output( ['promer', reference_location, query_location], universal_newlines=True) # File locations from arguments: reference_location = sys.argv[1] query_location = sys.argv[2] # Make directories: if not os.path.exists(FASTA_DIRECTORY): os.mkdir(FASTA_DIRECTORY) # Read query FASTA: query = list(SeqIO.parse(query_location, "fasta")) fasta_locations = [] # Prepare final output: snps_file = open("snps.tab", "w") snps_file.write(HEADER_ROW) # Split the query FASTA file into multiple files, each with one record: # (This is done to work around a bug in promer.) for record in query: output_name = str(record.id) + ".fasta" output_location = os.path.join(FASTA_DIRECTORY, output_name) fasta_locations.append(output_location) SeqIO.write(record, output_location, "fasta") # Run promer on each (new) FASTA file: for fasta_location in fasta_locations: promer(reference_location, fasta_location) # show-coords output = subprocess.check_output( ['show-coords', '-THrcl', 'out.delta'], universal_newlines=True) if not output: continue ref_start = int(output.split()[REF_START]) ref_end = int(output.split()[REF_END]) reorient_file(fasta_location, ref_start, ref_end) promer(reference_location, fasta_location) # show snps output = str(subprocess.check_output( ['show-snps', '-T', '-q', 'out.delta'], universal_newlines=True)) output = output.split("\n")[4:] output = "\n".join(output) snps_file.write(output) snps_file.close() # Write all FASTA files into one output: output_location = "contigs.fasta" records = [] for fasta_location in fasta_locations: record = list(SeqIO.parse(fasta_location, "fasta"))[0] records.append(record) contigs_file = open(output_location, 'w') fasta_writer = FastaIO.FastaWriter(contigs_file, wrap=None) fasta_writer.write_file(records) contigs_file.close()