# HG changeset patch # User nml # Date 1576616127 18000 # Node ID bca6b5cb87cd0548d398f9809c183044c79bab1c "planemo upload for repository https://github.com/phac-nml/promer commit 09ae227a84e31c9c56f58815b1a0c8c0e0a7900e" diff -r 000000000000 -r bca6b5cb87cd promer4_substitions.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/promer4_substitions.xml Tue Dec 17 15:55:27 2019 -0500 @@ -0,0 +1,59 @@ + + aligns two sets of contigs and reports amino acid substitutions between them + + python + mummer4 + biopython + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r bca6b5cb87cd promer_substitutions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/promer_substitutions.py Tue Dec 17 15:55:27 2019 -0500 @@ -0,0 +1,126 @@ +#!/usr/bin/env python + +import os +import subprocess +import sys + +from Bio import SeqIO +from Bio.SeqIO import FastaIO + +""" +# ============================================================================= +Copyright Government of Canada 2018 +Written by: Eric Marinier, Public Health Agency of Canada, + National Microbiology Laboratory +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. You may obtain a copy of the +License at: +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +# ============================================================================= +""" + +__version__ = '0.2.0' + +FASTA_DIRECTORY = "fasta" + +REF_START = 2 +REF_END = 3 + +HEADER_ROW = "[P1]\t[SUB]\t[SUB]\t[P2]\t[BUFF]\t[DIST]\ + \t[R]\t[Q]\t[FRM]\t[FRM]\t[TAG]\t[TAG]\n" + + +def reorient_file(fasta_location, start, end): + + record = list(SeqIO.parse(fasta_location, "fasta"))[0] + + # reversed + if start > end: + record.seq = record.seq[(end - 1):start].reverse_complement() + + # same orientation + else: + record.seq = record.seq[(start - 1):end] + + SeqIO.write(record, fasta_location, "fasta") + + +def promer(reference_location, query_location): + + # promer + subprocess.check_output( + ['promer', reference_location, query_location], + universal_newlines=True) + + +# File locations from arguments: +reference_location = sys.argv[1] +query_location = sys.argv[2] + +# Make directories: +if not os.path.exists(FASTA_DIRECTORY): + os.mkdir(FASTA_DIRECTORY) + +# Read query FASTA: +query = list(SeqIO.parse(query_location, "fasta")) +fasta_locations = [] + +# Prepare final output: +snps_file = open("snps.tab", "w") +snps_file.write(HEADER_ROW) + +# Split the query FASTA file into multiple files, each with one record: +# (This is done to work around a bug in promer.) +for record in query: + + output_name = str(record.id) + ".fasta" + output_location = os.path.join(FASTA_DIRECTORY, output_name) + + fasta_locations.append(output_location) + SeqIO.write(record, output_location, "fasta") + +# Run promer on each (new) FASTA file: +for fasta_location in fasta_locations: + + promer(reference_location, fasta_location) + + # show-coords + output = subprocess.check_output( + ['show-coords', '-THrcl', 'out.delta'], universal_newlines=True) + + if not output: + continue + + ref_start = int(output.split()[REF_START]) + ref_end = int(output.split()[REF_END]) + + reorient_file(fasta_location, ref_start, ref_end) + promer(reference_location, fasta_location) + + # show snps + output = str(subprocess.check_output( + ['show-snps', '-T', '-q', 'out.delta'], universal_newlines=True)) + output = output.split("\n")[4:] + output = "\n".join(output) + + snps_file.write(output) + +snps_file.close() + +# Write all FASTA files into one output: +output_location = "contigs.fasta" +records = [] + +for fasta_location in fasta_locations: + + record = list(SeqIO.parse(fasta_location, "fasta"))[0] + records.append(record) + +contigs_file = open(output_location, 'w') +fasta_writer = FastaIO.FastaWriter(contigs_file, wrap=None) +fasta_writer.write_file(records) +contigs_file.close() diff -r 000000000000 -r bca6b5cb87cd test-data/contigs.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/contigs.fasta Tue Dec 17 15:55:27 2019 -0500 @@ -0,0 +1,24 @@ +>NODE_1_length_6 +GATACA +>NODE_2_length_6 +GATACA +>NODE_3_length_6 +GATACA +>NODE_4_length_6 +GATACA +>NODE_5_length_6 +GATACA +>NODE_6_length_6 +GATACA +>NODE_7_length_6 +GATACA +>NODE_8_length_6 +GATACA +>NODE_9_length_6 +GATACA +>NODE_10_length_6 +GATACA +>NODE_11_length_6 +GATACA +>NODE_12_length_6 +GATACA diff -r 000000000000 -r bca6b5cb87cd test-data/query.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/query.fasta Tue Dec 17 15:55:27 2019 -0500 @@ -0,0 +1,24 @@ +>NODE_1_length_6 +GATACA +>NODE_2_length_6 +GATACA +>NODE_3_length_6 +GATACA +>NODE_4_length_6 +GATACA +>NODE_5_length_6 +GATACA +>NODE_6_length_6 +GATACA +>NODE_7_length_6 +GATACA +>NODE_8_length_6 +GATACA +>NODE_9_length_6 +GATACA +>NODE_10_length_6 +GATACA +>NODE_11_length_6 +GATACA +>NODE_12_length_6 +GATACA diff -r 000000000000 -r bca6b5cb87cd test-data/reference.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reference.fasta Tue Dec 17 15:55:27 2019 -0500 @@ -0,0 +1,12 @@ +>NODE_1_length_300 +CCAAACTCTGTAAAATCCCTATGATTAGGGACACAGAGTGAGAACCAAACTCTCCCTACGGGCAACATCAGCCTAGGAAGCCCAATCGTCTTTAGCGGTTGGGCACTTCACCTCTTTTCCATCTATAGCTCTGACACGCTCCAAATCCACAAAATGCAAAAATTCTTGATTGATTAGATTGTAGTAAAAATGCGCATGAATTTTTTTATACTCTTCAAGCGTTTTAGTCTCAATCTTTTCTAAAATAGGCTCTAACAGATCCAACTCACCCCCAAACCCTAAAAAACCTTTTAATTCTTCA +>NODE_2_length_300 +CCAAACTCTGTAAAATCCCTATGATTAGGGACACAGAGTGAGAACCAAACTCTCCCTACGGGCAACATCAGCCTAGGAAGCCCAATCGTCTTTAGCGGTTGGGCACTTCACCTCTTTTCCATCTATAGCTCTGACACGCTCCAAATCCACAAAATGCAAAAATTCTTGATTGATTAGATTGTAGTAAAAATGCGCATGAATTTTTTTATACTCTTCAAGCGTTTTAGTCTCAATCTTTTCTAAAATAGGCTCTAACAGATCCAACTCACCCCCAAACCCTAAAAAACCTTTTAATTCTTCA +>NODE_3_length_300 +CCAAACTCTGTAAAATCCCTATGATTAGGGACACAGAGTGAGAACCAAACTCTCCCTACGGGCAACATCAGCCTAGGAAGCCCAATCGTCTTTAGCGGTTGGGCACTTCACCTCTTTTCCATCTATAGCTCTGACACGCTCCAAATCCACAAAATGCAAAAATTCTTGATTGATTAGATTGTAGTAAAAATGCGCATGAATTTTTTTATACTCTTCAAGCGTTTTAGTCTCAATCTTTTCTAAAATAGGCTCTAACAGATCCAACTCACCCCCAAACCCTAAAAAACCTTTTAATTCTTCA +>NODE_4_length_300 +CCAAACTCTGTAAAATCCCTATGATTAGGGACACAGAGTGAGAACCAAACTCTCCCTACGGGCAACATCAGCCTAGGAAGCCCAATCGTCTTTAGCGGTTGGGCACTTCACCTCTTTTCCATCTATAGCTCTGACACGCTCCAAATCCACAAAATGCAAAAATTCTTGATTGATTAGATTGTAGTAAAAATGCGCATGAATTTTTTTATACTCTTCAAGCGTTTTAGTCTCAATCTTTTCTAAAATAGGCTCTAACAGATCCAACTCACCCCCAAACCCTAAAAAACCTTTTAATTCTTCA +>NODE_5_length_300 +CCAAACTCTGTAAAATCCCTATGATTAGGGACACAGAGTGAGAACCAAACTCTCCCTACGGGCAACATCAGCCTAGGAAGCCCAATCGTCTTTAGCGGTTGGGCACTTCACCTCTTTTCCATCTATAGCTCTGACACGCTCCAAATCCACAAAATGCAAAAATTCTTGATTGATTAGATTGTAGTAAAAATGCGCATGAATTTTTTTATACTCTTCAAGCGTTTTAGTCTCAATCTTTTCTAAAATAGGCTCTAACAGATCCAACTCACCCCCAAACCCTAAAAAACCTTTTAATTCTTCA +>NODE_6_length_300 +CCAAACTCTGTAAAATCCCTATGATTAGGGACACAGAGTGAGAACCAAACTCTCCCTACGGGCAACATCAGCCTAGGAAGCCCAATCGTCTTTAGCGGTTGGGCACTTCACCTCTTTTCCATCTATAGCTCTGACACGCTCCAAATCCACAAAATGCAAAAATTCTTGATTGATTAGATTGTAGTAAAAATGCGCATGAATTTTTTTATACTCTTCAAGCGTTTTAGTCTCAATCTTTTCTAAAATAGGCTCTAACAGATCCAACTCACCCCCAAACCCTAAAAAACCTTTTAATTCTTCA diff -r 000000000000 -r bca6b5cb87cd test-data/snps.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/snps.tab Tue Dec 17 15:55:27 2019 -0500 @@ -0,0 +1,1 @@ +[P1] [SUB] [SUB] [P2] [BUFF] [DIST] [R] [Q] [FRM] [FRM] [TAG] [TAG]