Mercurial > repos > p.lucas > get_iupac_percentage
changeset 0:21e3c46d9656 draft
Uploaded script
author | p.lucas |
---|---|
date | Tue, 11 Jun 2024 09:42:34 +0000 |
parents | |
children | 2dc9d2efce29 |
files | FA_IUPAC_count.py |
diffstat | 1 files changed, 54 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/FA_IUPAC_count.py Tue Jun 11 09:42:34 2024 +0000 @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# Libraries to import: +from __future__ import division +from Bio import SeqIO +import argparse +import os +import sys + + +# MAIN +def __main__(): + # Script options: + parser = argparse.ArgumentParser(description="""Count IUPAC base in (multi)fasta file.""", + epilog="""This script need few options, use -h to see it.""") + parser.add_argument("-i", "-inputfile", dest="input_file", help="Fasta file to count.") + parser.add_argument("-o", "-output_file", dest="output_file", help="Output File") + # Get script options: + options = parser.parse_args() + input_file = options.input_file + output_file = options.output_file + # Check options: + if len(sys.argv) < 5 or len(sys.argv) > 5: + parser.print_help() + sys.exit(1) + # Variables: + outputfile = open(output_file, "w") + + # Count function: + def count_dna(seq, allowed_bases=['A', 'T', 'G', 'C']): + seq = seq.upper() + total_dna_bases = 0 + for base in allowed_bases: + total_dna_bases = total_dna_bases + seq.count(base.upper()) + + if len(seq) != 0: + dna_fraction = total_dna_bases / len(seq) + return (round(dna_fraction * 100, 2), total_dna_bases) + else: + return (100, "CONSENSUS KO") + + # Read input file: + with open(input_file, "r") as handle: + for record in SeqIO.parse(handle, "fasta"): + iupac_list = ["R", "Y", "S", "W", "K", "M", "B", "D", "H", "V", "N"] + percIUPAC, countIUPAC = count_dna(record.seq, iupac_list) + # print(countIUPAC) + outputfile.write(record.id+'\t') + outputfile.write(str(percIUPAC)+'\n') + outputfile.close() + + +# MAIN END +if __name__ == "__main__": + __main__()