Mercurial > repos > nml > stringmlst
view split_by_allele.py @ 1:4e03573653fe draft default tip
planemo upload commit 008f4667b70be22e9ddf496738b3f74bb942ed28
author | nml |
---|---|
date | Tue, 19 Sep 2017 16:34:57 -0400 |
parents | fc0f15ca12e0 |
children |
line wrap: on
line source
#!/usr/bin/env python import getopt import os import sys from Bio import SeqIO ERROR_MSG = "Error could not parse out allele name and number from '%s'" def split_allele_file(alleles, profiles): writers = {} handle = open(alleles, "rU") for record in SeqIO.parse(handle, "fasta"): seqid = record.id # split out the alelle name from the version number # attempting to split based on '-' first, if that fails, then '_' result = seqid.split('_') if len(result) != 2: result = seqid.split('-') if len(result) == 2: newid = '_'.join(result) record.id = newid else: print(ERROR_MSG % seqid) exit(0) name, num = result # if writer exist, then write to that current fasta file if name in writers: SeqIO.write(record, writers[name], "fasta") else: # new allele found, create new writer and add the first record file_name = name + '.fasta' output_fh = open(file_name, "w") SeqIO.write(record, output_fh, "fasta") writers[name] = output_fh handle.close() # create config file based on the alleles found with open('config.txt', 'w') as cfile: cfile.write("[loci]\n") for name, writer in writers.items(): path = os.path.realpath(writer.name) cfile.write("%s\t%s\n" % (name, path)) cfile.write("[profile]\n") cfile.write("profile\t%s\n" % profiles) return alleles = None profiles = None """Input arguments""" options, remainder = getopt.getopt(sys.argv[1:], '', [ 'alleles=', 'profiles=' ]) for opt, arg in options: if opt in ('--alleles'): alleles = arg elif opt in ('--profiles'): profiles = arg if alleles and profiles: split_allele_file(alleles, profiles)