Mercurial > repos > nml > stringmlst
diff split_by_allele.py @ 1:4e03573653fe draft default tip
planemo upload commit 008f4667b70be22e9ddf496738b3f74bb942ed28
author | nml |
---|---|
date | Tue, 19 Sep 2017 16:34:57 -0400 |
parents | fc0f15ca12e0 |
children |
line wrap: on
line diff
--- a/split_by_allele.py Mon Oct 24 13:15:20 2016 -0400 +++ b/split_by_allele.py Tue Sep 19 16:34:57 2017 -0400 @@ -1,62 +1,63 @@ #!/usr/bin/env python import getopt +import os import sys -import os + from Bio import SeqIO -def split_allele_file(alleles,profiles): - +ERROR_MSG = "Error could not parse out allele name and number from '%s'" + + +def split_allele_file(alleles, profiles): + writers = {} handle = open(alleles, "rU") for record in SeqIO.parse(handle, "fasta"): - - seqid=record.id - - #split out the alelle name from the version number - #attempting to split based on '-' first, if that fails, then '_' + + seqid = record.id + + # split out the alelle name from the version number + # attempting to split based on '-' first, if that fails, then '_' result = seqid.split('_') - - if len(result) !=2: + + if len(result) != 2: result = seqid.split('-') - if len(result) ==2: + if len(result) == 2: newid = '_'.join(result) record.id = newid else: - print "Error could not parse out allele name and number from '%s'" % seqid + print(ERROR_MSG % seqid) exit(0) - - - name,num = result + name, num = result - #if writer exist, then write to that current fasta file + # if writer exist, then write to that current fasta file if name in writers: SeqIO.write(record, writers[name], "fasta") else: - #new allele found, create new writer and add the first record + # new allele found, create new writer and add the first record file_name = name + '.fasta' output_fh = open(file_name, "w") SeqIO.write(record, output_fh, "fasta") writers[name] = output_fh - + handle.close() - #creat config file based on the alleles found - with open('config.txt','w') as cfile: + # create config file based on the alleles found + with open('config.txt', 'w') as cfile: cfile.write("[loci]\n") - for name, writer in writers.iteritems() : + for name, writer in writers.items(): path = os.path.realpath(writer.name) - cfile.write("%s\t%s\n" % (name,path)) + cfile.write("%s\t%s\n" % (name, path)) cfile.write("[profile]\n") cfile.write("profile\t%s\n" % profiles) - return -alleles=None -profiles=None +alleles = None +profiles = None """Input arguments""" options, remainder = getopt.getopt(sys.argv[1:], '', [ @@ -71,5 +72,4 @@ profiles = arg if alleles and profiles: - split_allele_file(alleles,profiles) - + split_allele_file(alleles, profiles)