Mercurial > repos > nml > stringmlst
view split_by_allele.py @ 0:fc0f15ca12e0 draft
planemo upload commit 0366addb646f1ddea484915abdeda939d7d49bd5
author | nml |
---|---|
date | Mon, 24 Oct 2016 13:15:20 -0400 |
parents | |
children | 4e03573653fe |
line wrap: on
line source
#!/usr/bin/env python
"""Split a combined allele FASTA file into one FASTA file per allele name.

Each record ID is expected to look like ``<name>_<number>`` or
``<name>-<number>``.  Records are written to ``<name>.fasta`` in the current
working directory, and a ``config.txt`` file listing every generated FASTA
file plus the supplied profile path is written next to them.

Usage:
    split_by_allele.py --alleles <alleles.fasta> --profiles <profiles file>
"""
import getopt
import os
import sys

from Bio import SeqIO

# The "U" mode flag is required for universal newlines on Python 2 but was
# removed in Python 3.11, where text mode already translates newlines.
_READ_MODE = "r" if sys.version_info[0] >= 3 else "rU"

_USAGE = "Usage: %s --alleles <fasta file> --profiles <profiles file>\n"


def _split_allele_id(seqid):
    """Return ``[name, number]`` parsed from *seqid*, or ``None`` on failure.

    The ID is split on '_' first; if that does not yield exactly two parts,
    '-' is tried instead.
    """
    for separator in ('_', '-'):
        parts = seqid.split(separator)
        if len(parts) == 2:
            return parts
    return None


def split_allele_file(alleles, profiles):
    """Write one FASTA file per allele name and a matching ``config.txt``.

    Args:
        alleles: Path to the FASTA file containing every allele sequence.
        profiles: Path of the profile file; written verbatim into the
            ``[profile]`` section of ``config.txt``.

    Returns:
        None.

    Raises:
        SystemExit: With exit status 1 when a record ID cannot be split into
            an allele name and a number.  No ``config.txt`` is written in
            that case.
    """
    writers = {}
    try:
        with open(alleles, _READ_MODE) as handle:
            for record in SeqIO.parse(handle, "fasta"):
                seqid = record.id
                parts = _split_allele_id(seqid)
                if parts is None:
                    sys.stderr.write(
                        "Error could not parse out allele name and number "
                        "from '%s'\n" % seqid
                    )
                    # Non-zero status so callers can detect the failure.
                    sys.exit(1)

                name = parts[0]
                # Normalise the separator to '_' ('name-1' becomes 'name_1');
                # IDs that already use '_' are left unchanged by this.
                record.id = '_'.join(parts)

                # First record of a new allele name: create its output file.
                if name not in writers:
                    writers[name] = open(name + '.fasta', "w")
                SeqIO.write(record, writers[name], "fasta")
    finally:
        # Close (and therefore flush) every output file, even on error.
        for output_fh in writers.values():
            output_fh.close()

    # Create the config file based on the alleles found.
    with open('config.txt', 'w') as cfile:
        cfile.write("[loci]\n")
        for name, writer in writers.items():
            path = os.path.realpath(writer.name)
            cfile.write("%s\t%s\n" % (name, path))
        cfile.write("[profile]\n")
        cfile.write("profile\t%s\n" % profiles)
    return


def main(argv=None):
    """Parse ``--alleles`` / ``--profiles`` and run the split.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Raises:
        SystemExit: With exit status 2 when an option is invalid or a
            required option is missing.
    """
    if argv is None:
        argv = sys.argv[1:]

    alleles = None
    profiles = None

    try:
        options, _ = getopt.getopt(argv, '', ['alleles=', 'profiles='])
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % err)
        sys.stderr.write(_USAGE % sys.argv[0])
        sys.exit(2)

    for opt, arg in options:
        if opt == '--alleles':
            alleles = arg
        elif opt == '--profiles':
            profiles = arg

    if not (alleles and profiles):
        # Previously the script did nothing and still reported success.
        sys.stderr.write(_USAGE % sys.argv[0])
        sys.exit(2)

    split_allele_file(alleles, profiles)


if __name__ == "__main__":
    main()