view split_by_allele.py @ 0:fc0f15ca12e0 draft

planemo upload commit 0366addb646f1ddea484915abdeda939d7d49bd5
author nml
date Mon, 24 Oct 2016 13:15:20 -0400
parents
children 4e03573653fe
line wrap: on
line source

#!/usr/bin/env python
import getopt
import sys
import os
from Bio import SeqIO

def split_allele_file(alleles,profiles):
    """Split a multi-locus allele FASTA file into one FASTA file per locus.

    Every record id must have the form ``<name>_<number>`` or
    ``<name>-<number>`` with exactly one separator. Ids that use '-' are
    rewritten to use '_' before the record is written. Records are written to
    ``<name>.fasta`` in the current working directory, and a ``config.txt``
    listing every locus file plus the profiles entry is written next to them.

    alleles  -- path of the FASTA file holding the alleles of all loci
    profiles -- value written verbatim as the ``profile`` entry of config.txt

    Exits the process with status 1 (message on stderr) when a record id
    cannot be split into an allele name and number.
    """
    # locus name -> open output handle for that locus' FASTA file
    writers = {}

    try:
        # SeqIO.parse is given the filename so Biopython opens and closes the
        # input itself; this avoids the "rU" open mode, which newer Python 3
        # releases reject.
        for record in SeqIO.parse(alleles, "fasta"):
            seqid = record.id

            # Split the allele name from its number: try '_' first and fall
            # back to '-' when that does not give exactly two parts.
            result = seqid.split('_')
            if len(result) != 2:
                result = seqid.split('-')
                if len(result) != 2:
                    sys.stderr.write(
                        "Error could not parse out allele name and number from '%s'\n" % seqid)
                    # Non-zero status so the caller can detect the failure.
                    sys.exit(1)
                # Normalise '-' separated ids to the '_' form.
                record.id = '_'.join(result)

            name = result[0]

            # Reuse the writer of a known locus, otherwise create it. The
            # handle is registered before writing so it is always closed.
            output_fh = writers.get(name)
            if output_fh is None:
                output_fh = open(name + '.fasta', "w")
                writers[name] = output_fh
            SeqIO.write(record, output_fh, "fasta")
    finally:
        # Flush and release every per-locus file, also on the error path.
        for output_fh in writers.values():
            output_fh.close()

    # Create the config file based on the alleles found.
    with open('config.txt','w') as cfile:
        cfile.write("[loci]\n")
        for name, writer in writers.items():
            # .name is still available on a closed file object.
            path = os.path.realpath(writer.name)
            cfile.write("%s\t%s\n" % (name,path))
        cfile.write("[profile]\n")
        cfile.write("profile\t%s\n" % profiles)

    return


alleles=None
profiles=None

# Input arguments: --alleles and --profiles each take a value.
options, remainder = getopt.getopt(sys.argv[1:], '', [
 'alleles=',
 'profiles='
])

for opt, arg in options:
    # Compare with ==: ('--alleles') is a plain string, not a tuple, so the
    # previous `in` check was a substring test that only worked by accident.
    if opt == '--alleles':
        alleles = arg
    elif opt == '--profiles':
        profiles = arg

if alleles and profiles:
    split_allele_file(alleles,profiles)
else:
    # Report the missing argument instead of silently doing nothing.
    sys.stderr.write(
        "Usage: %s --alleles <alleles fasta> --profiles <profiles file>\n" % sys.argv[0])
    sys.exit(1)