Mercurial > repos > nml > stringmlst
comparison split_by_allele.py @ 0:fc0f15ca12e0 draft
planemo upload commit 0366addb646f1ddea484915abdeda939d7d49bd5
| author | nml |
|---|---|
| date | Mon, 24 Oct 2016 13:15:20 -0400 |
| parents | |
| children | 4e03573653fe |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:fc0f15ca12e0 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import getopt | |
| 3 import sys | |
| 4 import os | |
| 5 from Bio import SeqIO | |
| 6 | |
def _parse_allele_id(seqid):
    """Split a record id into its allele name and a normalised id.

    The id is split on '_' first; if that does not yield exactly two
    parts, '-' is tried instead.

    Returns a ``(name, normalised_id)`` tuple where ``normalised_id`` is
    always ``<name>_<number>``, or ``None`` when neither separator yields
    exactly two parts.
    """
    for separator in ('_', '-'):
        parts = seqid.split(separator)
        if len(parts) == 2:
            return parts[0], '_'.join(parts)
    return None


def split_allele_file(alleles, profiles):
    """Split a multi-locus allele FASTA file into one FASTA file per locus.

    Every record id in ``alleles`` must look like ``<name>_<number>`` or
    ``<name>-<number>``.  Records are written to ``<name>.fasta`` in the
    current working directory, with ids of the second form rewritten to
    ``<name>_<number>``.  Afterwards ``config.txt`` is written in the
    current working directory with a ``[loci]`` section (one
    ``name<TAB>absolute path`` line per locus file) and a ``[profile]``
    section pointing at ``profiles``.

    Args:
        alleles: path of the FASTA file holding all alleles.
        profiles: path of the profile file; written verbatim into
            ``config.txt``.

    Raises:
        SystemExit: with a non-zero status and the error message on
            stderr when a record id cannot be split into a name and a
            number.  ``config.txt`` is not written in that case.
    """
    # 'U' (universal newlines) is only meaningful on Python 2; Python 3
    # text mode already translates newlines and 3.11 rejects the flag.
    read_mode = "rU" if sys.version_info[0] < 3 else "r"

    # allele name -> open output file for that locus
    writers = {}

    try:
        with open(alleles, read_mode) as handle:
            for record in SeqIO.parse(handle, "fasta"):
                seqid = record.id

                parsed = _parse_allele_id(seqid)
                if parsed is None:
                    # sys.exit with a string prints it to stderr and
                    # exits with status 1, so callers see the failure.
                    sys.exit(
                        "Error could not parse out allele name and number from '%s'"
                        % seqid
                    )

                name, record.id = parsed

                # First record of a new locus: create its output file.
                if name not in writers:
                    writers[name] = open(name + '.fasta', "w")

                SeqIO.write(record, writers[name], "fasta")
    finally:
        # Close (and thereby flush) every locus file, including on the
        # error path, before anything refers to them.
        for writer in writers.values():
            writer.close()

    # Create the config file based on the alleles found.
    with open('config.txt', 'w') as cfile:
        cfile.write("[loci]\n")
        for name, writer in writers.items():
            # .name is still available on a closed file object.
            path = os.path.realpath(writer.name)
            cfile.write("%s\t%s\n" % (name, path))
        cfile.write("[profile]\n")
        cfile.write("profile\t%s\n" % profiles)

    return
| 56 | |
| 57 | |
# Paths taken from the command line; both stay None until parsed.
alleles = None
profiles = None

# Input arguments: only the long options --alleles=<path> and
# --profiles=<path> are accepted (no short options).
options, remainder = getopt.getopt(sys.argv[1:], '', [
    'alleles=',
    'profiles=',
])

for opt, arg in options:
    # Compare with ==.  `opt in ('--alleles')` is a substring test on a
    # plain string, because the parentheses do not create a tuple.
    if opt == '--alleles':
        alleles = arg
    elif opt == '--profiles':
        profiles = arg

# NOTE(review): when either option is missing the script silently does
# nothing and exits with status 0 -- confirm that this is intended.
if alleles and profiles:
    split_allele_file(alleles, profiles)
| 75 |
