comparison make_phylip.py @ 36:51cd0307fb70

Phylip's extra ouputs are now stored in the job working directory
author Richard Burhans <burhans@bx.psu.edu>
date Wed, 20 Nov 2013 16:32:01 -0500
parents ea52b23f1141
children 884ccb07885b
comparison
equal deleted inserted replaced
35:ea52b23f1141 36:51cd0307fb70
19 # along with this program; if not, write to the Free Software 19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21 # MA 02110-1301, USA. 21 # MA 02110-1301, USA.
22 22
23 import argparse 23 import argparse
24 import errno 24 import glob
25 import os 25 import os
26 import shutil 26 import shutil
27 from Population import Population 27 from Population import Population
28
29 def mkdir_p(path):
30 try:
31 os.makedirs(path)
32 except OSError, e:
33 if e.errno <> errno.EEXIST:
34 raise
35 28
36 def revseq(seq): 29 def revseq(seq):
37 seq=list(seq) 30 seq=list(seq)
38 seq.reverse() 31 seq.reverse()
39 seq=''.join(seq) 32 seq=''.join(seq)
355 #~ formtd='\n'.join([hader,seq]) 348 #~ formtd='\n'.join([hader,seq])
356 dPopsFormPhy[eachPop]=formtd 349 dPopsFormPhy[eachPop]=formtd
357 #~ 350 #~
358 return dPopsFormPhy,len(seq) 351 return dPopsFormPhy,len(seq)
359 352
360 def wrapSeqsFasta(dENSEMBLTPopsFasta,outFastaFold,sPopsIntrst): 353 def wrapSeqsFasta(dENSEMBLTPopsFasta,sPopsIntrst):
361 """ 354 """
362 """ 355 """
363 ENSEMBLTKaKs=[] 356 ENSEMBLTKaKs=[]
364 nonHeader=True 357 nonHeader=True
365 cnt=0 358 cnt=0
373 seqPMLformat=['%s %s'%(len(dPopsFormPhy),lenseq)]#generate new PHYML sequence 366 seqPMLformat=['%s %s'%(len(dPopsFormPhy),lenseq)]#generate new PHYML sequence
374 #~ seqPMLformat=[]#generate new PHYML sequence 367 #~ seqPMLformat=[]#generate new PHYML sequence
375 for namex in sorted(sPopsIntrst): 368 for namex in sorted(sPopsIntrst):
376 seqPMLformat.append(dPopsFormPhy[namex]) 369 seqPMLformat.append(dPopsFormPhy[namex])
377 #~ 370 #~
378 mkdir_p(outFastaFold) 371 outFastaf=open('%s.phy'%ENSEMBLT,'w')
379 outFastaf=os.path.join(outFastaFold,'%s.phy'%ENSEMBLT)
380 outFastaf=open(outFastaf,'w')
381 outFastaf.write('\n'.join(seqPMLformat)) 372 outFastaf.write('\n'.join(seqPMLformat))
382 outFastaf.close() 373 outFastaf.close()
383 #~ 374 #~
384 return 0 375 return 0
385 376
409 parser.add_argument('--chrClmn',metavar='int',type=int,help='the column with the chromosome.',required=True) 400 parser.add_argument('--chrClmn',metavar='int',type=int,help='the column with the chromosome.',required=True)
410 parser.add_argument('--posClmn',metavar='int',type=int,help='the column with the SNPs position.',required=True) 401 parser.add_argument('--posClmn',metavar='int',type=int,help='the column with the SNPs position.',required=True)
411 parser.add_argument('--refClmn',metavar='int',type=int,help='the column with the reference nucleotide.',required=True) 402 parser.add_argument('--refClmn',metavar='int',type=int,help='the column with the reference nucleotide.',required=True)
412 parser.add_argument('--altrClmn',metavar='int',type=int,help='the column with the derived nucleotide.',required=True) 403 parser.add_argument('--altrClmn',metavar='int',type=int,help='the column with the derived nucleotide.',required=True)
413 parser.add_argument('--output',metavar='output',type=str,help='the output',required=True) 404 parser.add_argument('--output',metavar='output',type=str,help='the output',required=True)
414 parser.add_argument('--output_id',metavar='int',type=int,help='the output id',required=True)
415 parser.add_argument('--output_dir',metavar='output folder sequences',type=str,help='the output folder with the sequences.',required=True)
416 parser.add_argument('--gd_indivs',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input file.',required=True) 405 parser.add_argument('--gd_indivs',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input file.',required=True)
417 #~ 406 #~
418 parser.add_argument('--inputCover',metavar='input gd_snp cover file',type=str,help='the input file with the table in gd_snp/gd_genotype cover format.',required=False,default=False) 407 parser.add_argument('--inputCover',metavar='input gd_snp cover file',type=str,help='the input file with the table in gd_snp/gd_genotype cover format.',required=False,default=False)
419 parser.add_argument('--inputCover_type',metavar='input cover type',type=str,help='the cover input file type (gd_snp or gd_genotype)',required=False,default=False) 408 parser.add_argument('--inputCover_type',metavar='input cover type',type=str,help='the cover input file type (gd_snp or gd_genotype)',required=False,default=False)
420 parser.add_argument('--gd_indivs_cover',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input cover file.',required=False,default=False) 409 parser.add_argument('--gd_indivs_cover',metavar='input gd_indivs file',type=str,help='the input reference species columns in the input cover file.',required=False,default=False)
440 args = parser.parse_args() 429 args = parser.parse_args()
441 430
442 inSNPf = args.input 431 inSNPf = args.input
443 inSNPf_type = args.input_type 432 inSNPf_type = args.input_type
444 outfile = args.output 433 outfile = args.output
445 outfile_id = args.output_id
446 outFastaFold = './out'
447 files_dir = args.output_dir
448 gd_indivs = args.gd_indivs 434 gd_indivs = args.gd_indivs
449 pxchrx = args.chrClmn 435 pxchrx = args.chrClmn
450 pxpos = args.posClmn 436 pxpos = args.posClmn
451 pxntA = args.refClmn 437 pxntA = args.refClmn
452 pxntB = args.altrClmn 438 pxntB = args.altrClmn
499 #~ 485 #~
500 dENSEMBLTChrPosdAlls,dChrPosdPopsAllls=rtrnFxdChrPosinCodReg(dChrdStrtEndENSEMBLT,dChrdPosdPopsAlllsInit)#~ print '3. Getting fixed alleles in exons...' 486 dENSEMBLTChrPosdAlls,dChrPosdPopsAllls=rtrnFxdChrPosinCodReg(dChrdStrtEndENSEMBLT,dChrdPosdPopsAlllsInit)#~ print '3. Getting fixed alleles in exons...'
501 #~ 487 #~
502 dENSEMBLTPopsFasta,lchrStartexEndposAll=rtrnSeqVars(dENSEMBLTseqChrStEnEx,dENSEMBLTChrPosdAlls)#~ print '4. Getting fasta sequences of populations...' 488 dENSEMBLTPopsFasta,lchrStartexEndposAll=rtrnSeqVars(dENSEMBLTseqChrStEnEx,dENSEMBLTChrPosdAlls)#~ print '4. Getting fasta sequences of populations...'
503 #~ 489 #~
504 wrapSeqsFasta(dENSEMBLTPopsFasta,outFastaFold,sPopsIntrst) 490 wrapSeqsFasta(dENSEMBLTPopsFasta,sPopsIntrst)
505 #~ 491 #~
506 492
507 ## get a list of output files 493 ## get a list of output files
508 files = [] 494 files = glob.glob('*.phy')
509 for dirpath, dirnames, filenames in os.walk(outFastaFold):
510 for file in filenames:
511 if file.endswith('.phy'):
512 files.append( os.path.join(dirpath, file) )
513 del dirnames[:]
514 495
515 if len(files) == 0: 496 if len(files) == 0:
516 with open(outfile, 'w') as ofh: 497 with open(outfile, 'w') as ofh:
517 print >> ofh, 'No output.' 498 print >> ofh, 'No output.'
518 else: 499 else:
519 ## the first file becomes the output 500 ## the first file becomes the output
520 file = files.pop(0) 501 file = files.pop(0)
521 shutil.move(file, outfile) 502 shutil.move(file, outfile)
522 503
523 ## rename/move the rest of the files 504 ## rename the rest of the files
524 for file in files: 505 for file in files:
525 name = os.path.basename(file)[:-4] 506 name = file[:-4]
526 name = name.replace('_', '-') 507 name = name.replace('_', '-')
527 new_filename = 'primary_{0}_{1}_visible_txt_?'.format(outfile_id, name) 508 new_filename = 'primary_{0}_{1}_visible_txt_?'.format(outfile_id, name)
528 new_pathname = os.path.join(files_dir, new_filename) 509 os.rename(file, new_filename)
529 shutil.move(file, new_pathname)
530 510
531 return 0 511 return 0
532 512
533 if __name__ == '__main__': 513 if __name__ == '__main__':
534 main() 514 main()