view commons/launcher/LaunchRefalign_old.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line source

#!/usr/bin/env python

import os
import sys
import getopt

import pyRepet.launcher.programLauncher
from commons.core.seq.BioseqDB import BioseqDB
from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders


def help():
    """
    Print the usage message of the script on standard output.

    The message starts and ends with an empty line and lists every
    command-line option accepted by main().
    """
    # A single parenthesised string is printed identically by the Python 2
    # 'print' statement and by the Python 3 'print' function, so the script
    # stays loadable under both interpreters.
    usageLines = (
        "",
        "usage: launchRefalign.py [ options ]",
        "options:",
        "     -h: this help",
        "     -i: name of the input file (refseq is first, format='fasta')",
        "     -r: keep the reference sequence",
        "     -o: name of the output file (default=inFileName+'.fa_aln')",
        "     -v: verbose (default=0)",
        "",
    )
    print("\n".join(usageLines))


def main():
    """
    Launch 'refalign' to build a master-slave multiple sequence alignment.

    Options are read from sys.argv (see help()): '-i' (compulsory input
    fasta file), '-r' (keep the reference sequence), '-o' (output file,
    default '<input>.fa_aln') and '-v' (integer verbosity level).

    Steps:
      1. run ChangeSequenceHeaders (step 1, prefix 'seq') on the input,
         writing '<input>.shortH' and the link file '<input>.shortHlink'
         (presumably shortening the headers -- confirm in that class);
      2. upper-case the sequences of '<input>.shortH';
      3. run refalign through pyRepet's programLauncher;
      4. run ChangeSequenceHeaders (step 2) with the same link file to
         write the final output file (presumably restoring the headers);
      5. remove the intermediate files, even if a step failed.

    Exits with status 1 on invalid or missing options, 0 after '-h'.

    @return: 0 on success
    """
    inFileName = ""
    keepRefseq = False
    outFileName = ""
    verbose = 0
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hi:ro:v:")
    except getopt.GetoptError as err:
        print(str(err))
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFileName = a
        elif o == "-r":
            keepRefseq = True
        elif o == "-o":
            outFileName = a
        elif o == "-v":
            # Report a bad verbosity value as a usage error rather than
            # letting the ValueError escape as a traceback.
            try:
                verbose = int(a)
            except ValueError:
                print("ERROR: option '-v' requires an integer (got '%s')" % a)
                help()
                sys.exit(1)
    if inFileName == "":
        print("ERROR: missing compulsory options")
        help()
        sys.exit(1)

    progName = os.path.basename(sys.argv[0])

    if verbose > 0:
        print("START %s" % progName)
        sys.stdout.flush()

    if verbose > 0:
        print("build a multiple alignment from '%s'..." % inFileName)
        sys.stdout.flush()

    if outFileName == "":
        outFileName = "%s.fa_aln" % inFileName

    # Intermediate files, all derived from the input file name.
    shortHFile = inFileName + ".shortH"
    shortHTmpFile = inFileName + ".shortHtmp"
    linkFile = inFileName + ".shortHlink"
    shortHAlnFile = shortHFile + ".fa_aln"

    try:
        csh = ChangeSequenceHeaders()
        csh.setInputFile(inFileName)
        csh.setFormat("fasta")
        csh.setStep(1)
        csh.setPrefix("seq")
        csh.setLinkFile(linkFile)
        csh.setOutputFile(shortHFile)
        csh.setVerbosityLevel(verbose - 1)
        csh.run()

        # Upper-case the sequences via a temporary file, then replace the
        # short-header file with it.
        bsDB = BioseqDB(shortHFile)
        bsDB.upCase()
        bsDB.save(shortHTmpFile)
        del bsDB
        os.rename(shortHTmpFile, shortHFile)

        pL = pyRepet.launcher.programLauncher.programLauncher(shortHFile)
        refalignArgs = {"outFileName": shortHAlnFile, "verbose": verbose}
        if keepRefseq:
            # The reference is the first input sequence; with prefix 'seq'
            # it is expected to be named 'seq1' in the short-header file.
            refalignArgs["refseqName"] = "seq1"
        pL.launchRefalign(**refalignArgs)

        # Same ChangeSequenceHeaders instance, now in step 2, fed with the
        # link file written during step 1.
        csh.setInputFile(shortHAlnFile)
        csh.setFormat("fasta")
        csh.setStep(2)
        csh.setLinkFile(linkFile)
        csh.setOutputFile(outFileName)
        csh.setVerbosityLevel(verbose - 1)
        csh.run()
    finally:
        # Always clean up, and only remove what actually exists so that a
        # failure above is not masked by an OSError raised here.
        for f in [shortHFile, shortHTmpFile, linkFile, shortHAlnFile]:
            if os.path.exists(f):
                os.remove(f)

    if verbose > 0:
        print("END %s" % progName)
        sys.stdout.flush()

    return 0


# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())