view commons/launcher/launchTEclass.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line source

#!/usr/bin/env python

import getopt
import glob
import os
import shutil
import subprocess
import sys


def help():
    """
    Print the command-line usage of this script on standard output.

    The program name shown is the base name of sys.argv[0].
    The print calls take a single string argument, so they behave the same
    under Python 2 (print statement) and Python 3 (print function).
    """
    lLines = [
        "",
        "usage: %s [ options ]" % ( os.path.basename( sys.argv[0] ) ),
        "options:",
        "     -h: this help",
        "     -i: name of the input file (format='fasta')",
        # the default advertised here has to match the one applied in main()
        "     -o: name of the output file (format='map', default=inFileName+'.TEclass.map')",
        "     -c: clean",
        "     -v: verbosity level (default=0/1)",
        "",
    ]
    for line in lLines:
        print( line )

def parseFastaFileFromTEclass( inFile, outFile, verbose=0 ):
    """
    Convert a fasta file whose headers carry a classification into a 'map' file.

    Each header line is expected to look like '>name|...: classif|...':
    the sequence name is the text between '>' and the first '|', and the
    classification is the text between the first ': ' and the following '|'.
    For each sequence, one tab-separated record is written:
    classification, sequence name, 1, sequence length.

    The length is summed over all the lines of a sequence, so a sequence
    wrapped over several lines yields a single record. Line terminators
    are not counted, and the last line may lack a trailing newline.
    Headers followed by no residue produce no record.

    @param inFile: name of the input fasta file
    @param outFile: name of the output map file
    @param verbose: if > 0, print the number of sequences per classification

    Raises IndexError if a header line does not contain ': '.
    """
    record = "%s\t%s\t%i\t%i\n"
    dClassif2Count = {}
    header = ""
    classif = ""
    seqLength = 0
    # 'with' guarantees both handlers are closed even if a header is malformed
    with open( inFile, "r" ) as inHandler:
        with open( outFile, "w" ) as outHandler:
            for line in inHandler:
                # strip only the line terminator, never residues
                line = line.rstrip( "\r\n" )
                if line.startswith( ">" ):
                    # a new header closes the previous sequence
                    if seqLength > 0:
                        outHandler.write( record % ( classif, header, 1, seqLength ) )
                    seqLength = 0
                    header = line[1:].split("|")[0]
                    classif = line[1:].split(": ")[1].split("|")[0]
                    dClassif2Count[ classif ] = dClassif2Count.get( classif, 0 ) + 1
                else:
                    seqLength += len( line )
            # the last sequence is not followed by any header
            if seqLength > 0:
                outHandler.write( record % ( classif, header, 1, seqLength ) )
    if verbose > 0:
        # sorted so that the summary is reproducible from one run to the next
        for classif in sorted( dClassif2Count ):
            print( "%s: %i sequences" % ( classif, dClassif2Count[ classif ] ) )
            sys.stdout.flush()
            
            
def main():
    """
    Launch TEclass to classify TE sequences.

    Steps:
      1. parse the command line (-h, -i, -o, -c, -v) and check the input file;
      2. run 'test_consensi_2.1.pl' on the input file;
      3. look for the output directory, i.e. a directory named
         '<inFileName>_*' holding exactly 4 entries, one of them being
         '<inFileName>.lib';
      4. convert that '.lib' file into the output 'map' file;
      5. if '-c' was given, remove the output directory.

    Exits with status 1 (after printing an error message) on a bad option,
    a missing input file, a failure of the external program, or when no
    output directory can be found. Returns 0 on success.
    """
    inFileName = ""
    outFileName = ""
    clean = False
    verbose = 0
    progName = os.path.basename( sys.argv[0] )

    try:
        opts, args = getopt.getopt( sys.argv[1:], "hi:o:cv:" )
    except getopt.GetoptError as err:
        print( str(err) )
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFileName = a
        elif o == "-o":
            outFileName = a
        elif o == "-c":
            clean = True
        elif o == "-v":
            # report a bad verbosity like any other usage error, not as a traceback
            try:
                verbose = int(a)
            except ValueError:
                print( "ERROR: verbosity level (-v) should be an integer, not '%s'" % ( a ) )
                help()
                sys.exit(1)

    if inFileName == "":
        print( "ERROR: missing input file (-i)" )
        help()
        sys.exit(1)
    if not os.path.exists( inFileName ):
        print( "ERROR: can't find input file '%s'" % ( inFileName ) )
        help()
        sys.exit(1)
    if outFileName == "":
        outFileName = "%s.TEclass.map" % ( inFileName )

    if verbose > 0:
        print( "START %s" % ( progName ) )
        print( "launch TEclass..." )
        sys.stdout.flush()

    # An argument list (no shell) keeps file names holding spaces or shell
    # metacharacters intact.
    prg = "test_consensi_2.1.pl"
    try:
        returnValue = subprocess.call( [ prg, inFileName ] )
    except OSError as err:
        print( "ERROR: can't launch '%s' (%s)" % ( prg, err ) )
        sys.exit(1)
    if returnValue != 0:
        print( "ERROR: '%s' returned %i" % ( prg, returnValue ) )
        sys.exit(1)

    # NOTE(review): the expected count of 4 entries presumably reflects what
    # TEclass writes in its result directory -- confirm against TEclass.
    libName = "%s.lib" % ( inFileName )
    outDir = ""
    for candidate in glob.glob( "%s_*" % ( inFileName ) ):
        if not os.path.isdir( candidate ):
            continue
        lContent = glob.glob( "%s/*" % ( candidate ) )
        if len(lContent) == 4 and "%s/%s" % ( candidate, libName ) in lContent:
            outDir = candidate
            break
    if outDir == "":
        print( "ERROR: can't find output directory" )
        sys.exit(1)

    if verbose > 0:
        print( "parse the results..." )
        sys.stdout.flush()
    # Read the '.lib' file through its path and write the map file straight
    # to its destination: the working directory never changes, so an output
    # name that is absolute or holds a directory ends up where it was asked.
    parseFastaFileFromTEclass( "%s/%s" % ( outDir, libName ),
                               outFileName,
                               verbose )

    if clean:
        if verbose > 0:
            print( "clean the temporary files..." )
            sys.stdout.flush()
        shutil.rmtree( outDir )

    if verbose > 0:
        print( "END %s" % ( progName ) )
        sys.stdout.flush()

    return 0


if __name__ == "__main__":
    # hand the value returned by main() to the caller as the exit status
    sys.exit( main() )