diff commons/launcher/launchTEclass.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/launchTEclass.py	Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+import glob
+import shutil
+
+
+def help():
+    print
+    print "usage: %s [ options ]" % ( sys.argv[0].split("/")[-1] )
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format='fasta')"
+    print "     -o: name of the output file (format='map', default=inFileName+'.map')"
+    print "     -c: clean"
+    print "     -v: verbosity level (default=0/1)"
+    print
+
+def parseFastaFileFromTEclass( inFile, outFile, verbose=0 ):
+    tmpHandler = open( inFile, "r" )
+    outHandler = open( outFile, "w" )
+    dClassif2Count = {}
+    header = ""
+    classif = ""
+    while True:
+        line = tmpHandler.readline()
+        if line == "":
+            break
+        if line[0] == ">":
+            header = line[1:].split("|")[0]
+            classif = line[1:-1].split(": ")[1].split("|")[0]
+            if not dClassif2Count.has_key( classif ):
+                dClassif2Count[ classif ] = 0
+            dClassif2Count[ classif ] += 1
+        else:
+            seqLength = len(line[:-1])
+            outHandler.write( "%s\t%s\t%i\t%i\n" % ( classif, header, 1, seqLength ) )
+    tmpHandler.close()
+    outHandler.close()
+    if verbose > 0:
+        for classif in dClassif2Count.keys():
+            print "%s: %i sequences" % ( classif, dClassif2Count[ classif ] )
+            sys.stdout.flush()
+            
+            
+def main():
+    """
+    Launch TEclass to classify TE sequences.
+    """
+    inFileName = ""
+    outFileName = ""
+    clean = False
+    verbose = 0
+
+    try:
+        opts, args = getopt.getopt( sys.argv[1:], "hi:o:cv:" )
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-c":
+            clean = True
+        elif o == "-v":
+            verbose = int(a)
+            
+    if inFileName == "":
+        print "ERROR: missing input file (-i)"
+        help()
+        sys.exit(1)
+    if not os.path.exists( inFileName ):
+        print "ERROR: can't find input file '%s'" % ( inFileName )
+        help()
+        sys.exit(1)
+    if outFileName == "":
+        outFileName = "%s.TEclass.map" % ( inFileName )
+        
+    if verbose > 0:
+        print "START %s" % ( sys.argv[0].split("/")[-1] )
+        sys.stdout.flush()
+        
+    if verbose > 0:
+        print "launch TEclass..."
+        sys.stdout.flush()
+    prg = "test_consensi_2.1.pl"
+    cmd = prg
+    cmd += " %s" % ( inFileName )
+    returnValue = os.system( cmd )
+    if returnValue != 0:
+        print "ERROR: '%s' returned %i" % ( prg, returnValue )
+        sys.exit(1)
+        
+    lOut1 = glob.glob( "%s_*" % ( inFileName ) )
+    outDir = ""
+    for i in lOut1:
+        if os.path.isdir( i ):
+            lOut2 = glob.glob( "%s/*" % ( i ) )
+            if len(lOut2) == 4 and "%s/%s.lib" % ( i, inFileName ) in lOut2:
+                outDir = i
+                break
+    if outDir == "":
+        print "ERROR: can't find output directory"
+        sys.exit(1)
+    os.chdir( outDir )
+    
+    if verbose > 0:
+        print "parse the results..."
+        sys.stdout.flush()
+    parseFastaFileFromTEclass( "%s.lib" % ( inFileName ),
+                               outFileName,
+                               verbose )
+    os.system( "mv %s .." % ( outFileName ) )
+    os.chdir( ".." )
+    
+    if clean:
+        if verbose > 0:
+            print "clean the temporary files..."
+            sys.stdout.flush()
+        shutil.rmtree( outDir )
+        
+    if verbose > 0:
+        print "END %s" % ( sys.argv[0].split("/")[-1] )
+        sys.stdout.flush()
+        
+    return 0
+
+
+if __name__ == "__main__":
+    main()