comparison commons/launcher/launchTEclass.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
comparison
equal deleted inserted replaced
30:5677346472b5 31:0ab839023fe4
1 #!/usr/bin/env python
2
3 import os
4 import sys
5 import getopt
6 import glob
7 import shutil
8
9
def help():
    """
    Print the command-line usage of this script on standard output.

    The program name shown in the usage line is the base name of
    sys.argv[0]. The text starts and ends with an empty line.
    """
    progName = os.path.basename( sys.argv[0] )
    lLines = [
        "",
        "usage: %s [ options ]" % ( progName ),
        "options:",
        " -h: this help",
        " -i: name of the input file (format='fasta')",
        # main() falls back to inFileName+'.TEclass.map' when -o is absent,
        # so the advertised default has to carry the same suffix
        " -o: name of the output file (format='map', default=inFileName+'.TEclass.map')",
        " -c: clean",
        " -v: verbosity level (default=0/1)",
        "",
    ]
    # a single write keeps the block independent of the print statement
    # (Python 2) versus print function (Python 3) difference
    sys.stdout.write( "\n".join( lLines ) + "\n" )
20
def parseFastaFileFromTEclass( inFile, outFile, verbose=0 ):
    """
    Convert a fasta file whose headers carry a classification into a 'map' file.

    For each header line (starting with '>'):
      - the sequence name is the text between '>' and the first '|';
      - the classification is the text following the first ': ', up to the
        next '|' (or the end of the line).
    NOTE(review): this header layout is inferred from the parsing itself,
    confirm it against the actual TEclass output.

    One line 'classification<TAB>name<TAB>1<TAB>length' is written per
    sequence, where the length is summed over all the lines of the sequence,
    so that sequences wrapped on several lines give a single record.
    Headers followed by no residue, and lines found before the first header,
    give no record.

    @param inFile: name of the input file (fasta)
    @param outFile: name of the output file (map), overwritten if it exists
    @param verbose: if > 0, print the number of sequences per classification
    @raise IndexError: if a header line contains no ': ' separator
    """
    dClassif2Count = {}
    header = None
    classif = None
    seqLength = 0
    recordFormat = "%s\t%s\t%i\t%i\n"

    # nested 'with' blocks close both files even if a header is malformed
    with open( inFile, "r" ) as inHandler:
        with open( outFile, "w" ) as outHandler:
            for line in inHandler:
                # strip the end-of-line explicitly: the last line of the file
                # may have none, and blindly dropping one character would
                # then lose a residue
                line = line.rstrip( "\r\n" )
                if line.startswith( ">" ):
                    # flush the previous sequence before starting a new one
                    if header is not None and seqLength > 0:
                        outHandler.write( recordFormat % ( classif, header, 1, seqLength ) )
                    description = line[1:]
                    header = description.split( "|" )[0]
                    classif = description.split( ": " )[1].split( "|" )[0]
                    dClassif2Count[ classif ] = dClassif2Count.get( classif, 0 ) + 1
                    seqLength = 0
                elif header is not None:
                    seqLength += len( line )
            # the last sequence is not followed by any header
            if header is not None and seqLength > 0:
                outHandler.write( recordFormat % ( classif, header, 1, seqLength ) )

    if verbose > 0:
        for classif in dClassif2Count:
            print( "%s: %i sequences" % ( classif, dClassif2Count[ classif ] ) )
        sys.stdout.flush()
46
47
def main():
    """
    Launch TEclass to classify TE sequences.

    Steps:
      1. parse the command-line options (-h, -i, -o, -c, -v);
      2. run 'test_consensi_2.1.pl' on the input file;
      3. look for the output directory: a directory matching '<input>_*'
         holding exactly four entries, one of them being '<input>.lib';
      4. convert '<input>.lib' into a map file written to the -o path
         (default: '<input>.TEclass.map');
      5. with -c, remove the output directory.

    The process exits with status 1 on bad options, missing input file,
    failure of the external program or missing output directory.

    @return: 0 on success
    """
    # local import: only this function needs it
    import subprocess

    inFileName = ""
    outFileName = ""
    clean = False
    verbose = 0
    progName = os.path.basename( sys.argv[0] )

    try:
        opts, args = getopt.getopt( sys.argv[1:], "hi:o:cv:" )
    except getopt.GetoptError as err:
        print( str(err) )
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFileName = a
        elif o == "-o":
            outFileName = a
        elif o == "-c":
            clean = True
        elif o == "-v":
            try:
                verbose = int(a)
            except ValueError:
                print( "ERROR: verbosity level (-v) should be an integer, not '%s'" % ( a ) )
                help()
                sys.exit(1)

    if inFileName == "":
        print( "ERROR: missing input file (-i)" )
        help()
        sys.exit(1)
    if not os.path.exists( inFileName ):
        print( "ERROR: can't find input file '%s'" % ( inFileName ) )
        help()
        sys.exit(1)
    if outFileName == "":
        outFileName = "%s.TEclass.map" % ( inFileName )

    if verbose > 0:
        print( "START %s" % ( progName ) )
        sys.stdout.flush()

    if verbose > 0:
        print( "launch TEclass..." )
        sys.stdout.flush()
    prg = "test_consensi_2.1.pl"
    # an argument list (no shell) keeps file names with spaces or shell
    # metacharacters intact
    try:
        returnValue = subprocess.call( [ prg, inFileName ] )
    except OSError as err:
        # e.g. the program is not in the PATH or is not executable
        print( "ERROR: can't launch '%s' (%s)" % ( prg, err ) )
        sys.exit(1)
    if returnValue != 0:
        print( "ERROR: '%s' returned %i" % ( prg, returnValue ) )
        sys.exit(1)

    outDir = ""
    for candidate in glob.glob( "%s_*" % ( inFileName ) ):
        if not os.path.isdir( candidate ):
            continue
        lContent = glob.glob( "%s/*" % ( candidate ) )
        if len(lContent) == 4 and "%s/%s.lib" % ( candidate, inFileName ) in lContent:
            outDir = candidate
            break
    if outDir == "":
        print( "ERROR: can't find output directory" )
        sys.exit(1)

    if verbose > 0:
        print( "parse the results..." )
        sys.stdout.flush()
    # read the library inside the output directory and write the map file
    # straight to its final location: no change of working directory and no
    # external 'mv' whose failure would go unnoticed
    parseFastaFileFromTEclass( "%s/%s.lib" % ( outDir, inFileName ),
                               outFileName,
                               verbose )

    if clean:
        if verbose > 0:
            print( "clean the temporary files..." )
            sys.stdout.flush()
        shutil.rmtree( outDir )

    if verbose > 0:
        print( "END %s" % ( progName ) )
        sys.stdout.flush()

    return 0
135
136
if __name__ == "__main__":
    # hand the value returned by main() to the shell as the exit status
    sys.exit( main() )