annotate commons/launcher/launchTEclass.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 import glob
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 import shutil
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def help():
    """
    Print the command-line usage of this script on standard output.

    The message is framed by one empty line before and one after.
    The program name shown is the base name of sys.argv[0].
    Note: the name shadows the 'help' builtin inside this module; it is
    kept because the rest of the script calls it under this name.
    """
    lLines = [
        "",
        "usage: %s [ options ]" % ( os.path.basename( sys.argv[0] ) ),
        "options:",
        " -h: this help",
        " -i: name of the input file (format='fasta')",
        # the default advertised here has to match the one applied in main()
        " -o: name of the output file (format='map', default=inFileName+'.TEclass.map')",
        " -c: clean",
        " -v: verbosity level (default=0/1)",
        "",
    ]
    # a single parenthesised string prints identically under Python 2 and 3
    print( "\n".join( lLines ) )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def parseFastaFileFromTEclass( inFile, outFile, verbose=0 ):
    """
    Convert a fasta file whose headers carry a classification into a
    tab-delimited file.

    A header line starts with '>'. The sequence name is the text between
    '>' and the first '|'. The classification is the text that follows the
    first ': ' of the header, up to the next '|'.
    For every non-header line, one record is written:
    classification <tab> sequence name <tab> 1 <tab> length of the line.

    @param inFile: name of the fasta file to read
    @param outFile: name of the tab-delimited file to write (overwritten)
    @param verbose: if > 0, print the number of headers seen for each
                    classification on standard output
    @raise IndexError: if a header line does not contain ': '
    """
    dClassif2Count = {}
    header = ""
    classif = ""
    # 'with' closes both files even when a malformed header raises
    with open( inFile, "r" ) as inHandler:
        with open( outFile, "w" ) as outHandler:
            for line in inHandler:
                # drop the end-of-line only: the last line of a file may
                # lack it, so blindly cutting the last character would
                # shorten the final sequence by one
                line = line.rstrip( "\r\n" )
                if line.startswith( ">" ):
                    description = line[1:]
                    header = description.split( "|" )[0]
                    classif = description.split( ": " )[1].split( "|" )[0]
                    dClassif2Count[ classif ] = dClassif2Count.get( classif, 0 ) + 1
                else:
                    outHandler.write( "%s\t%s\t%i\t%i\n" % ( classif, header, 1, len( line ) ) )
    if verbose > 0:
        for name in dClassif2Count:
            print( "%s: %i sequences" % ( name, dClassif2Count[ name ] ) )
        sys.stdout.flush()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
46
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
47
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def main():
    """
    Launch TEclass to classify TE sequences.

    Options are read from sys.argv (see help()). The steps are:
    1. run 'test_consensi_2.1.pl' on the input file;
    2. find the directory it produced: a directory matching
       '<input>_*' that holds exactly four entries, one of them being
       '<input>.lib';
    3. convert '<input>.lib' into the output 'map' file and move that
       file to the parent of the TEclass directory;
    4. with -c, remove the TEclass directory.

    Any error is reported on standard output and ends the process with
    exit status 1.

    @return: 0 on success
    """
    # imported here because only this function needs it
    import subprocess

    inFileName = ""
    outFileName = ""
    clean = False
    verbose = 0
    progName = os.path.basename( sys.argv[0] )

    try:
        opts, _ = getopt.getopt( sys.argv[1:], "hi:o:cv:" )
    except getopt.GetoptError as err:
        print( str(err) )
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFileName = a
        elif o == "-o":
            outFileName = a
        elif o == "-c":
            clean = True
        elif o == "-v":
            try:
                verbose = int(a)
            except ValueError:
                # report a bad value like every other usage error
                print( "ERROR: verbosity (-v) should be an integer, not '%s'" % ( a ) )
                help()
                sys.exit(1)

    if inFileName == "":
        print( "ERROR: missing input file (-i)" )
        help()
        sys.exit(1)
    if not os.path.exists( inFileName ):
        print( "ERROR: can't find input file '%s'" % ( inFileName ) )
        help()
        sys.exit(1)
    if outFileName == "":
        outFileName = "%s.TEclass.map" % ( inFileName )

    if verbose > 0:
        print( "START %s" % ( progName ) )
        sys.stdout.flush()

    if verbose > 0:
        print( "launch TEclass..." )
        sys.stdout.flush()
    prg = "test_consensi_2.1.pl"
    # argument list instead of a shell string: a file name holding spaces
    # or shell metacharacters is handed over untouched
    try:
        returnValue = subprocess.call( [ prg, inFileName ] )
    except OSError as err:
        print( "ERROR: can't launch '%s' (%s)" % ( prg, err ) )
        sys.exit(1)
    if returnValue != 0:
        print( "ERROR: '%s' returned %i" % ( prg, returnValue ) )
        sys.exit(1)

    # look for the directory written by TEclass
    outDir = ""
    for candidate in glob.glob( "%s_*" % ( inFileName ) ):
        if not os.path.isdir( candidate ):
            continue
        lContent = glob.glob( "%s/*" % ( candidate ) )
        if len(lContent) == 4 and "%s/%s.lib" % ( candidate, inFileName ) in lContent:
            outDir = candidate
            break
    if outDir == "":
        print( "ERROR: can't find output directory" )
        sys.exit(1)

    if verbose > 0:
        print( "parse the results..." )
        sys.stdout.flush()
    startDir = os.getcwd()
    os.chdir( outDir )
    try:
        parseFastaFileFromTEclass( "%s.lib" % ( inFileName ),
                                   outFileName,
                                   verbose )
        # explicit target path (not just the directory) so that a map file
        # left by a previous run is replaced, as 'mv' would do
        shutil.move( outFileName,
                     os.path.join( os.pardir, os.path.basename( outFileName ) ) )
    finally:
        # always come back, even if the parsing failed
        os.chdir( startDir )

    if clean:
        if verbose > 0:
            print( "clean the temporary files..." )
            sys.stdout.flush()
        shutil.rmtree( outDir )

    if verbose > 0:
        print( "END %s" % ( progName ) )
        sys.stdout.flush()

    return 0
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
135
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
136
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == "__main__":
    # main() returns an integer status: use it as the exit status of the
    # process instead of discarding it
    sys.exit( main() )