annotate commons/tools/getCumulLengthFromTEannot.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 ##@file
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 # usage: getCumulLengthFromTEannot.py [ options ]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 # options:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 # -h: this help
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # -i: table with the annotations (format=path)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 # -r: name of a TE reference sequence (if empty, all subjects are considered)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 # -g: length of the genome (in bp)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 # -C: configuration file
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 # -c: clean
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 # -v: verbosity level (default=0/1)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 from commons.core.sql.DbMySql import DbMySql
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19 from commons.core.sql.TablePathAdaptator import TablePathAdaptator
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
class getCumulLengthFromTEannot( object ):
    """
    Give the cumulative length of TE annotations (subjects mapped on queries).

    Workflow implemented by run():
      1. dump the selected paths of the input table into a "map" file,
      2. call the external program $REPET_PATH/bin/mapOp on that file,
      3. sum the lengths of the ranges found in the resulting ".merge" file,
      4. print the total and, if a genome length was given, the TE content.
    """

    def __init__( self ):
        """
        Constructor: set every attribute to its "not configured" default.
        """
        self._tableName = ""        # name of the 'path' table (option -i)
        self._TErefseq = ""         # TE reference name (option -r); "" means all subjects
        self._genomeLength = 0      # genome length in bp (option -g); 0 disables the percentage
        self._configFileName = ""   # configuration file given to DbMySql (option -C)
        self._clean = False         # remove the temporary files when True (option -c)
        self._verbose = 0           # verbosity level (option -v)
        self._db = None             # DbMySql instance, created by setAdaptatorToTable()
        self._tpA = None            # TablePathAdaptator instance, created by setAdaptatorToTable()


    def help( self ):
        """
        Display the help on stdout.
        """
        # print("") emits an empty line on both Python 2 and Python 3.
        print( "" )
        print( "usage: getCumulLengthFromTEannot.py [ options ]" )
        print( "options:" )
        print( " -h: this help" )
        print( " -i: table with the annotations (format=path)" )
        print( " -r: name of a TE reference sequence (if empty, all subjects are considered)" )
        print( " -g: length of the genome (in bp)" )
        print( " -C: configuration file" )
        print( " -c: clean" )
        print( " -v: verbosity level (default=0/1)" )
        print( "" )


    def setAttributesFromCmdLine( self ):
        """
        Set the attributes from the command-line (sys.argv).

        Exits with status 1 (after printing the help) on an unknown option,
        and with status 0 when -h is given.
        """
        try:
            opts, _args = getopt.getopt( sys.argv[1:], "hi:r:g:C:cv:" )
        except getopt.GetoptError as err:
            print( str(err) )
            self.help()
            sys.exit(1)
        for o, a in opts:
            if o == "-h":
                self.help()
                sys.exit(0)
            elif o == "-i":
                self.setInputTable( a )
            elif o == "-r":
                self.setTErefseq( a )
            elif o == "-g":
                self.setGenomeLength( a )
            elif o == "-C":
                self.setConfigFileName( a )
            elif o == "-c":
                self.setClean()
            elif o == "-v":
                self.setVerbosityLevel( a )


    def setInputTable( self, inTable ):
        """Set the name of the table holding the annotations."""
        self._tableName = inTable

    def setTErefseq( self, a ):
        """Set the name of the TE reference sequence to restrict the computation to."""
        self._TErefseq = a

    def setGenomeLength( self, genomeLength ):
        """Set the genome length in bp; raises ValueError if it is not an integer."""
        self._genomeLength = int(genomeLength)

    def setConfigFileName( self, configFileName ):
        """Set the name of the configuration file passed to DbMySql."""
        self._configFileName = configFileName

    def setClean( self ):
        """Ask for the temporary files to be removed once they have been used."""
        self._clean = True

    def setVerbosityLevel( self, verbose ):
        """Set the verbosity level; raises ValueError if it is not an integer."""
        self._verbose = int(verbose)

    def checkAttributes( self ):
        """
        Check the attributes are valid before running the algorithm.

        Prints an error and the help, then exits with status 1, when no
        input table was given.
        """
        if self._tableName == "":
            print( "ERROR: missing input table" )
            self.help()
            sys.exit(1)


    def setAdaptatorToTable( self ):
        """
        Open the database and build the adaptator used to query the input table.
        """
        self._db = DbMySql( cfgFileName=self._configFileName )
        self._tpA = TablePathAdaptator( self._db, self._tableName )


    def getAllSubjectsAsMapOfQueries( self ):
        """
        Write the selected paths into the file "<tableName>.map".

        The path identifiers are those of the TE reference sequence when one
        was given, otherwise all identifiers of the table. Each path is
        converted with getSubjectAsMapOfQuery() and written to the file.

        @return: name of the map file
        """
        mapFileName = "%s.map" % self._tableName
        # 'with' guarantees the file is closed even if a query fails.
        with open( mapFileName, "w" ) as mapFile:
            if self._TErefseq != "":
                lPathnums = self._tpA.getIdListFromSubject( self._TErefseq )
            else:
                lPathnums = self._tpA.getIdList()
            if self._verbose > 0:
                print( "nb of paths: %i" % ( len(lPathnums) ) )
            for pathnum in lPathnums:
                for path in self._tpA.getPathListFromId( pathnum ):
                    # Named 'subjectMap' so the builtin map() is not shadowed.
                    subjectMap = path.getSubjectAsMapOfQuery()
                    subjectMap.write( mapFile )
        return mapFileName


    def mergeRanges( self, mapFileName ):
        """
        Run "$REPET_PATH/bin/mapOp -q <mapFileName> -m" on the map file.

        The result is assumed to be written by mapOp into
        "<mapFileName>.merge"; this method only builds and returns that name.
        The map file is removed afterwards when cleaning was requested.
        Exits with status 1 if mapOp cannot be run or returns non-zero.

        @raise KeyError: if the environment variable REPET_PATH is not set
        @return: name of the merge file
        """
        # Local import: the module-level imports of this file do not provide it.
        import subprocess
        mergeFileName = "%s.merge" % mapFileName
        prg = os.environ["REPET_PATH"] + "/bin/mapOp"
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters intact. Standard output of mapOp is discarded;
        # its standard error is left untouched so its messages remain visible
        # without being mixed into the results printed on stdout.
        with open( os.devnull, "w" ) as devnull:
            try:
                returnCode = subprocess.call( [ prg, "-q", mapFileName, "-m" ],
                                              stdout=devnull )
            except OSError as err:
                print( "*** Error: cannot run %s (%s)" % ( prg, err ) )
                sys.exit(1)
        if returnCode != 0:
            print( "*** Error: %s returned %i" % ( prg, returnCode ) )
            sys.exit(1)
        if self._clean:
            os.remove( mapFileName )
        return mergeFileName


    def getCumulLength( self, mergeFileName ):
        """
        Sum the lengths of all the ranges listed in the merge file.

        Each non-blank line is split on tabulations; its third and fourth
        fields are the two coordinates of a range, in any order, and the
        range contributes abs(end - start) + 1 to the total. The merge file
        is removed afterwards when cleaning was requested.

        @return: cumulative length (integer)
        """
        total = 0
        with open( mergeFileName, "r" ) as mergeFile:
            for line in mergeFile:
                # A blank (e.g. trailing) line carries no range; without this
                # test it would raise an IndexError below.
                if line.strip() == "":
                    continue
                tok = line.split("\t")
                total += abs( int(tok[3]) - int(tok[2]) ) + 1
        if self._clean:
            os.remove( mergeFileName )
        return total


    def start( self ):
        """
        Useful commands before running the program: check the attributes,
        announce the start when verbose and connect to the table.
        """
        self.checkAttributes()
        if self._verbose > 0:
            print( "START %s" % ( type(self).__name__ ) )
            sys.stdout.flush()
        self.setAdaptatorToTable()


    def end( self, mapFileName, mergeFileName ):
        """
        Useful commands before ending the program: close the database and
        announce the end when verbose.

        Both parameters are currently unused; they are kept so that existing
        callers remain valid.
        """
        self._db.close()
        if self._verbose > 0:
            print( "END %s" % ( type(self).__name__ ) )
            sys.stdout.flush()


    def run( self ):
        """
        Run the program: build the map file, merge its ranges, then print the
        cumulative length and, when a genome length was given, the TE content
        as a percentage of the genome.
        """
        self.start()

        mapFileName = self.getAllSubjectsAsMapOfQueries()
        mergeFileName = self.mergeRanges( mapFileName )
        total = self.getCumulLength( mergeFileName )
        print( "cumulative length: %i bp" % total )
        if self._genomeLength > 0:
            print( "TE content: %.2f%%" % ( 100 * total / float(self._genomeLength) ) )

        self.end( mapFileName, mergeFileName )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
196
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
197
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: configure the program from the command line, then run it.
    program = getCumulLengthFromTEannot()
    program.setAttributesFromCmdLine()
    program.run()