comparison SMART/Java/Python/CompareOverlappingSmallQuery.py @ 9:1eb55963fe39

Updated CompareOverlappingSmall*.py
author m-zytnicki
date Thu, 14 Mar 2013 05:23:05 -0400
parents 769e306b7933
children
comparison
Legend: equal, deleted, inserted, replaced
8:4dded8b1fbc4 9:1eb55963fe39
70 self.nbOverlaps = 0 70 self.nbOverlaps = 0
71 self.distance = None 71 self.distance = None
72 self.invert = False 72 self.invert = False
73 self.antisense = False 73 self.antisense = False
74 self.collinear = False 74 self.collinear = False
75 self.pcOverlapQuery = False
76 self.pcOverlapRef = False
77 self.minOverlap = False
78 self.included = False
79 self.including = False
75 self.bins = {} 80 self.bins = {}
76 self.overlaps = {} 81 self.overlaps = {}
77 self.notOverlapping = False 82 self.notOverlapping = False
78 83
79 def setReferenceFile(self, fileName, format): 84 def setReferenceFile(self, fileName, format):
98 def setCollinear(self, boolean): 103 def setCollinear(self, boolean):
99 self.collinear = boolean 104 self.collinear = boolean
100 105
101 def setAntisense(self, boolean): 106 def setAntisense(self, boolean):
102 self.antisense = boolean 107 self.antisense = boolean
108
109 def setMinPercentOverlap(self, pcOverlapQuery, pcOverlapRef):
110 self.pcOverlapQuery = pcOverlapQuery
111 self.pcOverlapRef = pcOverlapRef
112
113 def setMinOverlap(self, minOverlap):
114 self.minOverlap = minOverlap
115
116 def setInclude(self, included, including):
117 self.included = included
118 self.including = including
103 119
104 def includeNotOverlapping(self, boolean): 120 def includeNotOverlapping(self, boolean):
105 self.notOverlapping = boolean 121 self.notOverlapping = boolean
106 122
107 def loadQuery(self): 123 def loadQuery(self):
126 if not queryTranscript.overlapWithExon(refTranscript): 142 if not queryTranscript.overlapWithExon(refTranscript):
127 return False 143 return False
128 if self.collinear and queryTranscript.getDirection() != refTranscript.getDirection(): 144 if self.collinear and queryTranscript.getDirection() != refTranscript.getDirection():
129 return False 145 return False
130 if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection(): 146 if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection():
147 return False
148 if self.included and not refTranscript.include(queryTranscript):
149 return False
150 if self.including and not queryTranscript.include(refTranscript):
151 return False
152 querySize = queryTranscript.getSize()
153 if self.pcOverlapQuery and not queryTranscript.overlapWithExon(refTranscript, int(querySize * self.pcOverlapQuery / 100.0)):
154 return False
155 refSize = refTranscript.getSize()
156 if self.pcOverlapRef and not queryTranscript.overlapWithExon(refTranscript, int(refSize * self.pcOverlapRef / 100.0)):
157 return False
158 if self.minOverlap and not queryTranscript.overlapWithExon(refTranscript, self.minOverlap):
131 return False 159 return False
132 return True 160 return True
133 161
134 def _alterTranscript(self, transcript, type): 162 def _alterTranscript(self, transcript, type):
135 if type == REFERENCE: 163 if type == REFERENCE:
182 self.writer.addTranscript(transcript) 210 self.writer.addTranscript(transcript)
183 self.nbWritten += 1 211 self.nbWritten += 1
184 self.writer.close() 212 self.writer.close()
185 213
186 def displayResults(self): 214 def displayResults(self):
187 print "# queries: %d" % (self.nbQueries) 215 if self.verbosity:
188 print "# refs: %d" % (self.nbRefs) 216 print "# queries: %d" % (self.nbQueries)
189 print "# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps) 217 print "# refs: %d" % (self.nbRefs)
218 print "# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps)
190 219
191 def run(self): 220 def run(self):
192 self.loadQuery() 221 self.loadQuery()
193 self.compare() 222 self.compare()
194 self.printResults() 223 self.printResults()
197 if __name__ == "__main__": 226 if __name__ == "__main__":
198 227
199 description = "Compare Overlapping Small Query v1.0.1: Provide the queries that overlap with a reference, when the query is small. [Category: Data Comparison]" 228 description = "Compare Overlapping Small Query v1.0.1: Provide the queries that overlap with a reference, when the query is small. [Category: Data Comparison]"
200 229
201 parser = OptionParser(description = description) 230 parser = OptionParser(description = description)
202 parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]") 231 parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
203 parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") 232 parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
204 parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]") 233 parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
205 parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") 234 parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
206 parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]") 235 parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
207 parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]") 236 parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]")
208 parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]") 237 parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]")
209 parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]") 238 parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]")
210 parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]") 239 parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]")
240 parser.add_option("-m", "--minOverlap", dest="minOverlap", action="store", default=False, type="int", help="min. #nt overlap [format: bool] [default: false]")
241 parser.add_option("-p", "--pcOverlapQuery", dest="pcOverlapQuery", action="store", default=False, type="int", help="min. % overlap of the query [format: bool] [default: false]")
242 parser.add_option("-P", "--pcOverlapRef", dest="pcOverlapRef", action="store", default=False, type="int", help="min. % overlap of the reference [format: bool] [default: false]")
243 parser.add_option("-k", "--included", dest="included", action="store_true", default=False, help="provide query elements which are nested in reference elements [format: bool] [default: false]")
244 parser.add_option("-K", "--including", dest="including", action="store_true", default=False, help="provide query elements in which reference elements are nested [format: bool] [default: false]")
211 parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]") 245 parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]")
212 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") 246 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
213 (options, args) = parser.parse_args() 247 (options, args) = parser.parse_args()
214 248
215 cosq = CompareOverlappingSmallQuery(options.verbosity) 249 cosq = CompareOverlappingSmallQuery(options.verbosity)
216 cosq.setQueryFile(options.inputFileName1, options.format1) 250 cosq.setQueryFile(options.inputFileName1, options.format1)
217 cosq.setReferenceFile(options.inputFileName2, options.format2) 251 cosq.setReferenceFile(options.inputFileName2, options.format2)
218 cosq.setOutputFile(options.outputFileName) 252 cosq.setOutputFile(options.outputFileName)
219 cosq.includeNotOverlapping(options.notOverlapping) 253 cosq.includeNotOverlapping(options.notOverlapping)
220 cosq.setDistance(options.distance) 254 cosq.setDistance(options.distance)
221 cosq.setCollinear(options.collinear) 255 cosq.setCollinear(options.collinear)
222 cosq.setAntisense(options.antisense) 256 cosq.setAntisense(options.antisense)
257 cosq.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef)
258 cosq.setMinOverlap(options.minOverlap)
259 cosq.setInclude(options.included, options.including)
223 cosq.setInvert(options.exclude) 260 cosq.setInvert(options.exclude)
224 cosq.run() 261 cosq.run()
225
226