comparison SMART/Java/Python/CompareOverlappingSmallRef.py @ 10:c081f25e1572

Updated CompareOverlappingSmallRef.py
author m-zytnicki
date Thu, 14 Mar 2013 05:25:40 -0400
parents 769e306b7933
children
comparison
legend: equal, deleted, inserted, replaced
9:1eb55963fe39 10:c081f25e1572
70 self.nbOverlaps = 0 70 self.nbOverlaps = 0
71 self.invert = False 71 self.invert = False
72 self.antisense = False 72 self.antisense = False
73 self.collinear = False 73 self.collinear = False
74 self.distance = None 74 self.distance = None
75 self.minOverlap = False
76 self.pcOverlapQuery = False
77 self.pcOverlapRef = False
78 self.included = False
79 self.including = False
75 self.bins = {} 80 self.bins = {}
76 self.notOverlapping = False 81 self.notOverlapping = False
77 82
78 def setReferenceFile(self, fileName, format): 83 def setReferenceFile(self, fileName, format):
79 chooser = ParserChooser(self.verbosity) 84 chooser = ParserChooser(self.verbosity)
97 def setAntisense(self, boolean): 102 def setAntisense(self, boolean):
98 self.antisense = boolean 103 self.antisense = boolean
99 104
100 def setInvert(self, boolean): 105 def setInvert(self, boolean):
101 self.invert = boolean 106 self.invert = boolean
107
108 def setMinPercentOverlap(self, pcOverlapQuery, pcOverlapRef):
109 self.pcOverlapQuery = pcOverlapQuery
110 self.pcOverlapRef = pcOverlapRef
111
112 def setInclude(self, included, including):
113 self.included = included
114 self.including = including
102 115
103 def includeNotOverlapping(self, boolean): 116 def includeNotOverlapping(self, boolean):
104 self.notOverlapping = boolean 117 self.notOverlapping = boolean
105 118
106 def loadRef(self): 119 def loadRef(self):
130 if not queryTranscript.overlapWithExon(refTranscript): 143 if not queryTranscript.overlapWithExon(refTranscript):
131 return False 144 return False
132 if self.collinear and queryTranscript.getDirection() != refTranscript.getDirection(): 145 if self.collinear and queryTranscript.getDirection() != refTranscript.getDirection():
133 return False 146 return False
134 if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection(): 147 if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection():
148 return False
149 if self.included and not queryTranscript.isIncluded(refTranscript):
150 return False
151 if self.including and not refTranscript.isIncluded(queryTranscript):
152 return False
153 querySize = queryTranscript.getSize()
154 if self.pcOverlapQuery and not queryTranscript.overlapWithExon(refTranscript, int(querySize * self.pcOverlapQuery / 100.0)):
155 return False
156 refSize = refTranscript.getSize()
157 if self.pcOverlapRef and not queryTranscript.overlapWithExon(refTranscript, int(refSize * self.pcOverlapRef / 100.0)):
158 return False
159 if self.minOverlap and not queryTranscript.overlapWithExon(refTranscript, self.minOverlap):
135 return False 160 return False
136 return True 161 return True
137 162
138 def _compareTranscript(self, queryTranscript): 163 def _compareTranscript(self, queryTranscript):
139 queryChromosome = queryTranscript.getChromosome() 164 queryChromosome = queryTranscript.getChromosome()
175 self.nbWritten += 1 200 self.nbWritten += 1
176 progress.done() 201 progress.done()
177 self.writer.close() 202 self.writer.close()
178 203
179 def displayResults(self): 204 def displayResults(self):
180 print "# queries: %d" % (self.nbQueries) 205 if self.verbosity > 0:
181 print "# refs: %d" % (self.nbRefs) 206 print "# queries: %d" % (self.nbQueries)
182 print "# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps) 207 print "# refs: %d" % (self.nbRefs)
208 print "# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps)
183 209
184 def run(self): 210 def run(self):
185 self.loadRef() 211 self.loadRef()
186 self.compare() 212 self.compare()
187 self.displayResults() 213 self.displayResults()
189 if __name__ == "__main__": 215 if __name__ == "__main__":
190 216
191 description = "Compare Overlapping Small Reference v1.0.1: Provide the queries that overlap with a reference, when the reference is small. [Category: Data Comparison]" 217 description = "Compare Overlapping Small Reference v1.0.1: Provide the queries that overlap with a reference, when the reference is small. [Category: Data Comparison]"
192 218
193 parser = OptionParser(description = description) 219 parser = OptionParser(description = description)
194 parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]") 220 parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
195 parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") 221 parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
196 parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]") 222 parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
197 parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") 223 parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
198 parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]") 224 parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
199 parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]") 225 parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]")
200 parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]") 226 parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]")
201 parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]") 227 parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]")
202 parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]") 228 parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]")
203 parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]") 229 parser.add_option("-m", "--minOverlap", dest="minOverlap", action="store", default=False, type="int", help="min. #nt overlap [format: bool] [default: false]")
204 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") 230 parser.add_option("-p", "--pcOverlapQuery", dest="pcOverlapQuery", action="store", default=False, type="int", help="min. % overlap of the query [format: bool] [default: false]")
231 parser.add_option("-P", "--pcOverlapRef", dest="pcOverlapRef", action="store", default=False, type="int", help="min. % overlap of the reference [format: bool] [default: false]")
232 parser.add_option("-k", "--included", dest="included", action="store_true", default=False, help="provide query elements which are nested in reference elements [format: bool] [default: false]")
233 parser.add_option("-K", "--including", dest="including", action="store_true", default=False, help="provide query elements in which reference elements are nested [format: bool] [default: false]")
234 parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]")
235 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
205 (options, args) = parser.parse_args() 236 (options, args) = parser.parse_args()
206 237
207 cosr = CompareOverlappingSmallRef(options.verbosity) 238 cosr = CompareOverlappingSmallRef(options.verbosity)
208 cosr.setQueryFile(options.inputFileName1, options.format1) 239 cosr.setQueryFile(options.inputFileName1, options.format1)
209 cosr.setReferenceFile(options.inputFileName2, options.format2) 240 cosr.setReferenceFile(options.inputFileName2, options.format2)
210 cosr.setOutputFile(options.outputFileName) 241 cosr.setOutputFile(options.outputFileName)
211 cosr.includeNotOverlapping(options.notOverlapping) 242 cosr.includeNotOverlapping(options.notOverlapping)
212 cosr.setDistance(options.distance) 243 cosr.setDistance(options.distance)
213 cosr.setAntisense(options.antisense) 244 cosr.setAntisense(options.antisense)
245 cosr.setInclude(options.included, options.including)
214 cosr.setInvert(options.exclude) 246 cosr.setInvert(options.exclude)
215 cosr.setInvert(options.exclude) 247 cosr.setMinOverlap(options.minOverlap)
248 cosr.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef)
216 cosr.run() 249 cosr.run()
217 250