comparison SMART/Java/Python/CompareOverlappingSmallRef.py @ 46:169d364ddd91

Uploaded
author m-zytnicki
date Mon, 30 Sep 2013 03:19:26 -0400
parents 44d5973c188c
children 2ac71607aa60
comparison
equal deleted inserted replaced
45:e454402ba9d9 46:169d364ddd91
70 self.nbOverlaps = 0 70 self.nbOverlaps = 0
71 self.invert = False 71 self.invert = False
72 self.antisense = False 72 self.antisense = False
73 self.collinear = False 73 self.collinear = False
74 self.distance = None 74 self.distance = None
75 self.minOverlap = False
76 self.pcOverlapQuery = False 75 self.pcOverlapQuery = False
77 self.pcOverlapRef = False 76 self.pcOverlapRef = False
78 self.included = False
79 self.including = False
80 self.bins = {} 77 self.bins = {}
81 self.notOverlapping = False 78 self.notOverlapping = False
82 79
83 def setReferenceFile(self, fileName, format): 80 def setReferenceFile(self, fileName, format):
84 chooser = ParserChooser(self.verbosity) 81 chooser = ParserChooser(self.verbosity)
106 self.invert = boolean 103 self.invert = boolean
107 104
108 def setMinPercentOverlap(self, pcOverlapQuery, pcOverlapRef): 105 def setMinPercentOverlap(self, pcOverlapQuery, pcOverlapRef):
109 self.pcOverlapQuery = pcOverlapQuery 106 self.pcOverlapQuery = pcOverlapQuery
110 self.pcOverlapRef = pcOverlapRef 107 self.pcOverlapRef = pcOverlapRef
111
112 def setInclude(self, included, including):
113 self.included = included
114 self.including = including
115 108
116 def includeNotOverlapping(self, boolean): 109 def includeNotOverlapping(self, boolean):
117 self.notOverlapping = boolean 110 self.notOverlapping = boolean
118 111
119 def loadRef(self): 112 def loadRef(self):
144 return False 137 return False
145 if self.collinear and queryTranscript.getDirection() != refTranscript.getDirection(): 138 if self.collinear and queryTranscript.getDirection() != refTranscript.getDirection():
146 return False 139 return False
147 if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection(): 140 if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection():
148 return False 141 return False
149 if self.included and not queryTranscript.isIncluded(refTranscript):
150 return False
151 if self.including and not refTranscript.isIncluded(queryTranscript):
152 return False
153 querySize = queryTranscript.getSize() 142 querySize = queryTranscript.getSize()
154 if self.pcOverlapQuery and not queryTranscript.overlapWithExon(refTranscript, int(querySize * self.pcOverlapQuery / 100.0)): 143 if self.pcOverlapQuery and not queryTranscript.overlapWithExon(refTranscript, int(querySize * self.pcOverlapQuery / 100.0)):
155 return False 144 return False
156 refSize = refTranscript.getSize() 145 refSize = refTranscript.getSize()
157 if self.pcOverlapRef and not queryTranscript.overlapWithExon(refTranscript, int(refSize * self.pcOverlapRef / 100.0)): 146 if self.pcOverlapRef and not queryTranscript.overlapWithExon(refTranscript, int(refSize * self.pcOverlapRef / 100.0)):
158 return False
159 if self.minOverlap and not queryTranscript.overlapWithExon(refTranscript, self.minOverlap):
160 return False 147 return False
161 return True 148 return True
162 149
163 def _compareTranscript(self, queryTranscript): 150 def _compareTranscript(self, queryTranscript):
164 queryChromosome = queryTranscript.getChromosome() 151 queryChromosome = queryTranscript.getChromosome()
178 overlaps[refTranscript.getName()] = int(float(refTranscript.getTagValue("nbElements"))) if "nbElements" in refTranscript.getTagNames() else 1 165 overlaps[refTranscript.getName()] = int(float(refTranscript.getTagValue("nbElements"))) if "nbElements" in refTranscript.getTagNames() else 1
179 self.nbOverlaps += nbElements 166 self.nbOverlaps += nbElements
180 return overlaps 167 return overlaps
181 168
182 def _updateTranscript(self, queryTranscript, overlaps): 169 def _updateTranscript(self, queryTranscript, overlaps):
183 queryTranscript.setTagValue("nbOverlaps", sum(overlaps.values()))
184 if overlaps: 170 if overlaps:
171 queryTranscript.setTagValue("nbOverlaps", sum(overlaps.values()))
185 queryTranscript.setTagValue("overlapsWith", "--".join(overlaps.keys())[:100]) 172 queryTranscript.setTagValue("overlapsWith", "--".join(overlaps.keys())[:100])
173 else:
174 queryTranscript.setTagValue("nbOverlaps", "0")
186 return queryTranscript 175 return queryTranscript
187 176
188 def compare(self): 177 def compare(self):
189 progress = UnlimitedProgress(10000, "Comparing queries", self.verbosity) 178 progress = UnlimitedProgress(10000, "Comparing queries", self.verbosity)
190 for queryTranscript in self.queryParser.getIterator(): 179 for queryTranscript in self.queryParser.getIterator():
220 parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]") 209 parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
221 parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") 210 parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
222 parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]") 211 parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
223 parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") 212 parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
224 parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]") 213 parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
225 parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]") 214 parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]")
226 parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]") 215 parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]")
227 parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]") 216 parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]")
228 parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]") 217 parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]")
229 parser.add_option("-m", "--minOverlap", dest="minOverlap", action="store", default=False, type="int", help="min. #nt overlap [format: bool] [default: false]") 218 parser.add_option("-p", "--pcOverlapQuery", dest="pcOverlapQuery", action="store", default=False, type="int", help="min. % overlap of the query [format: bool] [default: false]")
230 parser.add_option("-p", "--pcOverlapQuery", dest="pcOverlapQuery", action="store", default=False, type="int", help="min. % overlap of the query [format: bool] [default: false]") 219 parser.add_option("-P", "--pcOverlapRef", dest="pcOverlapRef", action="store", default=False, type="int", help="min. % overlap of the reference [format: bool] [default: false]")
231 parser.add_option("-P", "--pcOverlapRef", dest="pcOverlapRef", action="store", default=False, type="int", help="min. % overlap of the reference [format: bool] [default: false]") 220 parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]")
232 parser.add_option("-k", "--included", dest="included", action="store_true", default=False, help="provide query elements which are nested in reference elements [format: bool] [default: false]") 221 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
233 parser.add_option("-K", "--including", dest="including", action="store_true", default=False, help="provide query elements in which reference elements are nested [format: bool] [default: false]")
234 parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]")
235 parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
236 (options, args) = parser.parse_args() 222 (options, args) = parser.parse_args()
237 223
238 cosr = CompareOverlappingSmallRef(options.verbosity) 224 cosr = CompareOverlappingSmallRef(options.verbosity)
239 cosr.setQueryFile(options.inputFileName1, options.format1) 225 cosr.setQueryFile(options.inputFileName1, options.format1)
240 cosr.setReferenceFile(options.inputFileName2, options.format2) 226 cosr.setReferenceFile(options.inputFileName2, options.format2)
241 cosr.setOutputFile(options.outputFileName) 227 cosr.setOutputFile(options.outputFileName)
242 cosr.includeNotOverlapping(options.notOverlapping) 228 cosr.includeNotOverlapping(options.notOverlapping)
243 cosr.setDistance(options.distance) 229 cosr.setDistance(options.distance)
244 cosr.setAntisense(options.antisense) 230 cosr.setAntisense(options.antisense)
245 cosr.setInclude(options.included, options.including)
246 cosr.setInvert(options.exclude) 231 cosr.setInvert(options.exclude)
247 cosr.setMinOverlap(options.minOverlap)
248 cosr.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef) 232 cosr.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef)
249 cosr.run() 233 cosr.run()
250 234