# HG changeset patch # User m-zytnicki # Date 1363253140 14400 # Node ID c081f25e1572e76296ba8acfd7d8abbc6b40de2e # Parent 1eb55963fe390177112e01e662ef1e10a7722ee5 Updated CompareOverlappingSmallRef.py diff -r 1eb55963fe39 -r c081f25e1572 SMART/Java/Python/CompareOverlappingSmallRef.py --- a/SMART/Java/Python/CompareOverlappingSmallRef.py Thu Mar 14 05:23:05 2013 -0400 +++ b/SMART/Java/Python/CompareOverlappingSmallRef.py Thu Mar 14 05:25:40 2013 -0400 @@ -72,6 +72,11 @@ self.antisense = False self.collinear = False self.distance = None + self.minOverlap = False + self.pcOverlapQuery = False + self.pcOverlapRef = False + self.included = False + self.including = False self.bins = {} self.notOverlapping = False @@ -100,6 +105,14 @@ def setInvert(self, boolean): self.invert = boolean + def setMinPercentOverlap(self, pcOverlapQuery, pcOverlapRef): + self.pcOverlapQuery = pcOverlapQuery + self.pcOverlapRef = pcOverlapRef + + def setInclude(self, included, including): + self.included = included + self.including = including + def includeNotOverlapping(self, boolean): self.notOverlapping = boolean @@ -133,6 +146,18 @@ return False if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection(): return False + if self.included and not queryTranscript.isIncluded(refTranscript): + return False + if self.including and not refTranscript.isIncluded(queryTranscript): + return False + querySize = queryTranscript.getSize() + if self.pcOverlapQuery and not queryTranscript.overlapWithExon(refTranscript, int(querySize * self.pcOverlapQuery / 100.0)): + return False + refSize = refTranscript.getSize() + if self.pcOverlapRef and not queryTranscript.overlapWithExon(refTranscript, int(refSize * self.pcOverlapRef / 100.0)): + return False + if self.minOverlap and not queryTranscript.overlapWithExon(refTranscript, self.minOverlap): + return False return True def _compareTranscript(self, queryTranscript): @@ -177,9 +202,10 @@ self.writer.close() def displayResults(self): - print "# queries: %d" % (self.nbQueries) - print "# refs: %d" % (self.nbRefs) - print "# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps) + if self.verbosity > 0: + print "# queries: %d" % (self.nbQueries) + print "# refs: %d" % (self.nbRefs) + print "# written: %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps) def run(self): self.loadRef() @@ -191,17 +217,22 @@ description = "Compare Overlapping Small Reference v1.0.1: Provide the queries that overlap with a reference, when the reference is small. [Category: Data Comparison]" parser = OptionParser(description = description) - parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]") - parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") - parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]") - parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") - parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]") - parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]") - parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]") - parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]") - parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]") - parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]") - parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") + parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]") + parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") + parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]") + parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]") + parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]") + parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]") + parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]") + parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]") + parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]") + parser.add_option("-m", "--minOverlap", dest="minOverlap", action="store", default=False, type="int", help="min. #nt overlap [format: bool] [default: false]") + parser.add_option("-p", "--pcOverlapQuery", dest="pcOverlapQuery", action="store", default=False, type="int", help="min. % overlap of the query [format: bool] [default: false]") + parser.add_option("-P", "--pcOverlapRef", dest="pcOverlapRef", action="store", default=False, type="int", help="min. % overlap of the reference [format: bool] [default: false]") + parser.add_option("-k", "--included", dest="included", action="store_true", default=False, help="provide query elements which are nested in reference elements [format: bool] [default: false]") + parser.add_option("-K", "--including", dest="including", action="store_true", default=False, help="provide query elements in which reference elements are nested [format: bool] [default: false]") + parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]") + parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") (options, args) = parser.parse_args() cosr = CompareOverlappingSmallRef(options.verbosity) @@ -211,7 +242,9 @@ cosr.includeNotOverlapping(options.notOverlapping) cosr.setDistance(options.distance) cosr.setAntisense(options.antisense) + cosr.setInclude(options.included, options.including) cosr.setInvert(options.exclude) - cosr.setInvert(options.exclude) + cosr.setMinOverlap(options.minOverlap) + cosr.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef) cosr.run()