changeset 60:90f4b29d884f

Uploaded
author m-zytnicki
date Fri, 21 Feb 2014 08:32:36 -0500
parents 2a4884ba3e5c
children 3a52eb875640
files SMART/Java/Python/CompareOverlappingSmallRef.py SMART/Java/Python/clusterize.py SMART/Java/Python/misc/RPlotter.pyc SMART/Java/Python/structure/TranscriptContainer.pyc SMART/galaxy/.CompareOverlappingSmallRef.xml.swp SMART/galaxy/Clusterize.xml SMART/galaxy/CompareOverlappingAdapt.xml SMART/galaxy/CompareOverlappingSmallQuery.xml SMART/galaxy/CompareOverlappingSmallRef.xml
diffstat 9 files changed, 135 insertions(+), 283 deletions(-) [+]
line wrap: on
line diff
--- a/SMART/Java/Python/CompareOverlappingSmallRef.py	Mon Feb 10 03:39:09 2014 -0500
+++ b/SMART/Java/Python/CompareOverlappingSmallRef.py	Fri Feb 21 08:32:36 2014 -0500
@@ -235,6 +235,7 @@
 	cosr.includeNotOverlapping(options.notOverlapping)
 	cosr.setDistance(options.distance)
 	cosr.setAntisense(options.antisense)
+	cosr.setCollinear(options.collinear)
 	cosr.setInvert(options.exclude)
 	cosr.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef)
 	cosr.setMinOverlap(options.minOverlap)
--- a/SMART/Java/Python/clusterize.py	Mon Feb 10 03:39:09 2014 -0500
+++ b/SMART/Java/Python/clusterize.py	Fri Feb 21 08:32:36 2014 -0500
@@ -33,6 +33,7 @@
 
 import os, os.path, random
 from optparse import OptionParser
+from heapq import heappush, heappop
 from commons.core.parsing.ParserChooser import ParserChooser
 from commons.core.writer.Gff3Writer import Gff3Writer
 from SMART.Java.Python.structure.Transcript import Transcript
@@ -44,26 +45,31 @@
 class Clusterize(object):
 
 	def __init__(self, verbosity):
-		self.normalize		 = False
-		self.presorted		 = False
-		self.distance		  = 1
-		self.colinear		  = False
-		self.nbWritten		 = 0
-		self.nbMerges		  = 0
-		self.verbosity		 = verbosity
+		self.parsers           = {}
+		self.sortedFileNames   = {}
+		self.normalize         = False
+		self.presorted         = False
+		self.distance          = 1
+		self.collinear         = False
+		self.nbWritten         = 0
+		self.nbMerges          = 0
+		self.verbosity         = verbosity
 		self.splittedFileNames = {}
+		self.chromosomes       = set()
 
 	def __del__(self):
-		for fileName in self.splittedFileNames.values():
-			os.remove(fileName)
+		for fileName1 in self.splittedFileNames:
+			for fileName2 in self.splittedFileNames[fileName1].values():
+				os.remove(fileName2)
 
-	def setInputFile(self, fileName, format):
+	def setInputFiles(self, fileNames, format):
 		parserChooser = ParserChooser(self.verbosity)
 		parserChooser.findFormat(format)
-		self.parser = parserChooser.getParser(fileName)
-		self.sortedFileName = "%s_sorted_%d.pkl" % (os.path.splitext(fileName)[0], random.randint(1, 100000))
-		if "SMARTTMPPATH" in os.environ:
-			self.sortedFileName = os.path.join(os.environ["SMARTTMPPATH"], os.path.basename(self.sortedFileName))
+		for fileName in fileNames:
+			self.parsers[fileName] = parserChooser.getParser(fileName)
+			self.sortedFileNames[fileName] = "%s_sorted_%d.pkl" % (os.path.splitext(fileName)[0], random.randint(1, 100000))
+			if "SMARTTMPPATH" in os.environ:
+				self.sortedFileNames[fileName] = os.path.join(os.environ["SMARTTMPPATH"], os.path.basename(self.sortedFileNames[fileName]))
 
 	def setOutputFileName(self, fileName, format="gff3", title="S-MART", feature="transcript", featurePart="exon"):
 		writerChooser = WriterChooser()
@@ -76,8 +82,8 @@
 	def setDistance(self, distance):
 		self.distance = distance
 
-	def setColinear(self, colinear):
-		self.colinear = colinear
+	def setColinear(self, collinear):
+		self.collinear = collinear
 
 	def setNormalize(self, normalize):
 		self.normalize = normalize
@@ -85,42 +91,60 @@
 	def setPresorted(self, presorted):
 		self.presorted = presorted
 
-	def _sortFile(self):
+	def _sortFiles(self):
 		if self.presorted:
 			return
-		fs = FileSorter(self.parser, self.verbosity-4)
-		fs.perChromosome(True)
-		fs.setPresorted(self.presorted)
-		fs.setOutputFileName(self.sortedFileName)
-		fs.sort()
-		self.splittedFileNames       = fs.getOutputFileNames()
-		self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()
-		self.nbElements              = fs.getNbElements()
+		for fileName, parser in self.parsers.iteritems():
+			fs = FileSorter(parser, self.verbosity-4)
+			fs.perChromosome(True)
+			fs.setPresorted(self.presorted)
+			fs.setOutputFileName(self.sortedFileNames[fileName])
+			fs.sort()
+			self.splittedFileNames[fileName] = fs.getOutputFileNames()
+			self.nbElementsPerChromosome     = fs.getNbElementsPerChromosome()
+			self.nbElements                  = fs.getNbElements()
+			self.chromosomes.update(self.splittedFileNames[fileName].keys())
 		
-	def _iterate(self, chromosome):
-		if chromosome == None:
-			progress = UnlimitedProgress(10000, "Reading input file", self.verbosity)
-			parser   = self.parser
-		else:
-			progress = Progress(self.nbElementsPerChromosome[chromosome], "Checking chromosome %s" % (chromosome), self.verbosity)
-			parser   = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)
+	def _iterate(self):
+		progress = UnlimitedProgress(10000, "Reading input file", self.verbosity)
 		transcripts = []
-		for newTranscript in parser.getIterator():
-			newTranscripts = []
-			if newTranscript.__class__.__name__ == "Mapping":
-				newTranscript = newTranscript.getTranscript()
-			for oldTranscript in transcripts:
-				if self._checkOverlap(newTranscript, oldTranscript):
-					self._merge(newTranscript, oldTranscript)
-				elif self._checkPassed(newTranscript, oldTranscript):
-					self._write(oldTranscript)
-				else:
-					newTranscripts.append(oldTranscript)
-			newTranscripts.append(newTranscript)
-			transcripts = newTranscripts
-			progress.inc()
-		for transcript in transcripts:
-			self._write(transcript)
+		heap        = []
+		parsersSets = []
+		if self.chromosomes:
+			parsersSets.append(self.parsers.values())
+		else:
+			for chromosome in self.chromosomes:
+				parsersSets.append([self.splittedFileNames[fileName][chromosome] for fileName in self.splittedFileNames if chromosome in self.splittedFileNames[fileName]])
+		for parsers in parsersSets:
+			for parser in parsers:
+				iterator = parser.getIterator()
+				for transcript in iterator:
+					if transcript.__class__.__name__ == "Mapping":
+						transcript = transcript.getTranscript()
+					heappush(heap, (transcript.getChromosome(), transcript.getStart(), -transcript.getEnd(), transcript, iterator))
+					break
+			while heap:
+				chromosome, start, end, newTranscript, iterator = heappop(heap)
+				for transcript in iterator:
+					if transcript.__class__.__name__ == "Mapping":
+						transcript = transcript.getTranscript()
+					heappush(heap, (transcript.getChromosome(), transcript.getStart(), -transcript.getEnd(), transcript, iterator))
+					break
+				newTranscripts = []
+				if newTranscript.__class__.__name__ == "Mapping":
+					newTranscript = newTranscript.getTranscript()
+				for oldTranscript in transcripts:
+					if self._checkOverlap(newTranscript, oldTranscript):
+						self._merge(newTranscript, oldTranscript)
+					elif self._checkPassed(newTranscript, oldTranscript):
+						self._write(oldTranscript)
+					else:
+						newTranscripts.append(oldTranscript)
+				newTranscripts.append(newTranscript)
+				transcripts = newTranscripts
+				progress.inc()
+			for transcript in transcripts:
+				self._write(transcript)
 		progress.done()
 
 	def _merge(self, transcript1, transcript2):
@@ -135,7 +159,7 @@
 	def _checkOverlap(self, transcript1, transcript2):
 		if transcript1.getChromosome() != transcript2.getChromosome():
 			return False
-		if self.colinear and transcript1.getDirection() != transcript2.getDirection():
+		if self.collinear and transcript1.getDirection() != transcript2.getDirection():
 			return False
 		if transcript1.getDistance(transcript2) > self.distance:
 			return False
@@ -145,12 +169,8 @@
 		return ((transcript1.getChromosome() != transcript2.getChromosome()) or (transcript1.getDistance(transcript2) > self.distance))
 
 	def run(self):
-		self._sortFile()
-		if self.presorted:
-			self._iterate(None)
-		else:
-			for chromosome in sorted(self.splittedFileNames.keys()):
-				self._iterate(chromosome)
+		self._sortFiles()
+		self._iterate()
 		self.writer.close()
 		if self.verbosity > 0:
 			print "# input:   %d" % (self.nbElements)
@@ -162,21 +182,21 @@
 	description = "Clusterize v1.0.3: clusterize the data which overlap. [Category: Merge]"
 
 	parser = OptionParser(description = description)
-	parser.add_option("-i", "--input",     dest="inputFileName",  action="store",				     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-	parser.add_option("-f", "--format",    dest="format",		 action="store",				     type="string", help="format of file [format: transcript file format]")
-	parser.add_option("-o", "--output",    dest="outputFileName", action="store",				     type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")
-	parser.add_option("-u", "--outputFormat", dest="outputFormat", action="store",     default="gff",		     type="string", help="output file format [format: transcript file format]")
-	parser.add_option("-c", "--colinear",  dest="colinear",       action="store_true", default=False,				help="merge colinear transcripts only [format: bool] [default: false]")
-	parser.add_option("-d", "--distance",  dest="distance",       action="store",      default=0,     type="int",    help="max. distance between two transcripts to be merged [format: int] [default: 0]")
-	parser.add_option("-n", "--normalize", dest="normalize",      action="store_true", default=False,				help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
-	parser.add_option("-s", "--sorted",    dest="sorted",		 action="store_true", default=False,				help="input is already sorted [format: bool] [default: false]")
-	parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")
+	parser.add_option("-i", "--inputs",       dest="inputFileNames", action="store",				     type="string", help="input files (separated by commas) [compulsory] [format: string]")
+	parser.add_option("-f", "--format",       dest="format",		 action="store",				     type="string", help="format of file [format: transcript file format]")
+	parser.add_option("-o", "--output",       dest="outputFileName", action="store",				     type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")
+	parser.add_option("-u", "--outputFormat", dest="outputFormat",   action="store",      default="gff",		        type="string", help="output file format [format: transcript file format]")
+	parser.add_option("-c", "--collinear",    dest="collinear",      action="store_true", default=False,				help="merge collinear transcripts only [format: bool] [default: false]")
+	parser.add_option("-d", "--distance",     dest="distance",       action="store",      default=0,     type="int",    help="max. distance between two transcripts to be merged [format: int] [default: 0]")
+	parser.add_option("-n", "--normalize",    dest="normalize",      action="store_true", default=False,				help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
+	parser.add_option("-s", "--sorted",       dest="sorted",		 action="store_true", default=False,				help="input is already sorted [format: bool] [default: false]")
+	parser.add_option("-v", "--verbosity",    dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")
 	(options, args) = parser.parse_args()
 
 	c = Clusterize(options.verbosity)
-	c.setInputFile(options.inputFileName, options.format)
+	c.setInputFiles(options.inputFileNames.split(","), options.format)
 	c.setOutputFileName(options.outputFileName, options.outputFormat)
-	c.setColinear(options.colinear)
+	c.setColinear(options.collinear)
 	c.setDistance(options.distance)
 	c.setNormalize(options.normalize)
 	c.setPresorted(options.sorted)
Binary file SMART/Java/Python/misc/RPlotter.pyc has changed
Binary file SMART/Java/Python/structure/TranscriptContainer.pyc has changed
Binary file SMART/galaxy/.CompareOverlappingSmallRef.xml.swp has changed
--- a/SMART/galaxy/Clusterize.xml	Mon Feb 10 03:39:09 2014 -0500
+++ b/SMART/galaxy/Clusterize.xml	Fri Feb 21 08:32:36 2014 -0500
@@ -4,26 +4,8 @@
 		<requirement type="set_environment">PYTHONPATH</requirement>
 	</requirements>
 	<command interpreter="python">
-		../Java/Python/clusterize.py -i $formatType.inputFileName
-		#if $formatType.FormatInputFileName == 'bed':
-			-f bed
-		#elif $formatType.FormatInputFileName == 'gff':
-			-f gff
-		#elif $formatType.FormatInputFileName == 'gff2':
-			-f gff2
-		#elif $formatType.FormatInputFileName == 'gff3':
-			-f gff3
-		#elif $formatType.FormatInputFileName == 'sam':
-			-f sam
-		#elif $formatType.FormatInputFileName == 'bam':
-			-f bam
-		#elif $formatType.FormatInputFileName == 'gtf':
-			-f gtf
-		#end if
-		-o $outputFileGff 
-		$colinear
-		$normalize
-		-d $distance
+		#set $inputFiles = ",".join(["%s" % (s) for s in $formatType.inputFileName])
+		../Java/Python/clusterize.py -i $inputFiles -f $formatType.FormatInputFileName -o $outputFileGff $colinear $normalize -d $distance
 	</command>
 
 	<inputs>
@@ -34,29 +16,29 @@
 				<option value="gff2">gff2</option>
 				<option value="gff3">gff3</option>
 				<option value="sam">sam</option>
+				<option value="gtf">gtf</option>
 				<option value="bam">bam</option>
-				<option value="gtf">gtf</option>
 			</param>
 			<when value="bed">
-				<param name="inputFileName" format="bed" type="data" label="Input File"/>
+				<param name="inputFileName" format="bed" type="data" multiple="true" label="Input File"/>
 			</when>
 			<when value="gff">
-				<param name="inputFileName" format="gff" type="data" label="Input File"/>
+				<param name="inputFileName" format="gff" type="data" multiple="true" label="Input File"/>
 			</when>
 			<when value="gff2">
-				<param name="inputFileName" format="gff2" type="data" label="Input File"/>
+				<param name="inputFileName" format="gff2" type="data" multiple="true" label="Input File"/>
 			</when>
 			<when value="gff3">
-				<param name="inputFileName" format="gff3" type="data" label="Input File"/>
+				<param name="inputFileName" format="gff3" type="data" multiple="true" label="Input File"/>
 			</when>
 			<when value="sam">
-				<param name="inputFileName" format="sam" type="data" label="Input File"/>
+				<param name="inputFileName" format="sam" type="data" multiple="true" label="Input File"/>
+			</when>
+			<when value="gtf">
+				<param name="inputFileName" format="gtf" type="data" multiple="true" label="Input File"/>
 			</when>
 			<when value="bam">
-				<param name="inputFileName" format="bam" type="data" label="Input File"/>
-			</when>
-			<when value="gtf">
-				<param name="inputFileName" format="gtf" type="data" label="Input File"/>
+				<param name="inputFileName" format="bam" type="data" multiple="true" label="Input File"/>
 			</when>
 		</conditional>
 
--- a/SMART/galaxy/CompareOverlappingAdapt.xml	Mon Feb 10 03:39:09 2014 -0500
+++ b/SMART/galaxy/CompareOverlappingAdapt.xml	Fri Feb 21 08:32:36 2014 -0500
@@ -4,45 +4,15 @@
 		<requirement type="set_environment">PYTHONPATH</requirement>
 	</requirements>
 	<command interpreter="python">
-		../Java/Python/CompareOverlappingAdapt.py -i $formatType.inputFileName1 
-		#if $formatType.FormatInputFileName1 == 'bed':  
-		-f bed
-		#elif $formatType.FormatInputFileName1 == 'gff':
-			-f gff	
-		#elif $formatType.FormatInputFileName1 == 'gff2':
-			-f gff2
-		#elif $formatType.FormatInputFileName1 == 'gff3':
-			-f gff3
-		#elif $formatType.FormatInputFileName1 == 'sam':
-			-f sam
-		#elif $formatType.FormatInputFileName1 == 'gtf':
-			-f gtf
-		#end if
-		-j $formatType2.inputFileName2
-		#if $formatType2.FormatInputFileName2 == 'bed':
-			-g bed
-		#elif $formatType2.FormatInputFileName2 == 'gff':
-			-g gff	
-		#elif $formatType2.FormatInputFileName2 == 'gff2':
-			-g gff2
-		#elif $formatType2.FormatInputFileName2 == 'gff3':
-			-g gff3
-		#elif $formatType2.FormatInputFileName2 == 'sam':
-			-g sam
-		#elif $formatType2.FormatInputFileName2 == 'gtf':
-		    -g gtf
-		#end if
-		-o $outputFileGff 
+		../Java/Python/CompareOverlappingAdapt.py -i $formatType.inputFileName1 -f $formatType.FormatInputFileName1 -j $formatType2.inputFileName2 -g $formatType2.FormatInputFileName2 -o $outputFileGff $InvertMatch $NotOverlapping -m $NbOverlap
 		#if $OptionDistance.Dist == 'Yes':
 			-d $OptionDistance.distance
 		#end if
-		#if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
+		#if $OptionCA == 'Collinear':
 			-c 
-		#elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
+		#elif $OptionCA == 'AntiSense':
 			-a
 		#end if	
-		$InvertMatch
-		$NotOverlapping
 	</command>
 
 	<inputs>
@@ -114,21 +84,14 @@
 			<when value="No">
 			</when>
 		</conditional>
-		<conditional name="OptionCollinearOrAntiSens">
-			<param name="OptionCA" type="select" label="Collinear or anti-sens">
-				<option value="Collinear">Collinear</option>
-				<option value="AntiSens">AntiSens</option>
-				<option value="NONE" selected="true">NONE</option>
-			</param>
-			<when value="Collinear">
-			</when>
-			<when value="AntiSens">
-			</when>
-			<when value="NONE">
-			</when>
-		</conditional>
+		<param name="OptionCA" type="select" label="Collinear or anti-sense features only">
+			<option value="Collinear">Collinear</option>
+			<option value="AntiSense">AntiSense</option>
+			<option value="All" selected="true">All</option>
+		</param>
 		<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
 		<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
+		<param name="NbOverlap" type="integer" value="1" label="Min. # of overlapping nt. to declare an overlap."/>
 	</inputs>
 
 	<outputs>
--- a/SMART/galaxy/CompareOverlappingSmallQuery.xml	Mon Feb 10 03:39:09 2014 -0500
+++ b/SMART/galaxy/CompareOverlappingSmallQuery.xml	Fri Feb 21 08:32:36 2014 -0500
@@ -4,60 +4,21 @@
 		<requirement type="set_environment">PYTHONPATH</requirement>
 	</requirements>
 	<command interpreter="python">
-		../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 
-		#if $formatType.FormatInputFileName1 == 'bed':  
-		-f bed
-		#elif $formatType.FormatInputFileName1 == 'gff':
-			-f gff	
-		#elif $formatType.FormatInputFileName1 == 'gff2':
-			-f gff2
-		#elif $formatType.FormatInputFileName1 == 'gff3':
-			-f gff3
-		#elif $formatType.FormatInputFileName1 == 'sam':
-			-f sam
-		#elif $formatType.FormatInputFileName1 == 'bam':
-			-f bam
-		#elif $formatType.FormatInputFileName1 == 'gtf':
-			-f gtf
-		#end if
-		-j $formatType2.inputFileName2
-		#if $formatType2.FormatInputFileName2 == 'bed':
-			-g bed
-		#elif $formatType2.FormatInputFileName2 == 'gff':
-			-g gff	
-		#elif $formatType2.FormatInputFileName2 == 'gff2':
-			-g gff2
-		#elif $formatType2.FormatInputFileName2 == 'gff3':
-			-g gff3
-		#elif $formatType2.FormatInputFileName2 == 'sam':
-			-g sam
-		#elif $formatType2.FormatInputFileName2 == 'bam':
-			-g bam
-		#elif $formatType2.FormatInputFileName2 == 'gtf':
-		    -g gtf
-		#end if
-		-o $outputFileGff 
+		../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 -f $formatType.FormatInputFileName1 -j $formatType2.inputFileName2 -g $formatType2.FormatInputFileName2 -o $outputFileGff $InvertMatch $NotOverlapping $OptionInclusionQuery $OptionInclusionRef -m $OptionMinOverlap
 		#if $OptionDistance.Dist == 'Yes':
 			-d $OptionDistance.distance
 		#end if
-		#if $OptionMinOverlap.present == 'Yes':
-			-m $OptionMinOverlap.minOverlap
-		#end if
 		#if $OptionPcOverlapQuery.present == 'Yes':
 			-p $OptionPcOverlapQuery.minOverlap
 		#end if
 		#if $OptionPcOverlapRef.present == 'Yes':
 			-P $OptionPcOverlapRef.minOverlap
 		#end if
-		#if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
+		#if $OptionCA == 'Collinear':
 			-c 
-		#elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
+		#elif $OptionCA == 'AntiSense':
 			-a
 		#end if	
-		$InvertMatch
-		$NotOverlapping
-		$OptionInclusionQuery
-		$OptionInclusionRef
 	</command>
 
 	<inputs>
@@ -91,7 +52,7 @@
 			</when>
 			<when value="gtf">
 				<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
-								                        </when>
+			</when>
 		</conditional>
 
 		<conditional name="formatType2">
@@ -137,24 +98,14 @@
 			<when value="No">
 			</when>
 		</conditional>
-		<conditional name="OptionMinOverlap">
-			<param name="present" type="select" label="Minimum number of common nucleotides to declare an overlap">
-				<option value="Yes">Yes</option>
-				<option value="No" selected="true">No</option>
-			</param>
-			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
-			</when>
-			<when value="No">
-			</when>
-		</conditional>
+		<param name="OptionMinOverlap" type="integer" value="1" label="Min. # of overlapping nt. to declare an overlap."/>
 		<conditional name="OptionPcOverlapQuery">
 			<param name="present" type="select" label="N% of the query must overlap">
 				<option value="Yes">Yes</option>
 				<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
+				<param name="minOverlap" type="integer" value="100"/>
 			</when>
 			<when value="No">
 			</when>
@@ -165,26 +116,18 @@
 				<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
+				<param name="minOverlap" type="integer" value="100"/>
 			</when>
 			<when value="No">
 			</when>
 		</conditional>
-		<param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a query"/>
+		<param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a reference"/>
 		<param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must be nested in a query"/>
-		<conditional name="OptionCollinearOrAntiSens">
-			<param name="OptionCA" type="select" label="Collinear or anti-sens">
-				<option value="Collinear">Collinear</option>
-				<option value="AntiSens">AntiSens</option>
-				<option value="NONE" selected="true">NONE</option>
-			</param>
-			<when value="Collinear">
-			</when>
-			<when value="AntiSens">
-			</when>
-			<when value="NONE">
-			</when>
-		</conditional>
+		<param name="OptionCA" type="select" label="Collinear or anti-sense only">
+			<option value="Collinear">Collinear</option>
+			<option value="AntiSense">AntiSense</option>
+			<option value="All" selected="true">All</option>
+		</param>
 		<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
 		<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
 	</inputs>
--- a/SMART/galaxy/CompareOverlappingSmallRef.xml	Mon Feb 10 03:39:09 2014 -0500
+++ b/SMART/galaxy/CompareOverlappingSmallRef.xml	Fri Feb 21 08:32:36 2014 -0500
@@ -1,63 +1,24 @@
 <tool id="CompareOverlappingSmallRef" name="compare overlapping small reference">
-	<description>Provide the queries that overlap with a reference, when the reference dataset is small.</description>  
+	<description>Provide the queries that overlap with a reference, when the query data set is small.</description>  
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
 	</requirements>
 	<command interpreter="python">
-		../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 
-		#if $formatType.FormatInputFileName1 == 'bed':  
-		-f bed
-		#elif $formatType.FormatInputFileName1 == 'gff':
-			-f gff	
-		#elif $formatType.FormatInputFileName1 == 'gff2':
-			-f gff2
-		#elif $formatType.FormatInputFileName1 == 'gff3':
-			-f gff3
-		#elif $formatType.FormatInputFileName1 == 'sam':
-			-f sam
-		#elif $formatType.FormatInputFileName1 == 'bam':
-			-f bam
-		#elif $formatType.FormatInputFileName1 == 'gtf':
-			-f gtf
-		#end if
-		-j $formatType2.inputFileName2
-		#if $formatType2.FormatInputFileName2 == 'bed':
-			-g bed
-		#elif $formatType2.FormatInputFileName2 == 'gff':
-			-g gff	
-		#elif $formatType2.FormatInputFileName2 == 'gff2':
-			-g gff2
-		#elif $formatType2.FormatInputFileName2 == 'gff3':
-			-g gff3
-		#elif $formatType2.FormatInputFileName2 == 'sam':
-			-g sam
-		#elif $formatType2.FormatInputFileName2 == 'bam':
-			-g bam
-		#elif $formatType2.FormatInputFileName2 == 'gtf':
-		    -g gtf
-		#end if
-		-o $outputFileGff 
+		../Java/Python/CompareOverlappingSmallRef.py -i $formatType.inputFileName1 -f $formatType.FormatInputFileName1 -j $formatType2.inputFileName2 -g $formatType2.FormatInputFileName2 -o $outputFileGff $InvertMatch $NotOverlapping $OptionInclusionQuery $OptionInclusionRef -m $OptionMinOverlap
 		#if $OptionDistance.Dist == 'Yes':
 			-d $OptionDistance.distance
 		#end if
-		#if $OptionMinOverlap.present == 'Yes':
-			-m $OptionMinOverlap.minOverlap
-		#end if
 		#if $OptionPcOverlapQuery.present == 'Yes':
 			-p $OptionPcOverlapQuery.minOverlap
 		#end if
 		#if $OptionPcOverlapRef.present == 'Yes':
 			-P $OptionPcOverlapRef.minOverlap
 		#end if
-		#if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
+		#if $OptionCA == 'Collinear':
 			-c 
-		#elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
+		#elif $OptionCA == 'AntiSense':
 			-a
 		#end if	
-		$InvertMatch
-		$NotOverlapping
-		$OptionInclusionQuery
-		$OptionInclusionRef
 	</command>
 
 	<inputs>
@@ -91,7 +52,7 @@
 			</when>
 			<when value="gtf">
 				<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
-								                        </when>
+			</when>
 		</conditional>
 
 		<conditional name="formatType2">
@@ -137,24 +98,14 @@
 			<when value="No">
 			</when>
 		</conditional>
-		<conditional name="OptionMinOverlap">
-			<param name="present" type="select" label="Minimum number of common nucleotides to declare an overlap">
-				<option value="Yes">Yes</option>
-				<option value="No" selected="true">No</option>
-			</param>
-			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
-			</when>
-			<when value="No">
-			</when>
-		</conditional>
+		<param name="OptionMinOverlap" type="integer" value="1" label="Min. # of overlapping nt. to declare an overlap."/>
 		<conditional name="OptionPcOverlapQuery">
 			<param name="present" type="select" label="N% of the query must overlap">
 				<option value="Yes">Yes</option>
 				<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
+				<param name="minOverlap" type="integer" value="100"/>
 			</when>
 			<when value="No">
 			</when>
@@ -165,26 +116,18 @@
 				<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
+				<param name="minOverlap" type="integer" value="100"/>
 			</when>
 			<when value="No">
 			</when>
 		</conditional>
-		<param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a query"/>
+		<param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a reference"/>
 		<param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must be nested in a query"/>
-		<conditional name="OptionCollinearOrAntiSens">
-			<param name="OptionCA" type="select" label="Collinear or anti-sens">
-				<option value="Collinear">Collinear</option>
-				<option value="AntiSens">AntiSens</option>
-				<option value="NONE" selected="true">NONE</option>
-			</param>
-			<when value="Collinear">
-			</when>
-			<when value="AntiSens">
-			</when>
-			<when value="NONE">
-			</when>
-		</conditional>
+		<param name="OptionCA" type="select" label="Collinear or anti-sense only">
+			<option value="Collinear">Collinear</option>
+			<option value="AntiSense">AntiSense</option>
+			<option value="All" selected="true">All</option>
+		</param>
 		<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
 		<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
 	</inputs>