Repository 's_mart'
hg clone https://toolshed.g2.bx.psu.edu/repos/yufei-luo/s_mart

Changeset 46:169d364ddd91 (2013-09-30)
Previous changeset 45:e454402ba9d9 (2013-09-18) Next changeset 47:b6481845eb0d (2013-09-30)
Commit message:
Uploaded
modified:
SMART/Java/Python/CompareOverlapping.py
SMART/Java/Python/CompareOverlappingSmallQuery.py
SMART/Java/Python/CompareOverlappingSmallRef.py
SMART/Java/Python/ComputeCoverage.py
SMART/Java/Python/CountLoci.py
SMART/Java/Python/GetDistribution.py
SMART/Java/Python/GetFlanking.py
SMART/Java/Python/GetReadSizes.py
SMART/Java/Python/cleanGff.py
SMART/Java/Python/cleaning/GffCleaner.py
SMART/Java/Python/clusterize.py
SMART/Java/Python/getRandomRegions.py
SMART/Java/Python/mySql/MySqlConnection.py
SMART/Java/Python/mySql/MySqlQuery.py
SMART/Java/Python/mySql/MySqlTable.py
SMART/Java/Python/mySql/MySqlTranscriptTable.py
SMART/Java/Python/ncList/NCList.py
SMART/Java/Python/plot.py
SMART/Java/Python/plotCoverage.py
SMART/Java/Python/structure/Interval.py
SMART/Java/Python/structure/Transcript.py
commons/core/parsing/FastaParser.py
commons/core/parsing/WigParser.py
commons/core/writer/MySqlTranscriptWriter.py
tool_conf.xml
tool_dependencies.xml
added:
SMART/galaxy/CompareOverlappingAdapt.xml
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/CompareOverlapping.py
--- a/SMART/Java/Python/CompareOverlapping.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/CompareOverlapping.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -206,6 +206,8 @@
  if self._verbosity > 2:
  print "Creating %s NC-list..." % (TYPETOSTRING[type])
  self._convertedFileNames[type] = "%s_%d_%d.ncl" % (self._inputFileNames[type], self._randInt, type)
+ if "SMARTTMPPATH" in os.environ:
+ self._convertedFileNames[type] = os.path.join(os.environ["SMARTTMPPATH"], self._convertedFileNames[type])
  ncLists = ConvertToNCList(self._verbosity)
  ncLists.setInputFileName(self._inputFileNames[type], self._inputFileFormats[type])
  ncLists.setOutputFileName(self._convertedFileNames[type])
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/CompareOverlappingSmallQuery.py
--- a/SMART/Java/Python/CompareOverlappingSmallQuery.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/CompareOverlappingSmallQuery.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -74,9 +74,6 @@
  self.collinear      = False
  self.pcOverlapQuery = False
  self.pcOverlapRef   = False
- self.minOverlap     = False
- self.included       = False
- self.including      = False
  self.bins         = {}
  self.overlaps       = {}
  self.notOverlapping = False
@@ -110,13 +107,6 @@
  self.pcOverlapQuery = pcOverlapQuery
  self.pcOverlapRef   = pcOverlapRef
 
- def setMinOverlap(self, minOverlap):
- self.minOverlap = minOverlap
-
- def setInclude(self, included, including):
- self.included  = included
- self.including = including
-
  def includeNotOverlapping(self, boolean):
  self.notOverlapping = boolean
 
@@ -145,18 +135,12 @@
  return False
  if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection():
  return False
- if self.included and not refTranscript.include(queryTranscript):
- return False
- if self.including and not queryTranscript.include(refTranscript):
- return False
  querySize = queryTranscript.getSize()
  if self.pcOverlapQuery and not queryTranscript.overlapWithExon(refTranscript, int(querySize * self.pcOverlapQuery / 100.0)):
  return False
  refSize = refTranscript.getSize()
  if self.pcOverlapRef and not queryTranscript.overlapWithExon(refTranscript, int(refSize * self.pcOverlapRef / 100.0)):
  return False
- if self.minOverlap and not queryTranscript.overlapWithExon(refTranscript, self.minOverlap):
- return False
  return True
 
  def _alterTranscript(self, transcript, type):
@@ -237,11 +221,8 @@
  parser.add_option("-d", "--distance", dest="distance",    action="store",     default=0,    type="int",  help="accept some distance between query and reference [format: int]")
  parser.add_option("-c", "--collinear", dest="collinear",    action="store_true", default=False,    help="provide collinear features [format: bool] [default: false]")
  parser.add_option("-a", "--antisense", dest="antisense",    action="store_true", default=False,    help="provide antisense features [format: bool] [default: false]")
- parser.add_option("-m", "--minOverlap",     dest="minOverlap",     action="store",      default=False, type="int",  help="min. #nt overlap [format: bool] [default: false]")
  parser.add_option("-p", "--pcOverlapQuery", dest="pcOverlapQuery", action="store",      default=False, type="int",  help="min. % overlap of the query [format: bool] [default: false]")
  parser.add_option("-P", "--pcOverlapRef", dest="pcOverlapRef",   action="store",      default=False, type="int",   help="min. % overlap of the reference [format: bool] [default: false]")
- parser.add_option("-k", "--included", dest="included",    action="store_true", default=False,    help="provide query elements which are nested in reference elements [format: bool] [default: false]")
- parser.add_option("-K", "--including", dest="including",    action="store_true", default=False,    help="provide query elements in which reference elements are nested [format: bool] [default: false]")
  parser.add_option("-x", "--exclude", dest="exclude",    action="store_true", default=False,    help="invert the match [format: bool] [default: false]")
  parser.add_option("-v", "--verbosity",      dest="verbosity",    action="store",      default=1,     type="int",  help="trace level [format: int]")
  (options, args) = parser.parse_args()
@@ -255,7 +236,5 @@
  cosq.setCollinear(options.collinear)
  cosq.setAntisense(options.antisense)
  cosq.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef)
- cosq.setMinOverlap(options.minOverlap)
- cosq.setInclude(options.included, options.including)
  cosq.setInvert(options.exclude)
  cosq.run()
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/CompareOverlappingSmallRef.py
--- a/SMART/Java/Python/CompareOverlappingSmallRef.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/CompareOverlappingSmallRef.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -72,11 +72,8 @@
  self.antisense      = False
  self.collinear      = False
  self.distance       = None
- self.minOverlap     = False
  self.pcOverlapQuery = False
  self.pcOverlapRef   = False
- self.included       = False
- self.including      = False
  self.bins         = {}
  self.notOverlapping = False
 
@@ -109,10 +106,6 @@
  self.pcOverlapQuery = pcOverlapQuery
  self.pcOverlapRef   = pcOverlapRef
 
- def setInclude(self, included, including):
- self.included  = included
- self.including = including
-
  def includeNotOverlapping(self, boolean):
  self.notOverlapping = boolean
 
@@ -146,18 +139,12 @@
  return False
  if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection():
  return False
- if self.included and not queryTranscript.isIncluded(refTranscript):
- return False
- if self.including and not refTranscript.isIncluded(queryTranscript):
- return False
  querySize = queryTranscript.getSize()
  if self.pcOverlapQuery and not queryTranscript.overlapWithExon(refTranscript, int(querySize * self.pcOverlapQuery / 100.0)):
  return False
  refSize = refTranscript.getSize()
  if self.pcOverlapRef and not queryTranscript.overlapWithExon(refTranscript, int(refSize * self.pcOverlapRef / 100.0)):
  return False
- if self.minOverlap and not queryTranscript.overlapWithExon(refTranscript, self.minOverlap):
- return False
  return True
 
  def _compareTranscript(self, queryTranscript):
@@ -180,9 +167,11 @@
  return overlaps
 
  def _updateTranscript(self, queryTranscript, overlaps):
- queryTranscript.setTagValue("nbOverlaps", sum(overlaps.values()))
  if overlaps:
+ queryTranscript.setTagValue("nbOverlaps", sum(overlaps.values()))
  queryTranscript.setTagValue("overlapsWith", "--".join(overlaps.keys())[:100])
+ else:
+ queryTranscript.setTagValue("nbOverlaps", "0")
  return queryTranscript
 
  def compare(self):
@@ -222,17 +211,14 @@
  parser.add_option("-j", "--input2",         dest="inputFileName2", action="store",            type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
  parser.add_option("-g", "--format2",        dest="format2",   action="store",            type="string", help="format of previous file [compulsory] [format: transcript file format]")
  parser.add_option("-o", "--output",         dest="outputFileName", action="store",            type="string", help="output file [format: output file in GFF3 format]")
- parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False,   help="also output not overlapping data [format: bool] [default: false]")
- parser.add_option("-d", "--distance", dest="distance",    action="store",     default=0,    type="int",   help="accept some distance between query and reference [format: int]")
- parser.add_option("-c", "--collinear", dest="collinear",    action="store_true", default=False,     help="provide collinear features [format: bool] [default: false]")
- parser.add_option("-a", "--antisense", dest="antisense",    action="store_true", default=False,     help="provide antisense features [format: bool] [default: false]")
- parser.add_option("-m", "--minOverlap",     dest="minOverlap",     action="store",      default=False, type="int",   help="min. #nt overlap [format: bool] [default: false]")
- parser.add_option("-p", "--pcOverlapQuery", dest="pcOverlapQuery", action="store",      default=False, type="int",   help="min. % overlap of the query [format: bool] [default: false]")
- parser.add_option("-P", "--pcOverlapRef", dest="pcOverlapRef",   action="store",      default=False, type="int",    help="min. % overlap of the reference [format: bool] [default: false]")
- parser.add_option("-k", "--included", dest="included",    action="store_true", default=False,     help="provide query elements which are nested in reference elements [format: bool] [default: false]")
- parser.add_option("-K", "--including", dest="including",    action="store_true", default=False,     help="provide query elements in which reference elements are nested [format: bool] [default: false]")
- parser.add_option("-x", "--exclude", dest="exclude",    action="store_true", default=False,     help="invert the match [format: bool] [default: false]")
- parser.add_option("-v", "--verbosity",      dest="verbosity",    action="store",      default=1,     type="int",   help="trace level [format: int]")
+ parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False,  help="also output not overlapping data [format: bool] [default: false]")
+ parser.add_option("-d", "--distance", dest="distance",    action="store",     default=0,    type="int",  help="accept some distance between query and reference [format: int]")
+ parser.add_option("-c", "--collinear", dest="collinear",    action="store_true", default=False,    help="provide collinear features [format: bool] [default: false]")
+ parser.add_option("-a", "--antisense", dest="antisense",    action="store_true", default=False,    help="provide antisense features [format: bool] [default: false]")
+ parser.add_option("-p", "--pcOverlapQuery", dest="pcOverlapQuery", action="store",      default=False, type="int",  help="min. % overlap of the query [format: bool] [default: false]")
+ parser.add_option("-P", "--pcOverlapRef", dest="pcOverlapRef",   action="store",      default=False, type="int",   help="min. % overlap of the reference [format: bool] [default: false]")
+ parser.add_option("-x", "--exclude", dest="exclude",    action="store_true", default=False,    help="invert the match [format: bool] [default: false]")
+ parser.add_option("-v", "--verbosity",      dest="verbosity",    action="store",      default=1,     type="int",  help="trace level [format: int]")
  (options, args) = parser.parse_args()
 
  cosr = CompareOverlappingSmallRef(options.verbosity)
@@ -242,9 +228,7 @@
  cosr.includeNotOverlapping(options.notOverlapping)
  cosr.setDistance(options.distance)
  cosr.setAntisense(options.antisense)
- cosr.setInclude(options.included, options.including)
  cosr.setInvert(options.exclude)
- cosr.setMinOverlap(options.minOverlap)
  cosr.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef)
  cosr.run()
 
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/ComputeCoverage.py
--- a/SMART/Java/Python/ComputeCoverage.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/ComputeCoverage.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -130,7 +130,7 @@
  parser.add_option("-g", "--format2",   dest="format2",        action="store",                     type="string", help="format of the second file [compulsory] [format: transcript file format]")
  parser.add_option("-t", "--introns",   dest="introns",        action="store_true", default=False,                help="also include introns [format: boolean] [default: false]")
  parser.add_option("-o", "--output",    dest="outputFileName", action="store",    default=None,  type="string", help="output file [format: output file in GFF3 format]")
- parser.add_option("-v", "--verbosity", dest="verbosity",   action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
+ parser.add_option("-v", "--verbosity", dest="verbosity",   action="store",                     type="int",    help="trace level [default: 1] [format: int]")
  (options, args) = parser.parse_args()
 
  computer = CoverageComputer(options.verbosity)
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/CountLoci.py
--- a/SMART/Java/Python/CountLoci.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/CountLoci.py Mon Sep 30 03:19:26 2013 -0400
b
@@ -85,7 +85,7 @@
                                "five_prime_UTR":            "%sfive.gff3"     % (self.outputBase), \
                                "three_prime_UTR":           "%sthree.gff3"    % (self.outputBase), \
                                "mRNA":                      "%smrna.gff3"     % (self.outputBase), \
-                               "ncRNA":                     "%sncRNA.gff3"    % (self.outputBase), \
+                               "ncRNA":                     "%sncRNA.gff3"     % (self.outputBase), \
                                "transposable_element_gene": "%sTE.gff3"       % (self.outputBase), \
                                "vic":                       "%svicinity.gff3" % (self.outputBase)}
         self.tmpFileNames.extend(self.referenceFiles.values())
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/GetDistribution.py
--- a/SMART/Java/Python/GetDistribution.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/GetDistribution.py Mon Sep 30 03:19:26 2013 -0400
[
b'@@ -45,28 +45,33 @@\n class GetDistribution(object):\n \n \tdef __init__(self, verbosity):\n-\t\tself.verbosity     = verbosity\n-\t\tself.sizes         = None\n-\t\tself.twoStrands    = False\n-\t\tself.start         = 1\n-\t\tself.names         = ["nbElements"]\n-\t\tself.average       = False\n-\t\tself.nbValues      = {}\n-\t\tself.height        = 300\n-\t\tself.width         = 600\n-\t\tself.colors        = None\n-\t\tself.gffFileName   = None\n-\t\tself.csvFileName   = None\n-\t\tself.yMin          = None\n-\t\tself.yMax          = None\n-\t\tself.chromosome    = None\n-\t\tself.merge         = False\n-\t\tself.nbTranscripts = None\n+\t\tself.verbosity        = verbosity\n+\t\tself.sizes            = None\n+\t\tself.nbBins           = None\n+\t\tself.sliceSize        = None\n+\t\tself.twoStrands       = False\n+\t\tself.start            = 1\n+\t\tself.names            = ["nbElements"]\n+\t\tself.average          = False\n+\t\tself.nbValues         = {}\n+\t\tself.height           = 300\n+\t\tself.width            = 600\n+\t\tself.dots             = False\n+\t\tself.colors           = None\n+\t\tself.gffFileName      = None\n+\t\tself.csvFileName      = None\n+\t\tself.yMin             = None\n+\t\tself.yMax             = None\n+\t\tself.chromosome       = None\n+\t\tself.merge            = False\n+\t\tself.nbTranscripts    = None\n+\t\tself.factors          = None\n+\t\tself.thicknessCurve   = 1\n+\t\tself.sizePoliceLegend = 1.5\n \n-\tdef setInputFile(self, fileName, format):\n-\t\tchooser = ParserChooser(self.verbosity)\n-\t\tchooser.findFormat(format)\n-\t\tself.parser = chooser.getParser(fileName)\n+\tdef setInputFiles(self, fileNames, format):\n+\t\tself.fileNames = fileNames\n+\t\tself.format    = format\n \n \tdef setReferenceFile(self, fileName):\n \t\tif fileName == None:\n@@ -77,7 +82,7 @@\n \t\tself.maxSize     = max(self.sizes.values())\n \n \tdef setRegion(self, chromosome, start, end):\n-\t\tif chromosome == None:\n+\t\tif chromosome == None 
or start == None or end == None:\n \t\t\treturn\n \t\tself.maxSize     = options.end\n \t\tself.sizes       = {chromosome: end}\n@@ -90,13 +95,20 @@\n \t\tself.outputFileName = fileName\n \n \tdef setNbBins(self, nbBins):\n-\t\tself.nbBins = nbBins\n+\t\tif nbBins != None:\n+\t\t\tself.nbBins = int(nbBins)\n+\n+\tdef setBinSize(self, binSize):\n+\t\tif binSize != None:\n+\t\t\tself.sliceSize = int(binSize)\n \n \tdef set2Strands(self, twoStrands):\n \t\tself.twoStrands = twoStrands\n \n \tdef setNames(self, names):\n \t\tself.names = names\n+\t\tif len(self.names) == 1 and len(self.fileNames) > 1:\n+\t\t\tself.names = ["file %d" % (i+1) for i in range(len(self.fileNames))]\n \n \tdef setAverage(self, average):\n \t\tself.average = average\n@@ -104,10 +116,16 @@\n \tdef setNormalization(self, normalization):\n \t\tself.normalization = normalization\n \t\n+\tdef setNormalizationFactors(self, factors):\n+\t\tself.factors = dict([name, 1.0] for name in self.names) if factors == None else dict(zip(self.names, factors))\n+\t\n \tdef setImageSize(self, height, width):\n \t\tself.height = height\n \t\tself.width  = width\n \n+\tdef setDots(self, dots):\n+\t\tself.dots = dots\n+\n \tdef setYLimits(self, yMin, yMax):\n \t\tself.yMin = yMin\n \t\tself.yMax = yMax\n@@ -124,15 +142,29 @@\n \tdef mergePlots(self, merge):\n \t\tself.merge = merge\n \n+\tdef setThicknessCurve(self, thickness) :\n+\t\tself.thickness = thickness\n+\n+\tdef setSizePoliceLegend(self, sizePoliceLegend):\n+\t\tself.sizePoliceLegend = sizePoliceLegend\n+\n \tdef _estimateSizes(self):\n-\t\tprogress = UnlimitedProgress(10000, "Reading input for chromosome size estimate", self.verbosity)\n-\t\tself.sizes = {}\n-\t\tfor self.nbTranscripts, transcript in enumerate(self.parser.getIterator()):\n-\t\t\tchromosome = transcript.getChromosome()\n-\t\t\tstart      = transcript.getStart()\n-\t\t\tself.sizes[chromosome] = max(start, self.sizes.get(chromosome, 
0))\n-\t\t\tprogress.inc()\n-\t\tprogress.done()\n+\t\tself.sizes         = {}\n+\t\tself.nbTranscripts = {}\n+\t\tfor fileName in self.fileNames:\n+\t\t\tprogress = UnlimitedProgress(10000, "Reading %s for chromosome size estimate" % (fileName), self.verbosity)\n+\t\t\tparserChooser = ParserChooser(self.verbosity)\n+\t\t\tparserChooser.findFormat(self.format)\n+\t\t\tparser = parserChooser.getPa'..b'imum value on the y-axis to plot [format: int]")\n+\tparser.add_option("-x", "--csv",          dest="csv",                 action="store",      default=None,                        help="write a .csv file [format: output file in CSV format] [default: None]")\n+\tparser.add_option("-g", "--gff",          dest="gff",                 action="store",      default=None,                        help="also write GFF3 file [format: output file in GFF format] [default: None]")\n+\tparser.add_option("-H", "--height",       dest="height",              action="store",      default=500,          type="int",    help="height of the graphics [format: int] [default: 300]")\n+\tparser.add_option("-W", "--width",        dest="width",               action="store",      default=800,          type="int",    help="width of the graphics [format: int] [default: 1000]")\n+\tparser.add_option("-t", "--thickness", \t  dest="lineThickness", \t  action="store",      default=1,            type="int",    help="thickness of the lines [format : int] [default : 1]")\n+\tparser.add_option("-d", "--policeLegend", dest="sizePoliceLegend",    action="store",      default=1.5,          type="float",  help="size of the police of the legend  [format : float] [default : 1.5]")\n+\tparser.add_option("-D", "--dots",         dest="dots",                action="store_true", default=False,                       help="plot dots instead of lines  [format : bool] [default : false]")\n+\tparser.add_option("-a", "--average",      dest="average",             action="store_true", default=False,                       
help="plot average (instead of sum) [default: false] [format: boolean]")\n+\tparser.add_option("-n", "--names",        dest="names",               action="store",      default="nbElements", type="string", help="name for the tags (separated by commas and no space) [default: None] [format: string]")\n+\tparser.add_option("-l", "--color",        dest="colors",              action="store",      default=None,         type="string", help="color of the lines (separated by commas and no space) [format: string]")\n+\tparser.add_option("-z", "--normalize",    dest="normalize",           action="store_true", default=False,                       help="normalize data (when panels are different) [format: bool] [default: false]")\n+\tparser.add_option("-Z", "--normalizeFac", dest="normalizeFactors",    action="store",      default=None,                        help="normalize data with given factors (when panels are different) [format: string]")\n+\tparser.add_option("-m", "--merge",        dest="mergePlots",          action="store_true", default=False,                       help="merge all plots in one figure [format: bool] [default: false]")\n+\tparser.add_option("-v", "--verbosity",    dest="verbosity",           action="store",      default=1,            type="int",    help="trace level [default: 1] [format: int]")\n \t(options, args) = parser.parse_args()\n \n \tgt = GetDistribution(options.verbosity)\n-\tgt.setInputFile(options.inputFileName, options.format)\n+\tgt.setInputFiles(options.inputFileNames.split(","), options.format)\n \tgt.setOutputFile(options.outputFileName)\n \tgt.setReferenceFile(options.referenceFileName)\n-\tgt.setNbBins(int(options.nbBins))\n+\tgt.setNbBins(options.nbBins)\n+\tgt.setBinSize(options.binSize)\n \tgt.set2Strands(options.bothStrands)\n \tgt.setRegion(options.chromosome, options.start, options.end)\n \tgt.setNormalization(options.normalize)\n@@ -355,8 +415,12 @@\n \tgt.writeGff(options.gff)\n \tgt.setImageSize(options.height, options.width)\n 
\tgt.setNames(options.names.split(","))\n+\tgt.setThicknessCurve(options.lineThickness)\n+\tgt.setSizePoliceLegend(options.sizePoliceLegend)\n \tgt.setColors(None if options.colors == None else options.colors.split(","))\n+\tgt.setDots(options.dots)\n \tgt.setNormalization(options.normalize)\n+\tgt.setNormalizationFactors(None if options.normalizeFactors == None else [float(factor) for factor in options.normalizeFactors.split(",")])\n \tgt.mergePlots(options.mergePlots)\n \tgt.run()\n \n'
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/GetFlanking.py
--- a/SMART/Java/Python/GetFlanking.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/GetFlanking.py Mon Sep 30 03:19:26 2013 -0400
[
b'@@ -44,190 +44,188 @@\n TAG_REGION   = "_region"\n TAGS_REGION  = {-1: "_upstream", 0: "", 1: "_downstream"}\n TAGS_RREGION = {-1: "upstream", 0: "overlapping", 1: "downstream"}\n-TAGS_SENSE   = {-1: "antisense", 0: "", 1: "collinear"}\n+TAGS_SENSE   = {-1: "antisense", 0: "", 1: "colinear"}\n STRANDSTOSTR = {-1: "(-)", 0: "", 1: "(+)"}\n \n \n-def getOrderKey(transcript, direction, input):\n-\tif direction == 1:\n-\t\tif input == QUERY:\n-\t\t\treturn (transcript.getEnd(), -transcript.getStart())\n-\t\treturn (transcript.getStart(), -transcript.getEnd())\n-\tif input == QUERY:\n-\t\treturn (-transcript.getStart(), transcript.getEnd())\n-\treturn (-transcript.getEnd(), transcript.getStart())\n+def getOrderKey(transcript, direction):\n+    if direction == 1:\n+        return transcript.getEnd()\n+    return - transcript.getStart()\n+\n+def isInGoodRegion(transcriptRef, transcriptQuery, direction):\n+    if direction == 1:\n+        return transcriptQuery.getEnd() > transcriptRef.getEnd()\n+    return transcriptQuery.getStart() < transcriptRef.getStart()\n \n \n class GetFlanking(object):\n \n-\tdef __init__(self, verbosity):\n-\t\tself.verbosity   = verbosity\n-\t\tself.transcripts = dict([id, {}] for id in INPUTS)\n-\t\tself.directions  = []\n-\t\tself.noOverlap   = False\n-\t\tself.colinear    = False\n-\t\tself.antisense   = False\n-\t\tself.distance    = None\n-\t\tself.minDistance = None\n-\t\tself.maxDistance = None\n-\t\tself.tagName     = "flanking"\n+    def __init__(self, verbosity):\n+        self.verbosity   = verbosity\n+        self.transcripts = dict([id, {}] for id in INPUTS)\n+        self.directions  = []\n+        self.noOverlap   = False\n+        self.colinear    = False\n+        self.antisense   = False\n+        self.distance    = None\n+        self.minDistance = None\n+        self.maxDistance = None\n+        self.tagName     = "flanking"\n \n-\tdef setInputFile(self, fileName, format, id):\n-\t\tchooser = 
ParserChooser(self.verbosity)\n-\t\tchooser.findFormat(format)\n-\t\tparser = chooser.getParser(fileName)\n-\t\tfor transcript in parser.getIterator():\n-\t\t\tchromosome = transcript.getChromosome()\n-\t\t\tif chromosome not in self.transcripts[id]:\n-\t\t\t\tself.transcripts[id][chromosome] = []\n-\t\t\tself.transcripts[id][chromosome].append(transcript)\n+    def setInputFile(self, fileName, format, id):\n+        chooser = ParserChooser(self.verbosity)\n+        chooser.findFormat(format)\n+        parser = chooser.getParser(fileName)\n+        for transcript in parser.getIterator():\n+            chromosome = transcript.getChromosome()\n+            if chromosome not in self.transcripts[id]:\n+                self.transcripts[id][chromosome] = []\n+            self.transcripts[id][chromosome].append(transcript)\n \n-\tdef setOutputFile(self, fileName):\n-\t\tself.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n+    def setOutputFile(self, fileName):\n+        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n+\n+    def addUpstreamDirection(self, upstream):\n+        if upstream:\n+            self.directions.append(-1)\n+\n+    def addDownstreamDirection(self, downstream):\n+        if downstream:\n+            self.directions.append(1)\n \n-\tdef addUpstreamDirection(self, upstream):\n-\t\tif upstream:\n-\t\t\tself.directions.append(-1)\n+    def setColinear(self, colinear):\n+        self.colinear = colinear\n+\n+    def setAntisense(self, antisense):\n+        self.antisense = antisense\n \n-\tdef addDownstreamDirection(self, downstream):\n-\t\tif downstream:\n-\t\t\tself.directions.append(1)\n+    def setNoOverlap(self, noOverlap):\n+        self.noOverlap = noOverlap\n \n-\tdef setColinear(self, colinear):\n-\t\tself.colinear = colinear\n+    def setMinDistance(self, distance):\n+        self.minDistance = distance\n+\n+    def setMaxDistance(self, distance):\n+        self.maxDistance = distance\n \n-\tdef setAntisense(self, 
antisense):\n-\t\tself.antisense = antisense\n-\n-\tdef setNoOverlap(self, noOverlap):\n-\t\tself.noOverlap = noOverlap\n+    def setNewTagName(self, tagName):\n+        self.tagName = tagName\n \n-\tdef setMinDistance(self, dist'..b'= parser.parse_args()\n+    parser = OptionParser(description = description)\n+    parser.add_option("-i", "--input1",      dest="inputFileName1", action="store",                          type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n+    parser.add_option("-f", "--format1",     dest="format1",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+    parser.add_option("-j", "--input2",      dest="inputFileName2", action="store",                          type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")\n+    parser.add_option("-g", "--format2",     dest="format2",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")\n+    parser.add_option("-5", "--upstream",    dest="upstream",       action="store_true", default=False,                     help="output upstream elements [format: boolean] [default: False]")\n+    parser.add_option("-3", "--downstream",  dest="downstream",     action="store_true", default=False,                     help="output downstream elements [format: boolean] [default: False]")\n+    parser.add_option("-c", "--colinear",    dest="colinear",       action="store_true", default=False,                     help="find first colinear element [format: boolean] [default: False]")\n+    parser.add_option("-a", "--antisense",   dest="antisense",      action="store_true", default=False,                     help="find first anti-sense element [format: boolean] [default: False]")\n+    parser.add_option("-e", "--noOverlap",   dest="noOverlap",   
   action="store_true", default=False,                     help="do not consider elements which are overlapping reference elements [format: boolean] [default: False]")\n+    parser.add_option("-d", "--minDistance", dest="minDistance",    action="store",      default=None,       type="int",    help="minimum distance between 2 elements [format: int]")\n+    parser.add_option("-D", "--maxDistance", dest="maxDistance",    action="store",      default=None,       type="int",    help="maximum distance between 2 elements [format: int]")\n+    parser.add_option("-t", "--tag",         dest="tagName",        action="store",      default="flanking", type="string", help="name of the new tag [format: string] [default: flanking]")\n+    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                          type="string", help="output file [format: output file in GFF3 format]")\n+    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,          type="int",    help="trace level [format: int]")\n+    (options, args) = parser.parse_args()\n \n-\tgf = GetFlanking(options.verbosity)\n-\tgf.setInputFile(options.inputFileName1, options.format1, QUERY)\n-\tgf.setInputFile(options.inputFileName2, options.format2, REFERENCE)\n-\tgf.setOutputFile(options.outputFileName)\n-\tgf.addUpstreamDirection(options.upstream)\n-\tgf.addDownstreamDirection(options.downstream)\n-\tgf.setColinear(options.colinear)\n-\tgf.setAntisense(options.antisense)\n-\tgf.setNoOverlap(options.noOverlap)\n-\tgf.setMinDistance(options.minDistance)\n-\tgf.setMaxDistance(options.maxDistance)\n-\tgf.setNewTagName(options.tagName)\n-\tgf.run()\n+    gf = GetFlanking(options.verbosity)\n+    gf.setInputFile(options.inputFileName1, options.format1, QUERY)\n+    gf.setInputFile(options.inputFileName2, options.format2, REFERENCE)\n+    gf.setOutputFile(options.outputFileName)\n+    gf.addUpstreamDirection(options.upstream)\n+    
gf.addDownstreamDirection(options.downstream)\n+    gf.setColinear(options.colinear)\n+    gf.setAntisense(options.antisense)\n+    gf.setNoOverlap(options.noOverlap)\n+    gf.setMinDistance(options.minDistance)\n+    gf.setMaxDistance(options.maxDistance)\n+    gf.setNewTagName(options.tagName)\n+    gf.run()\n'
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/GetReadSizes.py
--- a/SMART/Java/Python/GetReadSizes.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/GetReadSizes.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -52,6 +52,7 @@
  self.sizes      = {}
  self.factors    = {}
  self.regions    = None
+ self.percentage = False
  self.tmpDatName = None
  self.tmpRName   = None
  self.width      = 800
@@ -66,6 +67,8 @@
  chooser.findFormat(format)
  for cpt, fileName in enumerate(fileNames):
  self.parsers[self.names[cpt]] = chooser.getParser(fileName)
+ if not self.factors:
+ self.factors = dict([name, 1.0] for name in self.names)
 
  def setOutputFileName(self, fileName):
  self.outputFileName = fileName
@@ -82,12 +85,17 @@
  self.colors = colors
 
  def setFactors(self, factors):
- self.factors = dict(zip(self.names, factors))
+ if factors:
+ self.factors = dict(zip(self.names, factors))
 
  def setRegionsFile(self, fileName):
  if fileName != None:
  self._loadRegions(fileName)
 
+ def setPercentage(self, percentage):
+ self.percentage = percentage
+ self.xLab = "% reads"
+
  def setImageSize(self, width, height):
  if width != None:
  self.width = width
@@ -165,6 +173,14 @@
  def _checkQuorum(self, region):
  return (max([sum(self.sizes[region][name].values()) for name in self.sizes[region]]) > 0)
 
+ def _computePercentage(self):
+ for region in self.sizes:
+ for name in self.sizes[region]:
+ if self.sizes[region][name]:
+ sumData = float(sum(self.sizes[region][name].values()))
+ for size in self.sizes[region][name]:
+ self.sizes[region][name][size] = self.sizes[region][name][size] / sumData * 100
+
  def _writeData(self, region):
  self.tmpDatName = "tmpFile%d.dat" % (self.number)
  handle          = open(self.tmpDatName, "w")
@@ -223,6 +239,8 @@
  self.log.info("START Get Read Sizes")
  for name in self.names:
  self._parse(name)
+ if self.percentage:
+ self._computePercentage()
  self._plot()
  self._cleanFiles()
  self.log.info("END Get Read Sizes")
@@ -243,6 +261,7 @@
  parser.add_option("-c", "--colors",    dest="colors",          action="store",      default=None,      type="string", help="colors of the bars, separated by commas  [format: string]")
  parser.add_option("-a", "--factors",   dest="factors",         action="store",      default=None,      type="string", help="normalization factors, separated by commas  [format: string]")
  parser.add_option("-r", "--regions",   dest="regionsFileName", action="store",      default=None,      type="string", help="regions to plot [format: transcript file in GFF format]")
+ parser.add_option("-p", "--percent",   dest="percentage",      action="store_true", default=False,                    help="compute percentage instead [format: boolean] [default: false]")
  parser.add_option("-z", "--width",     dest="width",           action="store",      default=800,       type="int",    help="width of the image [format: int] [default: 800]")
  parser.add_option("-Z", "--height",    dest="height",          action="store",      default=300,       type="int",    help="height of the image [format: int] [default: 300]")
  parser.add_option("-A", "--arial",     dest="arial",           action="store_true", default=False,                    help="use Arial font [format: boolean] [default: false]")
@@ -252,11 +271,12 @@
  iGetReadSizes.setNames(options.names.split(","))
  iGetReadSizes.setInputFiles(options.inputFileNames.split(","), options.format)
  iGetReadSizes.setOutputFileName(options.outputFileName)
- iGetReadSizes.setLabs(options.xLab, options.yLab)
  iGetReadSizes.setSizes(options.minSize, options.maxSize)
  iGetReadSizes.setColors(None if options.colors == None else options.colors.split(","))
  iGetReadSizes.setFactors(None if options.factors == None else map(float, options.factors.split(",")))
  iGetReadSizes.setRegionsFile(options.regionsFileName)
+ iGetReadSizes.setPercentage(options.percentage)
  iGetReadSizes.setImageSize(options.width, options.height)
+ iGetReadSizes.setLabs(options.xLab, options.yLab)
  iGetReadSizes.setArial(options.arial)
  iGetReadSizes.run()
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/cleanGff.py
--- a/SMART/Java/Python/cleanGff.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/cleanGff.py Mon Sep 30 03:19:26 2013 -0400
[
b'@@ -43,158 +43,153 @@\n count = {}\n \n class ParsedLine(object):\n-\tdef __init__(self, line, cpt):\n-\t\tself.line = line\n-\t\tself.cpt  = cpt\n-\t\tself.parse()\n+    def __init__(self, line, cpt):\n+        self.line = line\n+        self.cpt  = cpt\n+        self.parse()\n \n-\tdef parse(self):\n-\t\tself.line = self.line.strip()\n-\t\tself.splittedLine = self.line.split(None, 8)\n-\t\tif len(self.splittedLine) < 9:\n-\t\t\traise Exception("Line \'%s\' has less than 9 fields.  Exiting..." % (self.line))\n-\t\tself.type = self.splittedLine[2]\n-\t\tself.parseOptions()\n-\t\tself.getId()\n-\t\tself.getParents()\n+    def parse(self):\n+        self.line = self.line.strip()\n+        self.splittedLine = self.line.split(None, 8)\n+        if len(self.splittedLine) < 9:\n+            raise Exception("Line \'%s\' has less than 9 fields.  Exiting..." % (self.line))\n+        self.type = self.splittedLine[2]\n+        self.parseOptions()\n+        self.getId()\n+        self.getParents()\n \n-\tdef parseOptions(self):\n-\t\tself.parsedOptions = {}\n-\t\tfor option in self.splittedLine[8].split(";"):\n-\t\t\toption = option.strip()\n-\t\t\tif option == "": continue\n-\t\t\tposSpace = option.find(" ")\n-\t\t\tposEqual = option.find("=")\n-\t\t\tif posEqual != -1 and (posEqual < posSpace or posSpace == -1):\n-\t\t\t\tkey, value = option.split("=", 1)\n-\t\t\telif posSpace != -1:\n-\t\t\t\tkey, value = option.split(None, 1)\n-\t\t\telse:\n-\t\t\t\tkey   = "ID"\n-\t\t\t\tvalue = option\n-\t\t\tself.parsedOptions[key.strip()] = value.strip(" \\"")\n+    def parseOptions(self):\n+        self.parsedOptions = {}\n+        for option in self.splittedLine[8].split(";"):\n+            option = option.strip()\n+            if option == "": continue\n+            posSpace = option.find(" ")\n+            posEqual = option.find("=")\n+            if posEqual != -1 and (posEqual < posSpace or posSpace == -1):\n+                key, value = option.split("=", 1)\n+            elif 
posSpace != -1:\n+                key, value = option.split(None, 1)\n+            else:\n+                key   = "ID"\n+                value = option\n+            self.parsedOptions[key.strip()] = value.strip(" \\"")\n \n-\tdef getId(self):\n-\t\tfor key in self.parsedOptions:\n-\t\t\tif key.lower() == "id":\n-\t\t\t\tself.id = self.parsedOptions[key]\n-\t\t\t\treturn\n-\t\tif "Parent" in self.parsedOptions:\n-\t\t\tparent = self.parsedOptions["Parent"].split(",")[0]\n-\t\t\tif parent not in count:\n-\t\t\t\tcount[parent] = {}\n-\t\t\tif self.type not in count[parent]:\n-\t\t\t\tcount[parent][self.type] = 0\n-\t\t\tcount[parent][self.type] += 1\n-\t\t\tself.id = "%s-%s-%d" % (parent, self.type, count[parent][self.type])\n-\t\telse:\n-\t\t\tself.id = "smart%d" % (self.cpt)\n-\t\tself.parsedOptions["ID"] = self.id\n+    def getId(self):\n+        for key in self.parsedOptions:\n+            if key.lower() == "id":\n+                self.id = self.parsedOptions[key]\n+                return\n+        if "Parent" in self.parsedOptions:\n+            parent = self.parsedOptions["Parent"].split(",")[0]\n+            if parent not in count:\n+                count[parent] = {}\n+            if self.type not in count[parent]:\n+                count[parent][self.type] = 0\n+            count[parent][self.type] += 1\n+            self.id = "%s-%s-%d" % (parent, self.type, count[parent][self.type])\n+        else:\n+            self.id = "smart%d" % (self.cpt)\n+        self.parsedOptions["ID"] = self.id\n \n-\tdef getParents(self):\n-\t\tfor key in self.parsedOptions:\n-\t\t\tif key.lower() in ("parent", "derives_from"):\n-\t\t\t\tself.parents = self.parsedOptions[key].split(",")\n-\t\t\t\treturn\n-\t\tself.parents = None\n+    def getParents(self):\n+        for key in self.parsedOptions:\n+            if key.lower() in ("parent", "derives_from"):\n+                self.parents = self.parsedOptions[key].split(",")\n+                return\n+        self.parents = 
None\n \n-\tdef removeParent(self):\n-\t\tfor key in self.parsedOptions.keys():\n-\t\t\tif key.lower() in ("parent", "derives_from"):\n-\t\t\t\tdel self.parsedOptions[key]\n+    def removeParent(self):\n+        for key in self.parsedOptions.keys():\n+            if key.'..b'= False\n+            if line.parents:\n+                for parent in line.parents:\n+                    if parent in self.lines:\n+                        parentFound = True\n+                        if parent in self.children:\n+                            self.children[parent].append(line)\n+                        else:\n+                            self.children[parent] = [line]\n+            if not parentFound:\n+                line.removeParent()\n+                self.parents.append(line)\n+            progress.inc()\n+        progress.done()\n \n-\tdef write(self):\n-\t\tprogress = Progress(len(self.parents), "Writing output file", self.verbosity)\n-\t\tfor line in self.parents:\n-\t\t\tself.writeLine(line)\n-\t\t\tprogress.inc()\n-\t\tself.outputFile.close()\n-\t\tprogress.done()\n+    def write(self):\n+        progress = Progress(len(self.parents), "Writing output file", self.verbosity)\n+        for line in self.parents:\n+            self.writeLine(line)\n+            progress.inc()\n+        self.outputFile.close()\n+        progress.done()\n \n-\tdef writeLine(self, line):\n-\t\tself.outputFile.write(line.export())\n-\t\tif line.id in self.children:\n-\t\t\tfor child in self.children[line.id]:\n-\t\t\t\tself.writeLine(child)\n+    def writeLine(self, line):\n+        self.outputFile.write(line.export())\n+        if line.id in self.children:\n+            for child in self.children[line.id]:\n+                self.writeLine(child)\n \n-\tdef run(self):\n-\t\tself.parse()\n-\t\tself.sort()\n-\t\tself.write()\n+    def run(self):\n+        self.parse()\n+        self.sort()\n+        self.write()\n \n \n if __name__ == "__main__":\n-\t\n-\t# parse command line\n-\tdescription = 
"Clean GFF v1.0.3: Clean a GFF file (as given by NCBI) and outputs a GFF3 file. [Category: Other]"\n+    \n+    # parse command line\n+    description = "Clean GFF v1.0.3: Clean a GFF file (as given by NCBI) and outputs a GFF3 file. [Category: Other]"\n \n-\tparser = OptionParser(description = description)\n-\tparser.add_option("-i", "--input",     dest="inputFileName",  action="store",                      type="string", help="input file name [compulsory] [format: file in GFF format]")\n-\tparser.add_option("-o", "--output",    dest="outputFileName", action="store",                      type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n-\tparser.add_option("-t", "--types",     dest="types",          action="store", default="mRNA,exon", type="string", help="list of comma-separated types that you want to keep [format: string] [default: mRNA,exon]")\n-\tparser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,           type="int",    help="trace level [format: int]")\n-\t(options, args) = parser.parse_args()\n+    parser = OptionParser(description = description)\n+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                      type="string", help="input file name [compulsory] [format: file in GFF format]")\n+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                      type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n+    parser.add_option("-t", "--types",     dest="types",          action="store", default="mRNA,exon", type="string", help="list of comma-separated types that you want to keep [format: string] [default: mRNA,exon]")\n+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,           type="int",    help="trace level [format: int]")\n+    (options, args) = parser.parse_args()\n \n-\tcleanGff = 
CleanGff(options.verbosity)\n-\tcleanGff.setInputFileName(options.inputFileName)\n-\tcleanGff.setOutputFileName(options.outputFileName)\n-\tcleanGff.setAcceptedTypes(options.types.split(","))\n-\tcleanGff.run()\n+    cleanGff = CleanGff(options.verbosity)\n+    cleanGff.setInputFileName(options.inputFileName)\n+    cleanGff.setOutputFileName(options.outputFileName)\n+    cleanGff.setAcceptedTypes(options.types.split(","))\n+    cleanGff.run()\n \n'
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/cleaning/GffCleaner.py
--- a/SMART/Java/Python/cleaning/GffCleaner.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/cleaning/GffCleaner.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -127,11 +127,6 @@
  if line[0] == ">": break
  parsedLine = ParsedLine(line, cpt)
  if self.acceptedTypes == None or parsedLine.type in self.acceptedTypes:
- if parsedLine.id in self.lines:
- cpt = 1
- while "%s-%d" % (parsedLine.id, cpt) in self.lines:
- cpt += 1
- parsedLine.id = "%s-%d" % (parsedLine.id, cpt)
  self.lines[parsedLine.id] = parsedLine
  progress.inc()
  progress.done()
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/clusterize.py
--- a/SMART/Java/Python/clusterize.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/clusterize.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -104,8 +104,7 @@
  else:
  progress = Progress(self.nbElementsPerChromosome[chromosome], "Checking chromosome %s" % (chromosome), self.verbosity)
  parser   = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)
- transcripts     = []
- self.nbElements = 0
+ transcripts = []
  for newTranscript in parser.getIterator():
  newTranscripts = []
  if newTranscript.__class__.__name__ == "Mapping":
@@ -119,7 +118,6 @@
  newTranscripts.append(oldTranscript)
  newTranscripts.append(newTranscript)
  transcripts = newTranscripts
- self.nbElements += 1
  progress.inc()
  for transcript in transcripts:
  self._write(transcript)
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/getRandomRegions.py
--- a/SMART/Java/Python/getRandomRegions.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/getRandomRegions.py Mon Sep 30 03:19:26 2013 -0400
[
b'@@ -44,224 +44,228 @@\n \n class RandomRegionsGenerator(object):\n \n-    def __init__(self, verbosity):\n-        self.verbosity      = verbosity\n-        self.strands        = False\n-        self.distribution   = "uniform"\n-        self.transcripts    = None\n-        self.sequenceParser = None\n-        random.seed()\n+\tdef __init__(self, verbosity):\n+\t\tself.verbosity      = verbosity\n+\t\tself.strands        = False\n+\t\tself.distribution   = "uniform"\n+\t\tself.transcripts    = None\n+\t\tself.sequenceParser = None\n+\t\trandom.seed()\n \n \n-    def setInput(self, fileName):\n-        self.sequenceParser = FastaParser(fileName, self.verbosity)\n+\tdef setInput(self, fileName):\n+\t\tself.sequenceParser = FastaParser(fileName, self.verbosity)\n \n \n-    def setGenomeSize(self, size):\n-        self.genomeSize = size\n+\tdef setGenomeSize(self, size):\n+\t\tself.genomeSize = size\n \n \n-    def setChromosomeName(self, name):\n-        self.chromosomeName = name\n+\tdef setChromosomeName(self, name):\n+\t\tself.chromosomeName = name\n \n \n-    def setAnnotation(self, fileName, format):\n-        parser           = TranscriptContainer(fileName, format, self.verbosity)\n-        self.transcripts = []\n-        for transcript in parser.getIterator():\n-            self.transcripts.append(transcript)\n-        self.setNumber(len(self.transcripts))\n-        self.setSize(0)\n+\tdef setAnnotation(self, fileName, format):\n+\t\tparser           = TranscriptContainer(fileName, format, self.verbosity)\n+\t\tself.transcripts = []\n+\t\tfor transcript in parser.getIterator():\n+\t\t\tself.transcripts.append(transcript)\n+\t\tself.setNumber(len(self.transcripts))\n+\t\tself.setSize(0)\n \n \n-    def setOutputFile(self, fileName):\n-        self.outputFileName = fileName\n+\tdef setOutputFile(self, fileName):\n+\t\tself.outputFileName = fileName\n \n \n-    def setSize(self, size):\n-        self.minSize = size\n-        self.maxSize = size\n+\tdef 
setSize(self, size):\n+\t\tself.minSize = size\n+\t\tself.maxSize = size\n \n \n-    def setMinSize(self, size):\n-        self.minSize = size\n+\tdef setMinSize(self, size):\n+\t\tself.minSize = size\n \n \n-    def setMaxSize(self, size):\n-        self.maxSize = size\n+\tdef setMaxSize(self, size):\n+\t\tself.maxSize = size\n \n \n-    def setNumber(self, number):\n-        self.number = number\n+\tdef setNumber(self, number):\n+\t\tself.number = number\n \n \n-    def setStrands(self, strands):\n-        self.strands = strands\n+\tdef setStrands(self, strands):\n+\t\tself.strands = strands\n \n \n-    def setMaxDistribution(self, maxElements):\n-        if maxElements == None:\n-            return\n-        self.maxElements = maxElements\n-        self.distribution = "gaussian"\n+\tdef setMaxDistribution(self, maxElements):\n+\t\tif maxElements == None:\n+\t\t\treturn\n+\t\tself.maxElements = maxElements\n+\t\tself.distribution = "gaussian"\n \n \n-    def setDeviationDistribution(self, deviation):\n-        if deviation == None:\n-            return\n-        self.deviation = deviation\n-        self.distribution = "gaussian"\n+\tdef setDeviationDistribution(self, deviation):\n+\t\tif deviation == None:\n+\t\t\treturn\n+\t\tself.deviation = deviation\n+\t\tself.distribution = "gaussian"\n \n \n-    def getSizes(self):\n-        if self.sequenceParser == None:\n-            self.chromosomes    = [self.chromosomeName]\n-            self.sizes          = {self.chromosomeName: self.genomeSize}\n-            self.cumulatedSize  = self.genomeSize\n-            self.cumulatedSizes = {self.chromosomeName: self.genomeSize}\n-            return\n-        self.chromosomes    = self.sequenceParser.getRegions()\n-        self.sizes          = {}\n-        self.cumulatedSize  = 0\n-        self.cumulatedSizes = {}\n-        for chromosome in self.chromosomes:\n-            self.sizes[chromosome]          = self.sequenceParser.getSizeOfRegion(chromosome)\n-            
self.cumulatedSize             += self.sizes[chromosome]\n-            self.cumulatedSizes[chromosome] = self.cumulatedSize\n+\tdef getSizes(self):\n+\t\tif self.sequenceParser == None:\n+\t\t\tself.chromosomes    = [self.chromosomeNa'..b'-referenceSize", dest="referenceSize",  action="store",      default=None,  type="int",    help="size of the chromosome (when no reference is given) [format: int]")\n+\tparser.add_option("-c", "--chromosome",    dest="chromosome",     action="store",      default=None,  type="string", help="name of the chromosome (when no reference is given) [format: string]")\n+\tparser.add_option("-o", "--output",        dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in FASTA format]")\n+\tparser.add_option("-i", "--input",         dest="inputFileName",  action="store",      default=None,  type="string", help="optional file containing regions to shuffle [format: file in transcript format given by -f]")\n+\tparser.add_option("-f", "--format",        dest="format",         action="store",      default=None,  type="string", help="format of the previous file [format: transcript file format]")\n+\tparser.add_option("-s", "--size",          dest="size",           action="store",      default=None,  type="int",    help="size of the regions (if no region set is provided) [format: int]")\n+\tparser.add_option("-z", "--minSize",       dest="minSize",        action="store",      default=None,  type="int",    help="minimum size of the regions (if no region set nor a fixed size are provided) [format: int]")\n+\tparser.add_option("-Z", "--maxSize",       dest="maxSize",        action="store",      default=None,  type="int",    help="maximum size of the regions (if no region set nor a fixed size are provided) [format: int]")\n+\tparser.add_option("-n", "--number",        dest="number",         action="store",      default=None,  type="int",    help="number of regions (if no region 
set is provided) [format: int]")\n+\tparser.add_option("-t", "--strands",       dest="strands",        action="store_true", default=False,                help="use both strands (if no region set is provided) [format: boolean]")\n+\tparser.add_option("-m", "--max",           dest="max",            action="store",      default=None,  type="int",    help="max. # reads in a cluster (for Gaussian dist.) [format: int]")\n+\tparser.add_option("-d", "--deviation",     dest="deviation",      action="store",      default=None,  type="int",    help="deviation around the center of the cluster (for Gaussian dist.) [format: int]")\n+\tparser.add_option("-v", "--verbosity",     dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")\n+\t(options, args) = parser.parse_args()\n \n-    rrg = RandomRegionsGenerator(options.verbosity)\n-    if options.reference == None:\n-        rrg.setGenomeSize(options.referenceSize)\n-        rrg.setChromosomeName(options.chromosome)\n-    else:\n-        rrg.setInput(options.reference)\n-    rrg.setOutputFile(options.outputFileName)\n-    if options.inputFileName == None:\n-        if options.size != None:\n-            rrg.setSize(options.size)\n-        else:\n-            rrg.setMinSize(options.minSize)\n-            rrg.setMaxSize(options.maxSize)\n-        rrg.setNumber(options.number)\n-        rrg.setStrands(options.strands)\n-    else:\n-        rrg.setAnnotation(options.inputFileName, options.format)\n-    rrg.setMaxDistribution(options.max)\n-    rrg.setDeviationDistribution(options.deviation)\n-    rrg.run()\n+\trrg = RandomRegionsGenerator(options.verbosity)\n+\tif options.reference == None:\n+\t\trrg.setGenomeSize(options.referenceSize)\n+\t\trrg.setChromosomeName(options.chromosome)\n+\telse:\n+\t\trrg.setInput(options.reference)\n+\trrg.setOutputFile(options.outputFileName)\n+\tif options.inputFileName == None:\n+\t\tif options.size != 
None:\n+\t\t\trrg.setSize(options.size)\n+\t\telse:\n+\t\t\trrg.setMinSize(options.minSize)\n+\t\t\trrg.setMaxSize(options.maxSize)\n+\t\trrg.setNumber(options.number)\n+\t\trrg.setStrands(options.strands)\n+\telse:\n+\t\trrg.setAnnotation(options.inputFileName, options.format)\n+\trrg.setMaxDistribution(options.max)\n+\trrg.setDeviationDistribution(options.deviation)\n+\trrg.run()\n \n'
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/mySql/MySqlConnection.py
--- a/SMART/Java/Python/mySql/MySqlConnection.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/mySql/MySqlConnection.py Mon Sep 30 03:19:26 2013 -0400
b
@@ -88,18 +88,6 @@
             self.connection.commit()
 
 
-    def executeManyFormattedQueries(self, command, lines, insertion = False):
-        cursor = self.connection.cursor()
-        query = MySqlQuery(cursor, self.verbosity)
-        for line in lines:
-            result = query.executeFormat(command, line)
-        self.connection.commit()
-        if insertion:
-            return result
-        else:
-            return query
-
-
     def executeManyQueriesIterator(self, table):
         cursor = self.connection.cursor()
         query = MySqlQuery(cursor, self.verbosity)
@@ -113,25 +101,9 @@
             self.connection.commit()
 
 
-    def executeManyFormattedQueriesIterator(self, table):
+    def executeFormattedQuery(self, command, *parameters):
         cursor = self.connection.cursor()
         query = MySqlQuery(cursor, self.verbosity)
-        try:
-            for command, values in table.getIterator():
-                query.executeFormat(command, values)
-            self.connection.commit()
-        except:
-            for command, values in table.getIterator():
-                query.execute(command, values)
-            self.connection.commit()
-
-
-    def executeFormattedQuery(self, command, parameters, insertion = False):
-        cursor = self.connection.cursor()
-        query = MySqlQuery(cursor, self.verbosity)
-        result = query.executeFormat(command, parameters)
+        query.executeFormat(command, parameters)
         self.connection.commit()
-        if insertion:
-            return result
-        else:
-            return query
\ No newline at end of file
+        return query
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/mySql/MySqlQuery.py
--- a/SMART/Java/Python/mySql/MySqlQuery.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/mySql/MySqlQuery.py Mon Sep 30 03:19:26 2013 -0400
b
@@ -91,4 +91,4 @@
 
     def show(self):
         for line in self.getIterator():
-            print line
\ No newline at end of file
+            print line
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/mySql/MySqlTable.py
--- a/SMART/Java/Python/mySql/MySqlTable.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/mySql/MySqlTable.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -116,18 +116,6 @@
         self.mySqlConnection.executeManyQueries(commands)
         
         
-    def insertManyFormatted(self, lines):
-        """
-        Insert many lines
-        @param lines: the list of values
-        @type  lines: list of lists
-        """
-        replacer = ["?"] * len(self.variables)
-        command = "INSERT INTO '%s' (%s) VALUES (%s)" % (self.name, ", ".join(self.variables), ", ".join(replacer))
-        values  = [[line[variable] for variable in self.variables] for line in lines]
-        self.mySqlConnection.executeManyFormattedQueries(command, values)
-        
-        
     def rename(self, name):
         """
         Rename the table
@@ -229,10 +217,6 @@
         @type  values: dict
         @return:       the id of the added row
         """
-        sqlValues = [values[variable] for variable in self.variables]
-        command = "INSERT INTO '%s' (%%s) VALUES (%s)" % (self.name, ", ".join(self.variables))
-        id = self.mySqlConnection.executeFormattedQueryQuery(command, sqlValues, True)
-        return id
         sqlValues = []
         for variable in self.variables:
             sqlValues.append(self.formatSql(values[variable], self.types[variable], self.sizes[variable]))
@@ -347,3 +331,4 @@
         query = self.mySqlConnection.executeQuery("SELECT * FROM '%s'" % (self.name))
         print query.getLines()
 
+
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/mySql/MySqlTranscriptTable.py
--- a/SMART/Java/Python/mySql/MySqlTranscriptTable.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/mySql/MySqlTranscriptTable.py Mon Sep 30 03:19:26 2013 -0400
b
@@ -146,4 +146,4 @@
             
 
     def setDefaultTagValue(self, name, value):
-        super(MySqlTranscriptTable, self).setDefaultTagValue(Transcript.getSqlVariables().index("tags")+1, name, value)
\ No newline at end of file
+        super(MySqlTranscriptTable, self).setDefaultTagValue(Transcript.getSqlVariables().index("tags")+1, name, value)
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/ncList/NCList.py
--- a/SMART/Java/Python/ncList/NCList.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/ncList/NCList.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -108,10 +108,12 @@
  self._offsets[fileType] = offset
 
  def _setFileNames(self, fileName):
+ print "Got file name", fileName
  if self._chromosome != None and fileName != None:
  coreName = os.path.splitext(fileName)[0]
  if "SMARTTMPPATH" in os.environ:
  coreName = os.path.join(os.environ["SMARTTMPPATH"], coreName)
+ print "Used core name", coreName
  self._hFileName = "%s_H.bin" % (coreName)
  self._lFileName = "%s_L.bin" % (coreName)
  self._tFileName = "%s_T.bin" % (coreName)
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/plot.py
--- a/SMART/Java/Python/plot.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/plot.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -134,11 +134,7 @@
         values   = dict([i * step + minValue, 0] for i in range(0, self.nbBars))
         top      = (self.nbBars - 1) * step + minValue
         for key, value in line.iteritems():
-            divisor = float(maxValue - minValue) * self.nbBars
-            tmpMinValue = top
-            if divisor != 0:
-                tmpMinValue = min(top, int(math.floor((key - minValue) / divisor)))
-            newKey =  tmpMinValue * step + minValue
+            newKey = min(top, int(math.floor((key - minValue) / float(maxValue - minValue) * self.nbBars)) * step + minValue)
             values[newKey] += value
         return values
 
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/plotCoverage.py
--- a/SMART/Java/Python/plotCoverage.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/plotCoverage.py Mon Sep 30 03:19:26 2013 -0400
[
b'@@ -32,7 +32,7 @@\n from optparse import OptionParser\n from SMART.Java.Python.structure.Interval import Interval\n from SMART.Java.Python.structure.Transcript import Transcript\n-from commons.core.parsing.ParserChooser import ParserChooser\n+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n from SMART.Java.Python.misc.RPlotter import RPlotter\n from SMART.Java.Python.misc.Progress import Progress\n from commons.core.parsing.FastaParser import FastaParser\n@@ -42,440 +42,430 @@\n colorLine = "black"\n \n def parseTargetField(field):\n-\tstrand             = "+"\n-\tsplittedFieldSpace = field.split()\n-\tsplittedFieldPlus  = field.split("+", 4)\n-\tif len(splittedFieldSpace) == 3:\n-\t\tid, start, end = splittedFieldSpace\n-\telif len(splittedFieldSpace) == 4:\n-\t\tid, start, end, strand = splittedFieldSpace\n-\telif len(splittedFieldPlus) == 3:\n-\t\tid, start, end = splittedFieldPlus\n-\telif len(splittedFieldPlus) == 4:\n-\t\tid, start, end, strand = splittedFieldPlus\n-\telse:\n-\t\traise Exception("Cannot parse Target field \'%s\'." % (field))\n-\treturn (id, int(start), int(end), strand)\n+    strand             = "+"\n+    splittedFieldSpace = field.split()\n+    splittedFieldPlus  = field.split("+", 4)\n+    if len(splittedFieldSpace) == 3:\n+        id, start, end = splittedFieldSpace\n+    elif len(splittedFieldSpace) == 4:\n+        id, start, end, strand = splittedFieldSpace\n+    elif len(splittedFieldPlus) == 3:\n+        id, start, end = splittedFieldPlus\n+    elif len(splittedFieldPlus) == 4:\n+        id, start, end, strand = splittedFieldPlus\n+    else:\n+        raise Exception("Cannot parse Target field \'%s\'." 
% (field))\n+    return (id, int(start), int(end), strand)\n \n \n class SimpleTranscript(object):\n-\tdef __init__(self, transcript1, transcript2, color = None):\n-\t\tself.start  = max(0, transcript1.getStart() - transcript2.getStart())\n-\t\tself.end    = min(transcript2.getEnd() - transcript2.getStart(), transcript1.getEnd() - transcript2.getStart())\n-\t\tself.strand = transcript1.getDirection() * transcript2.getDirection()\n-\t\tself.exons  = []\n-\t\tfor exon in transcript1.getExons():\n-\t\t\tif exon.getEnd() >= transcript2.getStart() and exon.getStart() <= transcript2.getEnd():\n-\t\t\t\tstart = max(0, exon.getStart() - transcript2.getStart())\n-\t\t\t\tend   = min(transcript2.getEnd() - transcript2.getStart(), exon.getEnd() - transcript2.getStart())\n-\t\t\t\tself.addExon(start, end, self.strand, color)\n+    def __init__(self, transcript1, transcript2, color = None):\n+        self.start  = max(0, transcript1.getStart() - transcript2.getStart())\n+        self.end    = min(transcript2.getEnd() - transcript2.getStart(), transcript1.getEnd() - transcript2.getStart())\n+        self.strand = transcript1.getDirection() * transcript2.getDirection()\n+        self.exons  = []\n+        for exon in transcript1.getExons():\n+            if exon.getEnd() >= transcript2.getStart() and exon.getStart() <= transcript2.getEnd():\n+                start = max(0, exon.getStart() - transcript2.getStart())\n+                end   = min(transcript2.getEnd() - transcript2.getStart(), exon.getEnd() - transcript2.getStart())\n+                self.addExon(start, end, self.strand, color)\n \n-\tdef addExon(self, start, end, strand, color):\n-\t\texon = SimpleExon(start, end, strand, color)\n-\t\tself.exons.append(exon)\n+    def addExon(self, start, end, strand, color):\n+        exon = SimpleExon(start, end, strand, color)\n+        self.exons.append(exon)\n \n-\tdef getRScript(self, yOffset, height):\n-\t\trString     = ""\n-\t\tpreviousEnd = None\n-\t\tfor exon in 
sorted(self.exons, key=lambda exon: exon.start):\n-\t\t\tif previousEnd != None:\n-\t\t\t\trString += "segments(%.1f, %.1f, %.1f, %.1f, col = \\"%s\\")\\n" % (previousEnd, yOffset + height / 4.0, exon.start, yOffset + height / 4.0, colorLine)\n-\t\t\trString    += exon.getRScript(yOffset, height)\n-\t\t\tpreviousEnd = exon.end\n-\t\treturn rString\n+    def getRScript(self, yOffset, height):\n+        rString     = ""\n+        previousEnd = None\n+        for exon in sorte'..b'e format]")\n+    parser.add_option("-q", "--sequence",     dest="inputSequence",  action="store",      default=None,    type="string", help="input sequence file [format: file in FASTA format] [default: None]")\n+    parser.add_option("-o", "--output",       dest="outputFileName", action="store",                       type="string", help="output file [compulsory] [format: output file in PNG format]")\n+    parser.add_option("-w", "--width",        dest="width",          action="store",      default=1500,    type="int",    help="width of the plots (in px) [format: int] [default: 1500]")\n+    parser.add_option("-e", "--height",       dest="height",         action="store",      default=1000,    type="int",    help="height of the plots (in px) [format: int] [default: 1000]")\n+    parser.add_option("-t", "--title",        dest="title",          action="store",      default="",      type="string", help="title of the plots [format: string]")\n+    parser.add_option("-x", "--xlab",         dest="xLabel",         action="store",      default="",      type="string", help="label on the x-axis [format: string]")\n+    parser.add_option("-y", "--ylab",         dest="yLabel",         action="store",      default="",      type="string", help="label on the y-axis [format: string]")\n+    parser.add_option("-p", "--plusColor",    dest="plusColor",      action="store",      default="red",   type="string", help="color for the elements on the plus strand [format: string] [default: red]")\n+    
parser.add_option("-m", "--minusColor",   dest="minusColor",     action="store",      default="blue",  type="string", help="color for the elements on the minus strand [format: string] [default: blue]")\n+    parser.add_option("-s", "--sumColor",     dest="sumColor",       action="store",      default="black", type="string", help="color for 2 strands coverage line [format: string] [default: black]")\n+    parser.add_option("-l", "--lineColor",    dest="lineColor",      action="store",      default="black", type="string", help="color for the lines [format: string] [default: black]")\n+    parser.add_option("-1", "--merge",        dest="merge",          action="store_true", default=False,                  help="merge the 2 plots in 1 [format: boolean] [default: false]")\n+    parser.add_option("-D", "--directory",    dest="working_Dir",    action="store",      default=os.getcwd(), type="string", help="the directory to store the results [format: directory]")\n+    parser.add_option("-v", "--verbosity",    dest="verbosity",      action="store",      default=1,       type="int",    help="trace level [format: int]")\n+    (options, args) = parser.parse_args()\n \n-\tcolors[1]  = options.plusColor\n-\tcolors[-1] = options.minusColor\n-\tcolors[0]  = options.sumColor\n-\tcolorLine  = options.lineColor\n+    colors[1]  = options.plusColor\n+    colors[-1] = options.minusColor\n+    colors[0]  = options.sumColor\n+    colorLine  = options.lineColor\n \n-\tpp = PlotParser(options.verbosity)\n-\tpp.addInput(0, options.inputFileName1, options.inputFormat1)\n-\tpp.addInput(1, options.inputFileName2, options.inputFormat2)\n-\tpp.addSequence(options.inputSequence)\n-\tpp.setOutput(options.outputFileName if os.path.isabs(options.outputFileName) else os.path.join(options.working_Dir, options.outputFileName))\n-\tpp.setPlotSize(options.width, options.height)\n-\tpp.setLabels(options.xLabel, options.yLabel)\n-\tpp.setTitle(options.title)\n-\tpp.setMerge(options.merge)\n-\tpp.start()\n+ 
   pp = PlotParser(options.verbosity)\n+    pp.addInput(0, options.inputFileName1, options.inputFormat1)\n+    pp.addInput(1, options.inputFileName2, options.inputFormat2)\n+    pp.addSequence(options.inputSequence)\n+    pp.setOutput(options.outputFileName if os.path.isabs(options.outputFileName) else os.path.join(options.working_Dirpath, options.outputFileName))\n+    pp.setPlotSize(options.width, options.height)\n+    pp.setLabels(options.xLabel, options.yLabel)\n+    pp.setTitle(options.title)\n+    pp.setMerge(options.merge)\n+    pp.start()\n \n+\n'
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/structure/Interval.py
--- a/SMART/Java/Python/structure/Interval.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/structure/Interval.py Mon Sep 30 03:19:26 2013 -0400
b
@@ -139,7 +139,7 @@
         if not chromosome:
             self.seqname = None
         else:
-            self.seqname = chromosome.replace(".", "_").replace("|", "_")
+            self.seqname = chromosome.replace("|", "_")
 
 
     def setStart(self, start):
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/Java/Python/structure/Transcript.py
--- a/SMART/Java/Python/structure/Transcript.py Wed Sep 18 08:51:22 2013 -0400
+++ b/SMART/Java/Python/structure/Transcript.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -347,31 +347,6 @@
  newTranscript.exons = theseExons
  return newTranscript
 
-
- def getIntersection(self, transcript):
- """
- Get the intersection between this transcript and another one
- @param transcript: object to be compared to
- @type  transcript: class L{Transcript<Transcript>}
- @return:           an other transcript
- """
- if self.getChromosome() != transcript.getChromosome() or self.getDirection() != transcript.getDirection():
- return None
- newTranscript = Transcript()
- newTranscript.setDirection(self.getDirection())
- newTranscript.setChromosome(self.getChromosome())
- newTranscript.setName("%s_intersect_%s" % (self.getName(), transcript.getName()))
- newExons = []
- for thisExon in self.getExons():
- for thatExon in transcript.getExons():
- newExon = thisExon.getIntersection(thatExon)
- if newExon != None:
- newExons.append(newExon)
- if not newExons:
- return None
- newTranscript.exons = newExons
- return newTranscript
-
 
  def getSqlVariables(cls):
  """
b
diff -r e454402ba9d9 -r 169d364ddd91 SMART/galaxy/CompareOverlappingAdapt.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/CompareOverlappingAdapt.xml Mon Sep 30 03:19:26 2013 -0400
b
@@ -0,0 +1,153 @@
+<tool id="CompareOverlappingAdapt" name="compare overlapping">
+ <description>Provide the queries that overlap with a reference.</description>  
+ <requirements>
+ <requirement type="set_environment">PYTHONPATH</requirement>
+ </requirements>
+ <command interpreter="python">
+ ../Java/Python/CompareOverlappingAdapt.py -i $formatType.inputFileName1 
+ #if $formatType.FormatInputFileName1 == 'bed':  
+ -f bed
+ #elif $formatType.FormatInputFileName1 == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName1 == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName1 == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName1 == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName1 == 'gtf':
+ -f gtf
+ #end if
+ -j $formatType2.inputFileName2
+ #if $formatType2.FormatInputFileName2 == 'bed':
+ -g bed
+ #elif $formatType2.FormatInputFileName2 == 'gff':
+ -g gff
+ #elif $formatType2.FormatInputFileName2 == 'gff2':
+ -g gff2
+ #elif $formatType2.FormatInputFileName2 == 'gff3':
+ -g gff3
+ #elif $formatType2.FormatInputFileName2 == 'sam':
+ -g sam
+ #elif $formatType2.FormatInputFileName2 == 'gtf':
+     -g gtf
+ #end if
+ -o $outputFileGff 
+ #if $OptionDistance.Dist == 'Yes':
+ -d $OptionDistance.distance
+ #end if
+ #if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
+ -c 
+ #elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
+ -a
+ #end if
+ $InvertMatch
+ $NotOverlapping
+ </command>
+
+ <inputs>
+ <conditional name="formatType">
+ <param name="FormatInputFileName1" type="select" label="Input Query File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
+                         </when>
+ </conditional>
+
+ <conditional name="formatType2">
+ <param name="FormatInputFileName2" type="select" label="Input Reference File Format">
+ <option value="bed">bed</option>
+ <option value="gff">gff</option>
+ <option value="gff2">gff2</option>
+ <option value="gff3">gff3</option>
+ <option value="sam">sam</option>
+ <option value="gtf">gtf</option>
+ </param>
+ <when value="bed">
+ <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff">
+ <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff2">
+ <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
+ </when>
+ <when value="gff3">
+ <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
+ </when>
+ <when value="sam">
+ <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
+ </when>
+ <when value="gtf">
+ <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
+ </when>
+ </conditional>
+ <conditional name="OptionDistance">
+ <param name="Dist" type="select" label="Maximum Distance between two reads">
+ <option value="Yes">Yes</option>
+ <option value="No" selected="true">No</option>
+ </param>
+ <when value="Yes">
+ <param name="distance" type="integer" value="0"/>
+ </when>
+ <when value="No">
+ </when>
+ </conditional>
+ <conditional name="OptionCollinearOrAntiSens">
+ <param name="OptionCA" type="select" label="Collinear or anti-sens">
+ <option value="Collinear">Collinear</option>
+ <option value="AntiSens">AntiSens</option>
+ <option value="NONE" selected="true">NONE</option>
+ </param>
+ <when value="Collinear">
+ </when>
+ <when value="AntiSens">
+ </when>
+ <when value="NONE">
+ </when>
+ </conditional>
+ <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
+ <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
+ </inputs>
+
+ <outputs>
+ <data name="outputFileGff" format="gff3"/>
+ </outputs> 
+
+ <help>
+This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example).
+  
+It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one.
+
+Various modifiers are also available:
+
+-Invert selection (report those which do not overlap).
+
+-Restrict to collinear / anti-sense overlapping data.
+
+-Keep the query data even if they do not strictly overlap with the reference data, but are located no further than *n* nucleotides from some reference data.
+
+The "Invert match" option reverses the selection. In other words, it performs the comparison as usual, and outputs all the query data which do not overlap.
+ </help>
+</tool>
b
diff -r e454402ba9d9 -r 169d364ddd91 commons/core/parsing/FastaParser.py
--- a/commons/core/parsing/FastaParser.py Wed Sep 18 08:51:22 2013 -0400
+++ b/commons/core/parsing/FastaParser.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -80,7 +80,7 @@
  if self.currentLine != None:
  if self.currentLine[0] != ">":
  raise Exception("First line is weird: %s" % (self.currentLine))
- name = self.currentLine[1:].split()[0].replace("|", "_").replace(".", "_")
+ name = self.currentLine[1:].split()[0]
  self.currentLine = None
 
  for line in self.handle:
@@ -89,7 +89,7 @@
  pass
  elif line[0] == ">":
  if name == None:
- name = line[1:].split()[0].replace("|", "_").replace(".", "_")
+ name = line[1:].split()[0]
  else:
  self.currentLine = line
  return Sequence(name, string)
b
diff -r e454402ba9d9 -r 169d364ddd91 commons/core/parsing/WigParser.py
--- a/commons/core/parsing/WigParser.py Wed Sep 18 08:51:22 2013 -0400
+++ b/commons/core/parsing/WigParser.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -85,11 +85,12 @@
  Create an index name for a file
  """
  directoryName = os.path.dirname(self.fileName)
+ baseName      = os.path.splitext(os.path.basename(self.fileName))[0]
  if strand == None:
  strandName = ""
  else:
  strandName = "+" if strand == 1 else "-"
- indexName = os.path.join(directoryName, ".%s%s.index" % (chromosome, strandName))
+ indexName = os.path.join(directoryName, ".%s.%s%s.index" % (baseName, chromosome, strandName))
  return indexName
 
 
b
diff -r e454402ba9d9 -r 169d364ddd91 commons/core/writer/MySqlTranscriptWriter.py
--- a/commons/core/writer/MySqlTranscriptWriter.py Wed Sep 18 08:51:22 2013 -0400
+++ b/commons/core/writer/MySqlTranscriptWriter.py Mon Sep 30 03:19:26 2013 -0400
[
@@ -164,7 +164,7 @@
         @type  transcriptListParser: class L{TranscriptListParser<TranscriptListParser>}
         """
         self.transcriptListParser = transcriptListParser
-        self.mySqlConnection.executeManyFormattedQueriesIterator(self)
+        self.mySqlConnection.executeManyQueriesIterator(self)
             
             
     def getIterator(self):
@@ -178,8 +178,7 @@
                 self.createTable(chromosome)
             self.nbTranscriptsByChromosome[chromosome] = self.nbTranscriptsByChromosome.get(chromosome, 0) + 1
             values = transcript.getSqlValues()
-            #yield "INSERT INTO '%s' (%s) VALUES (%s)" % (self.tables[chromosome].name, ", ".join(self.tables[chromosome].variables), ", ".join([MySqlTable.formatSql(values[variable], self.tables[chromosome].types[variable], self.tables[chromosome].sizes[variable]) for variable in self.tables[chromosome].variables]))
-            yield ("INSERT INTO '%s' (%s) VALUES (%s)" % (self.tables[chromosome].name, ", ".join(self.tables[chromosome].variables), ", ".join(["?"] * len(self.tables[chromosome].variables))), [values[variable] for variable in self.tables[chromosome].variables])
+            yield "INSERT INTO '%s' (%s) VALUES (%s)" % (self.tables[chromosome].name, ", ".join(self.tables[chromosome].variables), ", ".join([MySqlTable.formatSql(values[variable], self.tables[chromosome].types[variable], self.tables[chromosome].sizes[variable]) for variable in self.tables[chromosome].variables]))
             progress.inc()
         progress.done()
             
@@ -191,7 +190,7 @@
         """
         for chromosome in self.transcriptValues:
             if chromosome in self.transcriptValues:
-                self.tables[chromosome].insertManyFormatted(self.transcriptValues[chromosome])
+                self.tables[chromosome].insertMany(self.transcriptValues[chromosome])
         self.transcriptValues = {}
         self.toBeWritten      = False
             
@@ -212,4 +211,4 @@
         Drop the tables
         """
         for chromosome in self.tables:
-            self.tables[chromosome].remove()
\ No newline at end of file
+            self.tables[chromosome].remove()
b
diff -r e454402ba9d9 -r 169d364ddd91 tool_conf.xml
--- a/tool_conf.xml Wed Sep 18 08:51:22 2013 -0400
+++ b/tool_conf.xml Mon Sep 30 03:19:26 2013 -0400
b
@@ -1,48 +1,45 @@
-  <section id="s_mart" name="S-MART" version="">
-    <label id="Smart_Comparison" text="Comparison Tools" version=""/>
-      <tool file="s_mart/CompareOverlappingSmallQuery.xml"/>
-      <tool file="s_mart/CompareOverlappingSmallRef.xml"/>
-      <tool file="s_mart/compareOverlapping.xml"/>
+  <section id="s_mart" name="S-MART" version="3.0">
+    <label id="Smart_Comparison" text="Comparison Tools" version="3.0"/>
+      <tool file="s_mart/CompareOverlappingAdapt.xml"/>
       <tool file="s_mart/getDifference.xml"/>
       <tool file="s_mart/computeCoverage.xml"/>
       <tool file="s_mart/GetFlanking.xml"/>
       <tool file="s_mart/GetDifferentialExpression.xml"/>
-    <label id="Smart_Merge" text="Merge Tools" version=""/>
+    <label id="Smart_Merge" text="Merge Tools" version="3.0"/>
       <tool file="s_mart/clusterize.xml"/>
       <tool file="s_mart/mergeTranscriptLists.xml"/>
       <tool file="s_mart/CollapseReads.xml"/>
       <tool file="s_mart/clusterizeBySlidingWindows.xml"/>
       <tool file="s_mart/mergeSlidingWindowsClusters.xml"/>
-    <label id="Smart_Visualization" text="Visualization Tools" version=""/>
+    <label id="Smart_Visualization" text="Visualization Tools" version="3.0"/>
       <tool file="s_mart/getDistribution.xml"/>
       <tool file="s_mart/getDistance.xml"/>
       <tool file="s_mart/getSizes.xml"/>
       <tool file="s_mart/plotCoverage.xml"/>
       <tool file="s_mart/WrappGetLetterDistribution1.xml"/>
       <tool file="s_mart/plotTranscriptList.xml"/>
-    <label id="Smart_Sequence" text="Sequence Tools" version=""/>
+    <label id="Smart_Sequence" text="Sequence Tools" version="3.0"/>
       <tool file="s_mart/CountReadGCPercent.xml"/>
-    <label id="Smart_Modification" text="Modification Tools" version=""/>
+    <label id="Smart_Modification" text="Modification Tools" version="3.0"/>
       <tool file="s_mart/modifyGenomicCoordinates.xml"/>
       <tool file="s_mart/modifySequenceList.xml"/>
       <tool file="s_mart/trimSequences.xml"/>
-    <label id="Smart_Selection" text="Selection Tools" version=""/>
+    <label id="Smart_Selection" text="Selection Tools" version="3.0"/>
       <tool file="s_mart/getExons.xml"/>
       <tool file="s_mart/getIntrons.xml"/>
       <tool file="s_mart/restrictFromSize.xml"/>
       <tool file="s_mart/restrictTranscriptList.xml"/>
-    <label id="Smart_Conversion" text="Conversion Tools" version=""/>
+    <label id="Smart_Conversion" text="Conversion Tools" version="3.0"/>
       <tool file="s_mart/ConvertTranscriptFile.xml"/>
       <tool file="s_mart/coordinatesToSequence.xml"/>
       <tool file="s_mart/mapperAnalyzer.xml"/>
-    <label id="Smart_WIG" text="WIG Manipulation Tools" version=""/>
+    <label id="Smart_WIG" text="WIG Manipulation Tools" version="3.0"/>
       <tool file="s_mart/getWigData.xml"/>
       <tool file="s_mart/getWigDistance.xml"/>
       <tool file="s_mart/getWigProfile.xml"/>
-    <label id="Smart_GFF" text="GFF Manipulation Tools" version=""/>
+    <label id="Smart_GFF" text="GFF Manipulation Tools" version="3.0"/>
       <tool file="s_mart/CleanTranscriptFile.xml"/>
       <tool file="s_mart/changeTagName.xml"/>
       <tool file="s_mart/changeGffFeatures.xml"/>
-      <tool file="s_mart/removeExonLines.xml"/>
       <tool file="s_mart/SelectByTag.xml"/>
   </section>
b
diff -r e454402ba9d9 -r 169d364ddd91 tool_dependencies.xml
--- a/tool_dependencies.xml Wed Sep 18 08:51:22 2013 -0400
+++ b/tool_dependencies.xml Mon Sep 30 03:19:26 2013 -0400
b
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <set_environment version="1.0">
-        <environment_variable name="PYTHONPATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable>
+        <environment_variable name="PYTHONPATH" action="prepend_to">$REPOSITORY_INSTALL_DIR</environment_variable>
     </set_environment>
 </tool_dependency>