changeset 38:2c0c0a89fad7

Uploaded
author m-zytnicki
date Thu, 02 May 2013 09:56:47 -0400
parents d22fadc825e3
children 1236e5a49595
files LICENSE.txt README.txt SMART/Java/Python/CleanTranscriptFile.py SMART/Java/Python/ClusterizeByTags.py SMART/Java/Python/CollapseReads.py SMART/Java/Python/CombineTags.py SMART/Java/Python/CompareOverlapping.py SMART/Java/Python/CompareOverlappingSmallQuery.py SMART/Java/Python/CompareOverlappingSmallRef.py SMART/Java/Python/ComputeCoverage.py SMART/Java/Python/CountLoci.py SMART/Java/Python/CountReadGCPercent.py SMART/Java/Python/FindOverlapsOptim.py SMART/Java/Python/GetDifferentialExpression.py SMART/Java/Python/GetDistribution.py SMART/Java/Python/GetFlanking.py SMART/Java/Python/GetIntersection.py SMART/Java/Python/GetRandomSubset.py SMART/Java/Python/GetReadDistribution.py SMART/Java/Python/GetReadSizes.py SMART/Java/Python/GetUpDownStream.py SMART/Java/Python/RestrictFromCoverage.py SMART/Java/Python/SelectByTag.py SMART/Java/Python/WrappGetDistribution.py SMART/Java/Python/WrappGetReadDistribution.py SMART/Java/Python/WrappPlotCoverage.py SMART/Java/Python/WrappPlotRepartition.py SMART/Java/Python/__init__.py SMART/Java/Python/adaptorStripper.py SMART/Java/Python/changeGffFeatures.sh SMART/Java/Python/changeTagName.py SMART/Java/Python/cleanGff.py SMART/Java/Python/cleaning/CleanerChooser.py SMART/Java/Python/cleaning/DefaultCleaner.py SMART/Java/Python/cleaning/GffCleaner.py SMART/Java/Python/cleaning/GtfCleaner.py SMART/Java/Python/cleaning/TranscriptListCleaner.py SMART/Java/Python/cleaning/__init__.py SMART/Java/Python/clusterize.py SMART/Java/Python/clusterizeBySlidingWindows.py SMART/Java/Python/compareOverlapping.py SMART/Java/Python/convertTranscriptFile.py SMART/Java/Python/coordinatesToSequence.py SMART/Java/Python/findTss.py SMART/Java/Python/fold.py SMART/Java/Python/getDifference.py SMART/Java/Python/getDistance.py SMART/Java/Python/getDistribution.py SMART/Java/Python/getElement.py SMART/Java/Python/getExons.py SMART/Java/Python/getInfoPerCoverage.py SMART/Java/Python/getIntrons.py SMART/Java/Python/getLetterDistribution.py 
SMART/Java/Python/getNb.py SMART/Java/Python/getRandomRegions.py SMART/Java/Python/getReadDistribution.py SMART/Java/Python/getSequence.py SMART/Java/Python/getSizes.py SMART/Java/Python/getWigData.py SMART/Java/Python/getWigDistance.py SMART/Java/Python/getWigProfile.py SMART/Java/Python/mapperAnalyzer.py SMART/Java/Python/mappingToCoordinates.py SMART/Java/Python/mergeSlidingWindowsClusters.py SMART/Java/Python/mergeTranscriptLists.py SMART/Java/Python/misc/MultipleRPlotter.py SMART/Java/Python/misc/Progress.py SMART/Java/Python/misc/RPlotter.py SMART/Java/Python/misc/UnlimitedProgress.py SMART/Java/Python/misc/Utils.py SMART/Java/Python/misc/__init__.py SMART/Java/Python/modifyFasta.py SMART/Java/Python/modifyGenomicCoordinates.py SMART/Java/Python/modifySequenceList.py SMART/Java/Python/mySql/MySqlConnection.py SMART/Java/Python/mySql/MySqlExonTable.py SMART/Java/Python/mySql/MySqlQuery.py SMART/Java/Python/mySql/MySqlTable.py SMART/Java/Python/mySql/MySqlTranscriptTable.py SMART/Java/Python/mySql/__init__.py SMART/Java/Python/ncList/.NCList.py.swp SMART/Java/Python/ncList/.NCListCursor.py.swp SMART/Java/Python/ncList/Benchmark.py SMART/Java/Python/ncList/ConvertToNCList.py SMART/Java/Python/ncList/FileSorter.py SMART/Java/Python/ncList/FindOverlapsWithOneInterval.py SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervals.py SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsBin.py SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsIndex.py SMART/Java/Python/ncList/FindOverlaps_naif.py SMART/Java/Python/ncList/NCIndex.py SMART/Java/Python/ncList/NCList.py SMART/Java/Python/ncList/NCListCursor.py SMART/Java/Python/ncList/NCListFilePickle.py SMART/Java/Python/ncList/NCListHandler.py SMART/Java/Python/ncList/NCListMerger.py SMART/Java/Python/ncList/NCListParser.py SMART/Java/Python/ncList/__init__.py SMART/Java/Python/plot.py SMART/Java/Python/plotCoverage.py SMART/Java/Python/plotGenomeCoverage.py SMART/Java/Python/plotRepartition.py 
SMART/Java/Python/plotTranscriptList.py SMART/Java/Python/qualToFastq.py SMART/Java/Python/removeAllTmpTables.py SMART/Java/Python/removeEmptySequences.py SMART/Java/Python/removeExonLines.sh SMART/Java/Python/repetGffConverter.py SMART/Java/Python/restrictFromNucleotides.py SMART/Java/Python/restrictFromSize.py SMART/Java/Python/restrictSequenceList.py SMART/Java/Python/restrictTranscriptList.py SMART/Java/Python/runRandomJobs.py SMART/Java/Python/selectByNbOccurrences.py SMART/Java/Python/sequenceListSplitter.py SMART/Java/Python/splitByTag.py SMART/Java/Python/splitMultiFasta.py SMART/Java/Python/structure/Bins.py SMART/Java/Python/structure/Interval.py SMART/Java/Python/structure/Mapping.py SMART/Java/Python/structure/Sequence.py SMART/Java/Python/structure/SequenceList.py SMART/Java/Python/structure/SubMapping.py SMART/Java/Python/structure/Transcript.py SMART/Java/Python/structure/TranscriptContainer.py SMART/Java/Python/structure/TranscriptList.py SMART/Java/Python/structure/TranscriptListIterator.py SMART/Java/Python/structure/TranscriptListsComparator.py SMART/Java/Python/structure/__init__.py SMART/Java/Python/testInstall.py SMART/Java/Python/toolLauncher/RnaFoldLauncher.py SMART/Java/Python/toolLauncher/__init__.py SMART/Java/Python/trimAdaptor.py SMART/Java/Python/trimSequence.py SMART/Java/Python/trimSequences.py SMART/Java/Python/txtToFasta.py SMART/Java/Python/updateQual.py SMART/Java/Python/wigExploder.py SMART/Java/Python/wrongFastqToQual.py SMART/Java/__init__.py SMART/__init__.py SMART/galaxy/CleanTranscriptFile.xml SMART/galaxy/Clusterize.xml SMART/galaxy/CollapseReads.xml SMART/galaxy/CompareOverlappingSmallQuery.xml SMART/galaxy/CompareOverlappingSmallRef.xml SMART/galaxy/ConvertTranscriptFile.xml SMART/galaxy/CountReadGCPercent.xml SMART/galaxy/GetDifferentialExpression.xml SMART/galaxy/GetFlanking.xml SMART/galaxy/SelectByTag.xml SMART/galaxy/WrappGetLetterDistribution.py SMART/galaxy/WrappGetLetterDistribution.xml SMART/galaxy/__init__.py 
SMART/galaxy/changeGffFeatures.xml SMART/galaxy/changeTagName.xml SMART/galaxy/clusterizeBySlidingWindows.xml SMART/galaxy/compareOverlapping.xml SMART/galaxy/computeCoverage.xml SMART/galaxy/coordinatesToSequence.xml SMART/galaxy/getDifference.xml SMART/galaxy/getDistance.xml SMART/galaxy/getDistribution.xml SMART/galaxy/getExons.xml SMART/galaxy/getIntrons.xml SMART/galaxy/getReadDistribution.xml SMART/galaxy/getSizes.xml SMART/galaxy/getWigData.xml SMART/galaxy/getWigDistance.xml SMART/galaxy/getWigProfile.xml SMART/galaxy/mapperAnalyzer.xml SMART/galaxy/mergeSlidingWindowsClusters.xml SMART/galaxy/mergeTranscriptLists.xml SMART/galaxy/modifyGenomicCoordinates.xml SMART/galaxy/modifySequenceList.xml SMART/galaxy/plotCoverage.xml SMART/galaxy/plotTranscriptList.xml SMART/galaxy/removeExonLines.sh SMART/galaxy/removeExonLines.xml SMART/galaxy/restrictFromSize.xml SMART/galaxy/restrictTranscriptList.xml SMART/galaxy/test.py SMART/galaxy/trimSequences.xml __init__.py commons/__init__.py commons/core/LoggerFactory.py commons/core/__init__.py commons/core/checker/AbstractChecker.py commons/core/checker/CheckerException.py commons/core/checker/CheckerUtils.py commons/core/checker/ConfigChecker.py commons/core/checker/ConfigException.py commons/core/checker/ConfigValue.py commons/core/checker/IChecker.py commons/core/checker/OldConfigChecker.py commons/core/checker/RepetException.py commons/core/checker/__init__.py commons/core/coord/Align.py commons/core/coord/AlignUtils.py commons/core/coord/ConvCoord.py commons/core/coord/Map.py commons/core/coord/MapUtils.py commons/core/coord/Match.py commons/core/coord/MatchUtils.py commons/core/coord/MergedRange.py commons/core/coord/Path.py commons/core/coord/PathUtils.py commons/core/coord/Range.py commons/core/coord/Set.py commons/core/coord/SetUtils.py commons/core/coord/SlidingWindow.py commons/core/coord/__init__.py commons/core/coord/align2set.py commons/core/parsing/.BamParser.py.swp commons/core/parsing/AxtParser.py 
commons/core/parsing/BamParser.py commons/core/parsing/BedParser.py commons/core/parsing/BlastParser.py commons/core/parsing/BlatFileParser.py commons/core/parsing/BlatParser.py commons/core/parsing/BlatToGff.py commons/core/parsing/BlatToGffForBesPaired.py commons/core/parsing/BowtieParser.py commons/core/parsing/CoordsParser.py commons/core/parsing/CrossSsrAndBesMappedByBlatToGff.py commons/core/parsing/ElandParser.py commons/core/parsing/ExoParser.py commons/core/parsing/FastaParser.py commons/core/parsing/FastqParser.py commons/core/parsing/FindRep.py commons/core/parsing/GbParser.py commons/core/parsing/GffParser.py commons/core/parsing/GtfParser.py commons/core/parsing/MapParser.py commons/core/parsing/MapperParser.py commons/core/parsing/MaqParser.py commons/core/parsing/MrepsToSet.py commons/core/parsing/Multifasta2SNPFile.py commons/core/parsing/MummerParser.py commons/core/parsing/NCListParser.py commons/core/parsing/NucmerParser.py commons/core/parsing/PalsToAlign.py commons/core/parsing/ParserChooser.py commons/core/parsing/PathNum2Id.py commons/core/parsing/PilerTAToGrouperMap.py commons/core/parsing/PklParser.py commons/core/parsing/PslParser.py commons/core/parsing/README_MultiFasta2SNPFile commons/core/parsing/RmapParser.py commons/core/parsing/SamParser.py commons/core/parsing/SeqmapParser.py commons/core/parsing/SequenceListParser.py commons/core/parsing/ShrimpParser.py commons/core/parsing/Soap2Parser.py commons/core/parsing/SoapParser.py commons/core/parsing/SsrParser.py commons/core/parsing/TranscriptListParser.py commons/core/parsing/VarscanFile.py commons/core/parsing/VarscanFileForGnpSNP.py commons/core/parsing/VarscanHit.py commons/core/parsing/VarscanHitForGnpSNP.py commons/core/parsing/VarscanHit_WithTag.py commons/core/parsing/VarscanHit_v2_2_8.py commons/core/parsing/VarscanHit_v2_2_8_WithTag.py commons/core/parsing/VarscanToVCF.py commons/core/parsing/WigParser.py commons/core/parsing/__init__.py 
commons/core/parsing/multifastaParserLauncher.py commons/core/seq/AlignedBioseqDB.py commons/core/seq/Bioseq.py commons/core/seq/BioseqDB.py commons/core/seq/BioseqUtils.py commons/core/seq/ClusterConsensusCollection.py commons/core/seq/FastaUtils.py commons/core/seq/__init__.py commons/core/utils/FileUtils.py commons/core/utils/PipelineStepFTests.py commons/core/utils/RepetConfigParser.py commons/core/utils/RepetOptionParser.py commons/core/utils/__init__.py commons/core/writer/BedWriter.py commons/core/writer/CsvWriter.py commons/core/writer/EmblWriter.py commons/core/writer/FastaWriter.py commons/core/writer/FastqWriter.py commons/core/writer/GbWriter.py commons/core/writer/Gff2Writer.py commons/core/writer/Gff3Writer.py commons/core/writer/GtfWriter.py commons/core/writer/MapWriter.py commons/core/writer/MySqlTranscriptWriter.py commons/core/writer/SamWriter.py commons/core/writer/SequenceListWriter.py commons/core/writer/TranscriptListWriter.py commons/core/writer/TranscriptWriter.py commons/core/writer/UcscWriter.py commons/core/writer/WigWriter.py commons/core/writer/WriterChooser.py commons/core/writer/__init__.py doc.pdf tool_conf.xml tool_dependencies.xml
diffstat 283 files changed, 47982 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE.txt	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,506 @@
+
+CeCILL FREE SOFTWARE LICENSE AGREEMENT
+
+
+    Notice
+
+This Agreement is a Free Software license agreement that is the result
+of discussions between its authors in order to ensure compliance with
+the two main principles guiding its drafting:
+
+    * firstly, compliance with the principles governing the distribution
+      of Free Software: access to source code, broad rights granted to
+      users,
+    * secondly, the election of a governing law, French law, with which
+      it is conformant, both as regards the law of torts and
+      intellectual property law, and the protection that it offers to
+      both authors and holders of the economic rights over software.
+
+The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
+license are:
+
+Commissariat à l'Energie Atomique - CEA, a public scientific, technical
+and industrial research establishment, having its principal place of
+business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.
+
+Centre National de la Recherche Scientifique - CNRS, a public scientific
+and technological establishment, having its principal place of business
+at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, a public scientific and technological establishment, having its
+principal place of business at Domaine de Voluceau, Rocquencourt, BP
+105, 78153 Le Chesnay cedex, France.
+
+
+    Preamble
+
+The purpose of this Free Software license agreement is to grant users
+the right to modify and redistribute the software governed by this
+license within the framework of an open source distribution model.
+
+The exercising of these rights is conditional upon certain obligations
+for users so as to preserve this status for all subsequent redistributions.
+
+In consideration of access to the source code and the rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors only have limited liability.
+
+In this respect, the risks associated with loading, using, modifying
+and/or developing or reproducing the software by the user are brought to
+the user's attention, given its Free Software status, which may make it
+complicated to use, with the result that its use is reserved for
+developers and experienced professionals having in-depth computer
+knowledge. Users are therefore encouraged to load and test the
+suitability of the software as regards their requirements in conditions
+enabling the security of their systems and/or data to be ensured and,
+more generally, to use and operate it in the same conditions of
+security. This Agreement may be freely reproduced and published,
+provided it is not altered, and that no provisions are either added or
+removed herefrom.
+
+This Agreement may apply to any or all software for which the holder of
+the economic rights decides to submit the use thereof to its provisions.
+
+
+    Article 1 - DEFINITIONS
+
+For the purpose of this Agreement, when the following expressions
+commence with a capital letter, they shall have the following meaning:
+
+Agreement: means this license agreement, and its possible subsequent
+versions and annexes.
+
+Software: means the software in its Object Code and/or Source Code form
+and, where applicable, its documentation, "as is" when the Licensee
+accepts the Agreement.
+
+Initial Software: means the Software in its Source Code and possibly its
+Object Code form and, where applicable, its documentation, "as is" when
+it is first distributed under the terms and conditions of the Agreement.
+
+Modified Software: means the Software modified by at least one
+Contribution.
+
+Source Code: means all the Software's instructions and program lines to
+which access is required so as to modify the Software.
+
+Object Code: means the binary files originating from the compilation of
+the Source Code.
+
+Holder: means the holder(s) of the economic rights over the Initial
+Software.
+
+Licensee: means the Software user(s) having accepted the Agreement.
+
+Contributor: means a Licensee having made at least one Contribution.
+
+Licensor: means the Holder, or any other individual or legal entity, who
+distributes the Software under the Agreement.
+
+Contribution: means any or all modifications, corrections, translations,
+adaptations and/or new functions integrated into the Software by any or
+all Contributors, as well as any or all Internal Modules.
+
+Module: means a set of sources files including their documentation that
+enables supplementary functions or services in addition to those offered
+by the Software.
+
+External Module: means any or all Modules, not derived from the
+Software, so that this Module and the Software run in separate address
+spaces, with one calling the other when they are run.
+
+Internal Module: means any or all Module, connected to the Software so
+that they both execute in the same address space.
+
+GNU GPL: means the GNU General Public License version 2 or any
+subsequent version, as published by the Free Software Foundation Inc.
+
+Parties: mean both the Licensee and the Licensor.
+
+These expressions may be used both in singular and plural form.
+
+
+    Article 2 - PURPOSE
+
+The purpose of the Agreement is the grant by the Licensor to the
+Licensee of a non-exclusive, transferable and worldwide license for the
+Software as set forth in Article 5 hereinafter for the whole term of the
+protection granted by the rights over said Software. 
+
+
+    Article 3 - ACCEPTANCE
+
+3.1 The Licensee shall be deemed as having accepted the terms and
+conditions of this Agreement upon the occurrence of the first of the
+following events:
+
+    * (i) loading the Software by any or all means, notably, by
+      downloading from a remote server, or by loading from a physical
+      medium;
+    * (ii) the first time the Licensee exercises any of the rights
+      granted hereunder.
+
+3.2 One copy of the Agreement, containing a notice relating to the
+characteristics of the Software, to the limited warranty, and to the
+fact that its use is restricted to experienced users has been provided
+to the Licensee prior to its acceptance as set forth in Article 3.1
+hereinabove, and the Licensee hereby acknowledges that it has read and
+understood it.
+
+
+    Article 4 - EFFECTIVE DATE AND TERM
+
+
+      4.1 EFFECTIVE DATE
+
+The Agreement shall become effective on the date when it is accepted by
+the Licensee as set forth in Article 3.1.
+
+
+      4.2 TERM
+
+The Agreement shall remain in force for the entire legal term of
+protection of the economic rights over the Software.
+
+
+    Article 5 - SCOPE OF RIGHTS GRANTED
+
+The Licensor hereby grants to the Licensee, who accepts, the following
+rights over the Software for any or all use, and for the term of the
+Agreement, on the basis of the terms and conditions set forth hereinafter.
+
+Besides, if the Licensor owns or comes to own one or more patents
+protecting all or part of the functions of the Software or of its
+components, the Licensor undertakes not to enforce the rights granted by
+these patents against successive Licensees using, exploiting or
+modifying the Software. If these patents are transferred, the Licensor
+undertakes to have the transferees subscribe to the obligations set
+forth in this paragraph.
+
+
+      5.1 RIGHT OF USE
+
+The Licensee is authorized to use the Software, without any limitation
+as to its fields of application, with it being hereinafter specified
+that this comprises:
+
+   1. permanent or temporary reproduction of all or part of the Software
+      by any or all means and in any or all form.
+
+   2. loading, displaying, running, or storing the Software on any or
+      all medium.
+
+   3. entitlement to observe, study or test its operation so as to
+      determine the ideas and principles behind any or all constituent
+      elements of said Software. This shall apply when the Licensee
+      carries out any or all loading, displaying, running, transmission
+      or storage operation as regards the Software, that it is entitled
+      to carry out hereunder.
+
+
+      5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
+
+The right to make Contributions includes the right to translate, adapt,
+arrange, or make any or all modifications to the Software, and the right
+to reproduce the resulting software.
+
+The Licensee is authorized to make any or all Contributions to the
+Software provided that it includes an explicit notice that it is the
+author of said Contribution and indicates the date of the creation thereof.
+
+
+      5.3 RIGHT OF DISTRIBUTION
+
+In particular, the right of distribution includes the right to publish,
+transmit and communicate the Software to the general public on any or
+all medium, and by any or all means, and the right to market, either in
+consideration of a fee, or free of charge, one or more copies of the
+Software by any means.
+
+The Licensee is further authorized to distribute copies of the modified
+or unmodified Software to third parties according to the terms and
+conditions set forth hereinafter.
+
+
+        5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
+
+The Licensee is authorized to distribute true copies of the Software in
+Source Code or Object Code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+   1. a copy of the Agreement,
+
+   2. a notice relating to the limitation of both the Licensor's
+      warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the Object Code of the Software is
+redistributed, the Licensee allows future Licensees unhindered access to
+the full Source Code of the Software by indicating how to access it, it
+being understood that the additional cost of acquiring the Source Code
+shall not exceed the cost of transferring the data.
+
+
+        5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
+
+When the Licensee makes a Contribution to the Software, the terms and
+conditions for the distribution of the resulting Modified Software
+become subject to all the provisions of this Agreement.
+
+The Licensee is authorized to distribute the Modified Software, in
+source code or object code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+   1. a copy of the Agreement,
+
+   2. a notice relating to the limitation of both the Licensor's
+      warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the object code of the Modified
+Software is redistributed, the Licensee allows future Licensees
+unhindered access to the full source code of the Modified Software by
+indicating how to access it, it being understood that the additional
+cost of acquiring the source code shall not exceed the cost of
+transferring the data.
+
+
+        5.3.3 DISTRIBUTION OF EXTERNAL MODULES
+
+When the Licensee has developed an External Module, the terms and
+conditions of this Agreement do not apply to said External Module, that
+may be distributed under a separate license agreement.
+
+
+        5.3.4 COMPATIBILITY WITH THE GNU GPL
+
+The Licensee can include a code that is subject to the provisions of one
+of the versions of the GNU GPL in the Modified or unmodified Software,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+The Licensee can include the Modified or unmodified Software in a code
+that is subject to the provisions of one of the versions of the GNU GPL,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+
+    Article 6 - INTELLECTUAL PROPERTY
+
+
+      6.1 OVER THE INITIAL SOFTWARE
+
+The Holder owns the economic rights over the Initial Software. Any or
+all use of the Initial Software is subject to compliance with the terms
+and conditions under which the Holder has elected to distribute its work
+and no one shall be entitled to modify the terms and conditions for the
+distribution of said Initial Software.
+
+The Holder undertakes that the Initial Software will remain ruled at
+least by this Agreement, for the duration set forth in Article 4.2.
+
+
+      6.2 OVER THE CONTRIBUTIONS
+
+The Licensee who develops a Contribution is the owner of the
+intellectual property rights over this Contribution as defined by
+applicable law.
+
+
+      6.3 OVER THE EXTERNAL MODULES
+
+The Licensee who develops an External Module is the owner of the
+intellectual property rights over this External Module as defined by
+applicable law and is free to choose the type of agreement that shall
+govern its distribution.
+
+
+      6.4 JOINT PROVISIONS
+
+The Licensee expressly undertakes:
+
+   1. not to remove, or modify, in any manner, the intellectual property
+      notices attached to the Software;
+
+   2. to reproduce said notices, in an identical manner, in the copies
+      of the Software modified or not.
+
+The Licensee undertakes not to directly or indirectly infringe the
+intellectual property rights of the Holder and/or Contributors on the
+Software and to take, where applicable, vis-à-vis its staff, any and all
+measures required to ensure respect of said intellectual property rights
+of the Holder and/or Contributors.
+
+
+    Article 7 - RELATED SERVICES
+
+7.1 Under no circumstances shall the Agreement oblige the Licensor to
+provide technical assistance or maintenance services for the Software.
+
+However, the Licensor is entitled to offer this type of services. The
+terms and conditions of such technical assistance, and/or such
+maintenance, shall be set forth in a separate instrument. Only the
+Licensor offering said maintenance and/or technical assistance services
+shall incur liability therefor.
+
+7.2 Similarly, any Licensor is entitled to offer to its licensees, under
+its sole responsibility, a warranty, that shall only be binding upon
+itself, for the redistribution of the Software and/or the Modified
+Software, under terms and conditions that it is free to decide. Said
+warranty, and the financial terms and conditions of its application,
+shall be subject of a separate instrument executed between the Licensor
+and the Licensee.
+
+
+    Article 8 - LIABILITY
+
+8.1 Subject to the provisions of Article 8.2, the Licensee shall be
+entitled to claim compensation for any direct loss it may have suffered
+from the Software as a result of a fault on the part of the relevant
+Licensor, subject to providing evidence thereof.
+
+8.2 The Licensor's liability is limited to the commitments made under
+this Agreement and shall not be incurred as a result of in particular:
+(i) loss due the Licensee's total or partial failure to fulfill its
+obligations, (ii) direct or consequential loss that is suffered by the
+Licensee due to the use or performance of the Software, and (iii) more
+generally, any consequential loss. In particular the Parties expressly
+agree that any or all pecuniary or business loss (i.e. loss of data,
+loss of profits, operating loss, loss of customers or orders,
+opportunity cost, any disturbance to business activities) or any or all
+legal proceedings instituted against the Licensee by a third party,
+shall constitute consequential loss and shall not provide entitlement to
+any or all compensation from the Licensor.
+
+
+    Article 9 - WARRANTY
+
+9.1 The Licensee acknowledges that the scientific and technical
+state-of-the-art when the Software was distributed did not enable all
+possible uses to be tested and verified, nor for the presence of
+possible defects to be detected. In this respect, the Licensee's
+attention has been drawn to the risks associated with loading, using,
+modifying and/or developing and reproducing the Software which are
+reserved for experienced users.
+
+The Licensee shall be responsible for verifying, by any or all means,
+the suitability of the product for its requirements, its good working
+order, and for ensuring that it shall not cause damage to either persons
+or properties.
+
+9.2 The Licensor hereby represents, in good faith, that it is entitled
+to grant all the rights over the Software (including in particular the
+rights set forth in Article 5).
+
+9.3 The Licensee acknowledges that the Software is supplied "as is" by
+the Licensor without any other express or tacit warranty, other than
+that provided for in Article 9.2 and, in particular, without any warranty 
+as to its commercial value, its secured, safe, innovative or relevant
+nature.
+
+Specifically, the Licensor does not warrant that the Software is free
+from any error, that it will operate without interruption, that it will
+be compatible with the Licensee's own equipment and software
+configuration, nor that it will meet the Licensee's requirements.
+
+9.4 The Licensor does not either expressly or tacitly warrant that the
+Software does not infringe any third party intellectual property right
+relating to a patent, software or any other property right. Therefore,
+the Licensor disclaims any and all liability towards the Licensee
+arising out of any or all proceedings for infringement that may be
+instituted in respect of the use, modification and redistribution of the
+Software. Nevertheless, should such proceedings be instituted against
+the Licensee, the Licensor shall provide it with technical and legal
+assistance for its defense. Such technical and legal assistance shall be
+decided on a case-by-case basis between the relevant Licensor and the
+Licensee pursuant to a memorandum of understanding. The Licensor
+disclaims any and all liability as regards the Licensee's use of the
+name of the Software. No warranty is given as regards the existence of
+prior rights over the name of the Software or as regards the existence
+of a trademark.
+
+
+    Article 10 - TERMINATION
+
+10.1 In the event of a breach by the Licensee of its obligations
+hereunder, the Licensor may automatically terminate this Agreement
+thirty (30) days after notice has been sent to the Licensee and has
+remained ineffective.
+
+10.2 A Licensee whose Agreement is terminated shall no longer be
+authorized to use, modify or distribute the Software. However, any
+licenses that it may have granted prior to termination of the Agreement
+shall remain valid subject to their having been granted in compliance
+with the terms and conditions hereof.
+
+
+    Article 11 - MISCELLANEOUS
+
+
+      11.1 EXCUSABLE EVENTS
+
+Neither Party shall be liable for any or all delay, or failure to
+perform the Agreement, that may be attributable to an event of force
+majeure, an act of God or an outside cause, such as defective
+functioning or interruptions of the electricity or telecommunications
+networks, network paralysis following a virus attack, intervention by
+government authorities, natural disasters, water damage, earthquakes,
+fire, explosions, strikes and labor unrest, war, etc.
+
+11.2 Any failure by either Party, on one or more occasions, to invoke
+one or more of the provisions hereof, shall under no circumstances be
+interpreted as being a waiver by the interested Party of its right to
+invoke said provision(s) subsequently.
+
+11.3 The Agreement cancels and replaces any or all previous agreements,
+whether written or oral, between the Parties and having the same
+purpose, and constitutes the entirety of the agreement between said
+Parties concerning said purpose. No supplement or modification to the
+terms and conditions hereof shall be effective as between the Parties
+unless it is made in writing and signed by their duly authorized
+representatives.
+
+11.4 In the event that one or more of the provisions hereof were to
+conflict with a current or future applicable act or legislative text,
+said act or legislative text shall prevail, and the Parties shall make
+the necessary amendments so as to comply with said act or legislative
+text. All other provisions shall remain effective. Similarly, invalidity
+of a provision of the Agreement, for any reason whatsoever, shall not
+cause the Agreement as a whole to be invalid.
+
+
+      11.5 LANGUAGE
+
+The Agreement is drafted in both French and English and both versions
+are deemed authentic.
+
+
+    Article 12 - NEW VERSIONS OF THE AGREEMENT
+
+12.1 Any person is authorized to duplicate and distribute copies of this
+Agreement.
+
+12.2 So as to ensure coherence, the wording of this Agreement is
+protected and may only be modified by the authors of the License, who
+reserve the right to periodically publish updates or new versions of the
+Agreement, each with a separate number. These subsequent versions may
+address new issues encountered by Free Software.
+
+12.3 Any Software distributed under a given version of the Agreement may
+only be subsequently distributed under the same version of the Agreement
+or a subsequent version, subject to the provisions of Article 5.3.4.
+
+
+    Article 13 - GOVERNING LAW AND JURISDICTION
+
+13.1 The Agreement is governed by French law. The Parties agree to
+endeavor to seek an amicable solution to any disagreements or disputes
+that may arise during the performance of the Agreement.
+
+13.2 Failing an amicable solution within two (2) months as from their
+occurrence, and unless emergency proceedings are necessary, the
+disagreements or disputes shall be referred to the Paris Courts having
+jurisdiction, by the more diligent Party.
+
+
+Version 2.0 dated 2006-09-05.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,75 @@
+----------
+|  NAME  |
+----------
+S-MART
+
+
+Description
+-----------
+Several tools are now available for mapping high-throughput sequencing data from a genome, but few can extract biological knowledge from the mapped reads. We have developed a toolbox, S-MART, which handles mapped RNA-Seq and ChIP-Seq data.
+
+S-MART is an intuitive and lightweight tool, performing several tasks that are usually required during the analysis of mapped RNA-Seq and ChIP-Seq reads, including data selection and data visualization.
+
+S-MART does not require a computer science background and thus can be used by all biologists through a graphical interface. S-MART can run on any personal computer, yielding results within an hour for most queries. 
+
+
+Copyright
+---------
+Copyright INRA-URGI 2009-2013
+
+
+Authors
+-------
+Matthias Zytnicki
+
+
+Contact
+-------
+urgi-contact@versailles.inra.fr
+
+
+License
+-------
+This library is distributed under the terms of the CeCILL license 
+(http://www.cecill.info/index.en.html).
+See the LICENSE.txt file.
+
+
+Installation under Galaxy
+-------------------------
+S-MART is available under the Galaxy Tool Shed: http://toolshed.g2.bx.psu.edu/
+Remember to set the variables "tool_config_file" and "tool_dependency_dir" accordingly. Please look up the Galaxy Tool Shed wiki to know more about it.
+It assumes you have R installed, as well as two packages: RColorBrewer (for colors in graphics), and Hmisc (for statistics). You can install them as root with the commands:
+ - R --slave --no-save --no-restore --quiet -e 'if("RColorBrewer" %in% rownames(installed.packages()) == FALSE){install.packages("RColorBrewer", repos = c("http://cran.rstudio.com/"), dependencies = TRUE)}'
+ - R --slave --no-save --no-restore --quiet -e 'if("Hmisc" %in% rownames(installed.packages()) == FALSE){install.packages("Hmisc", repos = c("http://cran.rstudio.com/"), dependencies = TRUE)}'
+
+Optionally, you can organize the layout of S-MART tools following these instructions. This way, all the tools will be correctly sorted and appear in categories.
+ - Locate the directory where S-MART has been installed: probably in "<galaxy install dir>/shed_tool/toolshed.g2.bx.psu.edu/repos/yufei-luo/s_mart/XXX/s_mart/"
+ - Create a symbolic link "<galaxy install dir>/tools/s_mart" directing to "<S-MART install dir>/SMART/galaxy/"
+ - Paste the content of "<S-MART install dir>/SMART/galaxy/tool_conf.xml" to your local "<galaxy install dir>/tool_conf.xml", for instance, right before the </toolbox> mark-up.
+ - Remove the S-MART layout in "<galaxy install dir>/shed_tool_conf.xml" (the name may vary depending on your "universe_wgsi.ini" file) which has been automatically generated: remove the whole block between the markup <section id="s-mart" name="S-MART" version="XXX"> and the corresponding </section>.
+ - Restart Galaxy to complete the install.
+
+
+Stand-alone installation
+------------------------
+This product needs the following software:
+ - R, under the GNU General Public License, and several R package (under the same License)
+ - Python, under the Python License, compatible with the GNU General Public License
+ - Java, under the GNU General Public License
+
+
+Instructions
+------------
+Further installation instructions and the user guide are available in the file "doc.pdf".
+
+
+Acknowledgements
+----------------
+Many thanks go to the helping developers:
+ - Yufei Luo
+ - the URGI team
+and the beta-testers:
+ - Claire Toffano-Nioche
+ - Claire Kuchly
+ - among others...
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CleanTranscriptFile.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,74 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from optparse import OptionParser
+from SMART.Java.Python.cleaning.CleanerChooser import CleanerChooser
+
+
+class CleanTranscriptFile(object):
+
+	def __init__(self, verbosity):
+		self.verbosity = verbosity
+		self.chooser   = CleanerChooser(self.verbosity)
+
+	def setInputFile(self, fileName, format):
+		self.chooser.findFormat(format)
+		self.cleaner = self.chooser.getCleaner()
+		self.cleaner.setInputFileName(fileName)
+
+	def setOutputFile(self, fileName):
+		self.cleaner.setOutputFileName(fileName)
+
+	def setAcceptedTypes(self, types):
+		if types != None:
+			self.cleaner.setAcceptedTypes(types)
+
+	def run(self):
+		self.cleaner.clean()
+
+
+if __name__ == "__main__":
+
+	description = "Clean Transcript File v1.0.1: Clean a transcript file so that it is useable for S-MART. [Category: Other]"
+
+	parser = OptionParser(description = description)
+	parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
+	parser.add_option("-f", "--format",      dest="format",         action="store",                     type="string", help="format of previous file [compulsory] [format: transcript file format]")
+	parser.add_option("-t", "--types",       dest="acceptedTypes",  action="store",      default=None,  type="string", help="name of the types you want to keep in GFF/GTF (list separated by commas) [format: string] [default: None]")
+	parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [format: output file in GFF3 format]")
+	parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
+	(options, args) = parser.parse_args()
+
+	ctf = CleanTranscriptFile(options.verbosity)
+	ctf.setInputFile(options.inputFileName, options.format)
+	ctf.setOutputFile(options.outputFileName)
+	ctf.setAcceptedTypes(None if options.acceptedTypes == None else options.acceptedTypes.split(","))
+	ctf.run()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ClusterizeByTags.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,157 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import random
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
+from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
+
+
+OPERATIONS = ("diff", "div")
+BOOLTOSTRANDS = {True: [0], False: [-1, 1]}
+
+class ClusterizeByTags(object):
+
+    def __init__(self, verbosity):
+        self.verbosity   = verbosity
+        self.connection  = MySqlConnection(self.verbosity-1)
+        self.defaultValue = None
+        self.maxDistance = None
+        self.oneStrand   = False
+
+    def setInputFile(self, fileName, format):
+        chooser = ParserChooser(self.verbosity)
+        chooser.findFormat(format)
+        parser = chooser.getParser(fileName)
+        writer = MySqlTranscriptWriter(self.connection, None, self.verbosity)
+        writer.addTranscriptList(parser)
+        writer.write()
+        self.transcriptTables = writer.getTables()
+
+    def setOutputFile(self, fileName):
+        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
+
+    def setTag(self, tagName, defaultValue):
+        self.tagName      = tagName
+        self.defaultValue = defaultValue
+
+    def setThreshold(self, threshold):
+        self.threshold = threshold
+
+    def setOperation(self, operation):
+        self.operation = operation
+        if self.operation not in OPERATIONS:
+            raise Exception("Operation '%s' unsupported: choose among %s" % (self.operation, ", ".join(OPERATIONS)))
+
+    def setMaxDistance(self, distance):
+        self.maxDistance = distance
+
+    def setOneStrand(self, oneStrand):
+        self.oneStrand = oneStrand
+
+    def run(self):
+        for chromosome in sorted(self.transcriptTables.keys()):
+            progress = Progress(self.transcriptTables[chromosome].getNbElements(), "Analyzing %s" % (chromosome), self.verbosity)
+            for strand in BOOLTOSTRANDS[self.oneStrand]:
+                previousValue      = None
+                previousTrend      = None
+                previousTranscript = None
+                sumValue           = 0
+                command = "SELECT * FROM %s" % (self.transcriptTables[chromosome].getName())
+                if not self.oneStrand:
+                    command += " WHERE direction = %d" % (strand)
+                command += " ORDER BY start, end"
+                for index, transcript in self.transcriptTables[chromosome].selectTranscripts(command):
+                    if self.tagName in transcript.getTagNames():
+                        value = transcript.getTagValue(self.tagName)
+                    else:
+                        value = self.defaultValue
+                    if previousValue == None:
+                        trend = None
+                    else:
+                        if self.operation == "diff":
+                            trend = value - previousValue
+                        else:
+                            trend = value / previousValue
+                    if previousTranscript == None:
+                        sumValue = value
+                    elif (previousTrend == None or abs(trend - previousTrend) <= self.threshold) and (self.maxDistance == None or previousTranscript.getDistance(transcript) <= self.maxDistance) and (previousTranscript.getDirection() == transcript.getDirection() or not self.oneStrand):
+                        if previousTranscript.getDirection() != transcript.getDirection():
+                            transcript.reverse()
+                        previousTranscript.merge(transcript)
+                        transcript = previousTranscript
+                        sumValue += value
+                        previousTrend = trend
+                    else:
+                        previousTranscript.setTagValue(self.tagName, sumValue)
+                        self.writer.addTranscript(previousTranscript)
+                        sumValue = value
+                        previousTrend = None
+                    previousValue      = value
+                    previousTranscript = transcript
+                    progress.inc()
+                if previousTranscript != None:
+                    previousTranscript.setTagValue(self.tagName, sumValue)
+                    self.writer.addTranscript(previousTranscript)
+            progress.done()
+        self.writer.close()
+
+
+if __name__ == "__main__":
+    
+    description = "Clusterize By Tags v1.0.1: Clusterize a set of element using their tag values. [Category: Merge]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format",      dest="format",         action="store",                     type="string", help="format of previous file [compulsory] [format: transcript file format]")
+    parser.add_option("-t", "--tag",         dest="tagName",        action="store",                     type="string", help="name of the tag [format: string] [compulsory]")
+    parser.add_option("-e", "--default",     dest="defaultValue",   action="store",      default=None,  type="int",    help="default value for the tag [format: string]")
+    parser.add_option("-r", "--threshold",   dest="threshold",      action="store",                     type="int",    help="threshold between two consecutive tags [format: int] [compulsory]")
+    parser.add_option("-p", "--operation",   dest="operation",      action="store",                     type="string", help="operation to apply between 2 different clusters to compare them [format: choice (diff, div)] [compulsory]")
+    parser.add_option("-d", "--distance",    dest="maxDistance",    action="store",      default=None,  type="int",    help="maximum distance for 2 clusters to be merged [format: int] [default: None]")
+    parser.add_option("-1", "--oneStrand",   dest="oneStrand",      action="store_true", default=False,                help="also cluster the elements which are on different strands [format: bool] [default: False]")
+    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [format: output file in GFF3 format]")
+    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    cbt = ClusterizeByTags(options.verbosity)
+    cbt.setInputFile(options.inputFileName, options.format)
+    cbt.setOutputFile(options.outputFileName)
+    cbt.setTag(options.tagName, options.defaultValue)
+    cbt.setThreshold(options.threshold)
+    cbt.setOperation(options.operation)
+    cbt.setMaxDistance(options.maxDistance)
+    cbt.setOneStrand(options.oneStrand)
+    cbt.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CollapseReads.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,174 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os
+from optparse import OptionParser, OptionGroup
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
+from SMART.Java.Python.ncList.FileSorter import FileSorter
+from SMART.Java.Python.misc.Progress import Progress
+
+
+class CollapseReads(object):
+    """
+    Merge two reads if they have exactly the same genomic coordinates
+    """
+
+    def __init__(self, verbosity = 0):
+        self.verbosity         = verbosity
+        self.inputReader       = None
+        self.outputWriter      = None
+        self.strands           = True
+        self.nbRead            = 0
+        self.nbWritten         = 0
+        self.nbMerges          = 0
+        self.splittedFileNames = {}
+
+    def __del__(self):
+        for fileName in self.splittedFileNames.values():
+            os.remove(fileName)
+            
+    def close(self):
+        self.outputWriter.close()
+        
+    def setInputFile(self, fileName, format):
+        parserChooser = ParserChooser(self.verbosity)
+        parserChooser.findFormat(format)
+        self.parser = parserChooser.getParser(fileName)
+        self.sortedFileName = "%s_sorted.pkl" % (os.path.splitext(fileName)[0])
+
+    def setOutputFile(self, fileName):
+        self.outputWriter = Gff3Writer(fileName, self.verbosity)
+
+    def getNbElements(self):
+        return self.parser.getNbTranscripts()
+
+    def _sortFile(self):
+        fs = FileSorter(self.parser, self.verbosity-4)
+        fs.perChromosome(True)
+        fs.setOutputFileName(self.sortedFileName)
+        fs.sort()
+        self.splittedFileNames       = fs.getOutputFileNames()
+        self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()
+        self.nbRead                  = fs.getNbElements()
+        
+    def _iterate(self, chromosome):
+        progress    = Progress(self.nbElementsPerChromosome[chromosome], "Checking chromosome %s" % (chromosome), self.verbosity)
+        transcripts = []
+        parser      = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)
+        for newTranscript in parser.getIterator():
+            newTranscripts = []
+            for oldTranscript in transcripts:
+                if self._checkOverlap(newTranscript, oldTranscript):
+                    self._merge(newTranscript, oldTranscript)
+                elif self._checkPassed(newTranscript, oldTranscript):
+                    self._write(oldTranscript)
+                else:
+                    newTranscripts.append(oldTranscript)
+            newTranscripts.append(newTranscript)
+            transcripts = newTranscripts
+            progress.inc()
+        for transcript in transcripts:
+            self._write(transcript)
+        progress.done()
+
+    def _merge(self, transcript1, transcript2):
+        self.nbMerges += 1
+        transcript2.setDirection(transcript1.getDirection())
+        transcript1.merge(transcript2)
+
+    def _write(self, transcript):
+        self.nbWritten += 1
+        self.outputWriter.addTranscript(transcript)
+
+    def _checkOverlap(self, transcript1, transcript2):
+        if transcript1.getStart() != transcript2.getStart() or transcript1.getEnd() != transcript2.getEnd():
+            return False
+        return (not self.strands or transcript1.getDirection() == transcript2.getDirection())
+
+    def _checkPassed(self, transcript1, transcript2):
+        return (transcript2.getStart() < transcript1.getStart())
+
+    def collapseChromosome(self, chromosome):
+        progress            = Progress(table.getNbElements(), "Analysing chromosome %s" % (chromosome), self.verbosity)
+        command             = "SELECT * FROM %s ORDER BY start ASC, end DESC" % (table.name)
+        transcriptStart     = None
+        transcriptEnd       = None
+        transcriptDirection = None
+        currentTranscript   = None
+        if self.strands:
+            command += ", direction"
+        for index, transcript in table.selectTranscripts(command, True):
+            self.nbRead += 1
+            if not self.strands:
+                transcript.setDirection("+")
+            if transcriptStart != transcript.getStart() or transcriptEnd != transcript.getEnd() or transcriptDirection != transcript.getDirection():
+                self.writeTranscript(currentTranscript)
+                transcriptStart     = transcript.getStart()
+                transcriptEnd       = transcript.getEnd()
+                transcriptDirection = transcript.getDirection()
+                currentTranscript   = transcript
+            else:
+                currentTranscript.setTagValue("nbElements", (currentTranscript.getTagValue("nbElements") + 1) if "nbElements" in currentTranscript.getTagNames() else 1)
+            progress.inc()
+        self.writeTranscript(currentTranscript)
+        progress.done()
+
+    def collapse(self):
+        self._sortFile()
+        for chromosome in sorted(self.nbElementsPerChromosome.keys()):
+            self._iterate(chromosome)
+        self.outputWriter.close()
+        if self.verbosity > 1:
+            print "# reads read: %d" % (self.nbRead)
+            print "# reads written: %d (%.2f%%)" % (self.nbWritten, float(self.nbWritten) / self.nbRead * 100)
+            print "# reads merges: %d" % (self.nbMerges)
+
+if __name__ == "__main__":
+    
+    # parse command line
+    description = "Collapse Reads v1.0.3: Merge two reads if they have exactly the same genomic coordinates. [Category: Merge]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in mapping format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of the file [compulsory] [format: mapping file format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-s", "--strands",   dest="strands",        action="store_true", default=False,                help="merge elements on 2 different strands [format: bool] [default: false]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
+    (options, args) = parser.parse_args()
+
+    collapser = CollapseReads(options.verbosity)
+    collapser.setInputFile(options.inputFileName, options.format)
+    collapser.setOutputFile(options.outputFileName)
+    collapser.strands = not options.strands
+    collapser.collapse()
+    collapser.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CombineTags.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,115 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os
+import random
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.Progress import Progress
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.Gff3Writer import Gff3Writer
+
+OPERATIONS = ("plus", "minus", "times", "div")
+
+class CombineTags(object):
+
+    def __init__(self, verbosity = 0):
+        self.verbosity       = verbosity
+
+    def setInputFile(self, fileName, format):
+        self.inputFileName = fileName
+        parserChooser = ParserChooser(self.verbosity)
+        parserChooser.findFormat(format, "transcript")
+        self.parser = parserChooser.getParser(fileName)
+
+    def setOutputFile(self, fileName):
+        self.outputWriter = Gff3Writer(fileName, self.verbosity)
+
+    def setTags(self, tag1, tag2, outputTag, defaultValue = None):
+        self.tag1         = tag1
+        self.tag2         = tag2
+        self.outputTag    = outputTag
+        self.defaultValue = defaultValue
+
+    def setOperation(self, operation):
+        self.operation = operation
+        if self.operation not in OPERATIONS:
+            raise Exception("Do no handle operation %s, only: %s" % (self.operation, ", ".join(OPERATIONS)))
+
+    def run(self):
+        progress = Progress(self.parser.getNbTranscripts(), "Printing transcripts %s" % (self.inputFileName), self.verbosity)
+        for transcript in self.parser.getIterator():
+            tag1 = transcript.getTagValue(self.tag1)
+            tag2 = transcript.getTagValue(self.tag2)
+            if tag1 == None or tag2 == None:
+                if self.defaultValue == None:
+                    raise Exception("Transcript %s misses one of the tags %s and %s, and has no default value !" % (transcript, self.tag1, self.tag2))
+                newTag = self.defaultValue
+            else:
+                tag1, tag2 = float(tag1), float(tag2)
+                if self.operation == "plus":
+                    newTag = tag1 + tag2
+                elif self.operation == "minus":
+                    newTag = tag1 - tag2
+                elif self.operation == "times":
+                    newTag = tag1 * tag2
+                elif self.operation == "div":
+                    newTag = tag1 / tag2
+            transcript.setTagValue(self.outputTag, newTag)
+            self.outputWriter.addTranscript(transcript)
+            progress.inc()
+        progress.done()
+        self.parser.close()
+        self.outputWriter.close()
+
+
+if __name__ == "__main__":
+    
+    # parse command line
+    description = "Combine Tags v1.0.1: Combine two tags of a list of transcripts. [Category: Data Modification]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",               type="string", help="format of the input file [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output",      dest="outputFileName", action="store",               type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-t", "--tag1",        dest="tag1",           action="store",               type="string", help="name of the first tag [compulsory] [format: string]")
+    parser.add_option("-T", "--tag2",        dest="tag2",           action="store",               type="string", help="name of the second tag [compulsory] [format: string]")
+    parser.add_option("-d", "--default",     dest="defaultValue",   action="store", default=None, type="string", help="default value when one of the tag is absent [compulsory] [format: float]")
+    parser.add_option("-n", "--new",         dest="newTag",         action="store",               type="string", help="name of the new tag [compulsory] [format: string]")
+    parser.add_option("-p", "--operation",   dest="operation",      action="store",               type="string", help="operation combining the tags [compulsory] [format: choice (plus, minus, times, div)]")
+    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int] [default: 1]")
+    (options, args) = parser.parse_args()
+
+    combiner = CombineTags(options.verbosity)
+    combiner.setInputFile(options.inputFileName, options.inputFormat)
+    combiner.setOutputFile("%s.gff3" % (options.outputFileName))
+    combiner.setTags(options.tag1, options.tag2, options.newTag, options.defaultValue)
+    combiner.setOperation(options.operation)
+    combiner.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CompareOverlapping.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,491 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os, struct, time, random
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.ncList.NCList import NCList
+from SMART.Java.Python.ncList.NCListCursor import NCListCursor
+from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle
+from SMART.Java.Python.ncList.NCListHandler import NCListHandler
+from SMART.Java.Python.ncList.ConvertToNCList import ConvertToNCList
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+from SMART.Java.Python.misc import Utils
+try:
+	import cPickle as pickle
+except:
+	import pickle
+
+REFERENCE = 0
+QUERY = 1
+TYPES = (REFERENCE, QUERY)
+TYPETOSTRING = {0: "reference", 1: "query"}
+
+class CompareOverlapping(object):
+	"""Find query elements that overlap a reference set, using nested containment lists (NC-lists).
+
+	Both input files are converted to NC-lists; the queries of each chromosome
+	are then swept against the reference NC-list of the same chromosome.
+	Overlapping queries (or non-overlapping ones when inverted) are written to
+	a GFF3 file, annotated with the tags 'overlapWith' and 'nbOverlaps'.
+	"""
+
+	def __init__(self, verbosity = 1):
+		# All options default to "off"; they are switched on through the setters below.
+		self._outputFileName		   = "outputOverlaps.gff3"
+		self._iWriter				   = None
+		self._nbOverlappingQueries	   = 0
+		self._nbOverlaps			   = 0
+		self._nbLines				   = {REFERENCE: 0, QUERY: 0}
+		self._verbosity				   = verbosity
+		self._ncLists				   = {}
+		self._cursors				   = {}
+		self._splittedFileNames		   = {}
+		self._nbElements			   = {}
+		self._nbElementsPerChromosome  = {}
+		self._inputFileNames		   = {REFERENCE: None,  QUERY: None}
+		self._inputFileFormats		   = {REFERENCE: None,  QUERY: None}
+		self._starts				   = {REFERENCE: None, QUERY: None}
+		self._ends					   = {REFERENCE: None, QUERY: None}
+		self._fivePrimes			   = {REFERENCE: None, QUERY: None}
+		self._threePrimes			   = {REFERENCE: None, QUERY: None}
+		self._ncListHandlers		   = {REFERENCE: None,  QUERY: None}
+		self._convertedFileNames	   = {REFERENCE: False, QUERY: False}
+		self._sorted                   = False
+		self._index                    = False
+		self._introns				   = False
+		self._antisense				   = False
+		self._colinear				   = False
+		self._invert				   = False
+		self._distance				   = 0
+		self._minOverlap			   = 1
+		self._pcOverlap				   = None
+		self._included				   = False
+		self._including				   = False
+		self._outputNotOverlapping	   = False
+		self._tmpRefFileName		   = None
+		self._currentQueryTranscript   = None
+		self._currentOrQueryTranscript = None
+		self._currentExQueryTranscript = None
+		# random suffix used to make temporary file names unique
+		self._randInt				   = random.randint(0, 100000)
+		
+	def __del__(self):
+		# Remove the temporary pickled reference and the converted NC-list files.
+		# NOTE(review): _convertedFileNames values start as False (not None), so the
+		# '!= None' test lets False through to os.path.exists — confirm this is harmless.
+		for fileName in [self._tmpRefFileName] + self._convertedFileNames.values():
+			if fileName != None and os.path.exists(fileName):
+				os.remove(fileName)
+
+	def close(self):
+		"""Close the GFF3 output writer."""
+		self._iWriter.close()
+		
+	def setInput(self, fileName, format, type):
+		"""Register an input file for the given side (REFERENCE or QUERY).
+
+		The ParserChooser call only validates that the format is known; the
+		actual parser is created later, when the file is read.
+		"""
+		chooser = ParserChooser(self._verbosity)
+		chooser.findFormat(format)
+		self._inputFileNames[type]   = fileName
+		self._inputFileFormats[type] = format
+		
+	def setOutput(self, outputFileName):
+		"""Set the output file name (keeps the default when given '') and open the GFF3 writer."""
+		if outputFileName != '':
+			self._outputFileName = outputFileName
+		self._iWriter = Gff3Writer(self._outputFileName)
+
+	def setSorted(self, sorted):
+		"""Declare that input files are already sorted (skips sorting during conversion)."""
+		self._sorted = sorted
+
+	def setIndex(self, index):
+		"""Build an index on the reference NC-list (faster look-ups, more memory)."""
+		self._index = index
+
+	def restrictToStart(self, distance, type):
+		"""Only consider the first 'distance' nucleotides of the given side's transcripts."""
+		self._starts[type] = distance
+		
+	def restrictToEnd(self, distance, type):
+		"""Only consider the last 'distance' nucleotides of the given side's transcripts."""
+		self._ends[type] = distance
+		
+	def extendFivePrime(self, distance, type):
+		"""Extend the given side's transcripts towards 5' by 'distance' nucleotides."""
+		self._fivePrimes[type] = distance
+		
+	def extendThreePrime(self, distance, type):
+		"""Extend the given side's transcripts towards 3' by 'distance' nucleotides."""
+		self._threePrimes[type] = distance
+
+	def acceptIntrons(self, boolean):
+		"""Also count overlaps falling in introns (exon structure is discarded)."""
+		self._introns = boolean
+
+	def getAntisenseOnly(self, boolean):
+		"""Keep only overlaps on the opposite strand."""
+		self._antisense = boolean
+		
+	def getColinearOnly(self, boolean):
+		"""Keep only overlaps on the same strand."""
+		self._colinear = boolean
+
+	def getInvert(self, boolean):
+		"""Invert the match: report queries that do NOT overlap."""
+		self._invert = boolean
+
+	def setMaxDistance(self, distance):
+		"""Accept up to 'distance' nucleotides between query and reference."""
+		self._distance = distance
+
+	def setMinOverlap(self, overlap):
+		"""Minimum number of overlapping nucleotides to declare an overlap."""
+		self._minOverlap = overlap
+
+	def setPcOverlap(self, overlap):
+		"""Minimum percentage of the query that must overlap."""
+		self._pcOverlap = overlap
+
+	def setIncludedOnly(self, boolean):
+		"""Keep only queries fully included in a reference element."""
+		self._included = boolean
+		
+	def setIncludingOnly(self, boolean):
+		"""Keep only queries that fully include a reference element."""
+		self._including = boolean
+
+	def includeNotOverlapping(self, boolean):
+		"""Also write queries that overlap nothing (with nbOverlaps=0)."""
+		self._outputNotOverlapping = boolean
+		
+	def transformTranscript(self, transcript, type):
+		"""Apply the configured restrictions/extensions to a transcript, in place.
+
+		Order matters: start/end restrictions first, then 5'/3' extensions.
+		With --intron the exon structure is dropped; the reference is also
+		extended by the accepted distance so a plain interval test suffices.
+		"""
+		if self._starts[type] != None:
+			transcript.restrictStart(self._starts[type])
+		if self._ends[type] != None:
+			transcript.restrictEnd(self._ends[type])
+		if self._fivePrimes[type] != None:
+			transcript.extendStart(self._fivePrimes[type])
+		if self._threePrimes[type] != None:
+			transcript.extendEnd(self._threePrimes[type])
+		if self._introns:
+			transcript.exons = []
+		if type == REFERENCE and self._distance > 0:
+			transcript.extendExons(self._distance)
+		return transcript
+
+	def extendQueryTranscript(self, transcript):
+		"""Build the extended copy of the query used for the coarse interval test.
+
+		The copy (stored in _currentExQueryTranscript) gets the 5'/3' extensions;
+		the original transcript's exon list is cleared as a side effect.
+		"""
+		self._currentExQueryTranscript = Transcript()
+		self._currentExQueryTranscript.copy(transcript)
+		if self._fivePrimes[QUERY] != None:
+			self._currentExQueryTranscript.extendStart(self._fivePrimes[QUERY])
+		if self._threePrimes[QUERY] != None:
+			self._currentExQueryTranscript.extendEnd(self._threePrimes[QUERY])
+		transcript.exons = []
+
+	def createTmpRefFile(self):
+		"""Pre-transform the reference file and pickle it to a temporary file.
+
+		The pickled file (honoring $SMARTTMPPATH if set) then replaces the
+		original reference input, with format 'pkl'.
+		"""
+		self._tmpRefFileName = "tmp_ref_%d.pkl" % (self._randInt)
+		if "SMARTTMPPATH" in os.environ:
+			self._tmpRefFileName = os.path.join(os.environ["SMARTTMPPATH"], self._tmpRefFileName)
+		chooser = ParserChooser(self._verbosity)
+		chooser.findFormat(self._inputFileFormats[REFERENCE])
+		parser = chooser.getParser(self._inputFileNames[REFERENCE])
+		writer = NCListFilePickle(self._tmpRefFileName, self._verbosity)
+		for transcript in parser.getIterator():
+			transcript = self.transformTranscript(transcript, REFERENCE)
+			writer.addTranscript(transcript)
+		writer.close()
+		self._inputFileNames[REFERENCE]   = self._tmpRefFileName
+		self._inputFileFormats[REFERENCE] = "pkl"
+
+	def createNCLists(self):
+		"""Convert both inputs to NC-lists and set up one cursor per chromosome.
+
+		Also records line counts and, when indexing is enabled, the per-chromosome
+		index of the reference NC-list.
+		"""
+		self._ncLists = dict([type, {}] for type in TYPES)
+		self._indices = dict([type, {}] for type in TYPES)
+		self._cursors = dict([type, {}] for type in TYPES)
+		for type in TYPES:
+			if self._verbosity > 2:
+				print "Creating %s NC-list..." % (TYPETOSTRING[type])
+			self._convertedFileNames[type] = "%s_%d_%d.ncl" % (self._inputFileNames[type], self._randInt, type)
+			ncLists = ConvertToNCList(self._verbosity)
+			ncLists.setInputFileName(self._inputFileNames[type], self._inputFileFormats[type])
+			ncLists.setOutputFileName(self._convertedFileNames[type])
+			ncLists.setSorted(self._sorted)
+			if type == REFERENCE and self._index:
+				ncLists.setIndex(True)
+			ncLists.run()
+			self._ncListHandlers[type] = NCListHandler(self._verbosity)
+			self._ncListHandlers[type].setFileName(self._convertedFileNames[type])
+			self._ncListHandlers[type].loadData()
+			self._nbLines[type]					= self._ncListHandlers[type].getNbElements()
+			self._nbElementsPerChromosome[type] = self._ncListHandlers[type].getNbElementsPerChromosome()
+			self._ncLists[type]					= self._ncListHandlers[type].getNCLists()
+			for chromosome, ncList in self._ncLists[type].iteritems():
+				self._cursors[type][chromosome] = NCListCursor(None, ncList, 0, self._verbosity)
+				if type == REFERENCE and self._index:
+					self._indices[REFERENCE][chromosome] = ncList.getIndex()
+			if self._verbosity > 2:
+				print "	...done"
+
+	def compare(self):
+		"""Sweep every query against the reference NC-list of its chromosome.
+
+		Chromosomes absent from the reference are handled up front (written
+		only with --notOverlapping).  For each query, the reference cursor is
+		possibly fast-forwarded through the index, then findOverlapIter does
+		the actual overlap detection and output.
+		"""
+		nbSkips, nbMoves   = 0, 0
+		previousChromosome = None
+		done			   = False
+		refNCList		   = None
+		queryNCList		   = None
+		startTime		   = time.time()
+		progress		   = Progress(len(self._ncLists[QUERY].keys()), "Checking overlap", self._verbosity)
+		for chromosome, queryNCList in self._ncLists[QUERY].iteritems():
+			queryParser = self._ncListHandlers[QUERY].getParser(chromosome)
+			queryNCList = self._ncLists[QUERY][chromosome]
+			queryCursor = self._cursors[QUERY][chromosome]
+			if chromosome != previousChromosome:
+				skipChromosome		= False
+				previousChromosome  = chromosome
+				if chromosome not in self._ncLists[REFERENCE]:
+					# no reference on this chromosome: dump queries if requested, then move on
+					if self._outputNotOverlapping:
+						while not queryCursor.isOut():
+							self._currentQueryTranscript = queryCursor.getTranscript()
+							self._writeIntervalInNewGFF3({})
+							if queryCursor.hasChildren():
+								queryCursor.moveDown()
+							else:
+								queryCursor.moveNext()
+					progress.inc()
+					continue
+				refNCList = self._ncLists[REFERENCE][chromosome]
+				refCursor = self._cursors[REFERENCE][chromosome]
+			while True:
+				# keep three views of the query: original, transformed, and extended
+				self._currentOrQueryTranscript = queryCursor.getTranscript()
+				self._currentQueryTranscript = Transcript()
+				self._currentQueryTranscript.copy(self._currentOrQueryTranscript)
+				self._currentQueryTranscript = self.transformTranscript(self._currentQueryTranscript, QUERY)
+				self.extendQueryTranscript(self._currentOrQueryTranscript)
+				newRefLaddr = self.checkIndex(refCursor)
+				if newRefLaddr != None:
+					nbMoves += 1
+					refCursor.setLIndex(newRefLaddr)
+					done = False
+				refCursor, done, unmatched = self.findOverlapIter(refCursor, done)
+				if refCursor.isOut():
+					# reference exhausted; only keep going when misses must still be reported
+					if not self._invert and not self._outputNotOverlapping:
+						break
+				if (unmatched and not self._invert and not self._outputNotOverlapping) or not queryCursor.hasChildren():
+					queryCursor.moveNext()
+					nbSkips += 1
+				else:
+					queryCursor.moveDown()
+				if queryCursor.isOut():
+					break
+			progress.inc()
+		progress.done()
+		endTime = time.time()
+		self._timeSpent = endTime - startTime
+		if self._verbosity >= 10:
+			print "# skips:   %d" % (nbSkips)
+			print "# moves:   %d" % (nbMoves)
+
+	def findOverlapIter(self, cursor, done):
+		"""Find all references overlapping the current query, write the result,
+		and return (next cursor, next 'done' flag, unmatched flag).
+
+		First the ancestors of the cursor are inspected (they may still span the
+		query), then the NC-list is walked forward/downward from the cursor.
+		NOTE(review): the early 'return None, False, True' hands back None as a
+		cursor, on which the caller calls isOut() — confirm this path is dead.
+		"""
+		chromosome = self._currentQueryTranscript.getChromosome()
+		matched	= False
+		if chromosome not in self._ncLists[REFERENCE]:
+			return None, False, True
+		ncList = self._ncLists[REFERENCE][chromosome]
+		overlappingNames = {}
+		nextDone = False
+		firstOverlapLAddr = NCListCursor(cursor)
+		firstOverlapLAddr.setLIndex(-1)
+		if cursor.isOut():
+			self._writeIntervalInNewGFF3(overlappingNames)
+			return firstOverlapLAddr, False, True
+		parentCursor = NCListCursor(cursor)
+		parentCursor.moveUp()
+		firstParentAfter = False
+		# Check ancestors: a parent interval may contain or follow the query.
+		while not parentCursor.isOut(): 
+			if self.isOverlapping(parentCursor) == 0:
+				matched = True
+				if self._checkOverlap(parentCursor.getTranscript()):
+					overlappingNames.update(self._extractID(parentCursor.getTranscript()))
+				if firstOverlapLAddr.isOut():
+					firstOverlapLAddr.copy(parentCursor)
+					nextDone = True 
+			elif self.isOverlapping(parentCursor) == 1:
+				firstParentAfter = NCListCursor(parentCursor)
+			parentCursor.moveUp()
+		if firstParentAfter:
+			written = self._writeIntervalInNewGFF3(overlappingNames)
+			return firstParentAfter, False, not written if self._invert else not matched
+		#This loop finds the overlaps with currentRefLAddr.#
+		while True:
+			parentCursor = NCListCursor(cursor)
+			parentCursor.moveUp()
+			#In case: Query is on the right of the RefInterval and does not overlap.
+			overlap = self.isOverlapping(cursor)
+			if overlap == -1:
+				cursor.moveNext()
+			#In case: Query overlaps with RefInterval.	
+			elif overlap == 0:
+				matched = True
+				if self._checkOverlap(cursor.getTranscript()):
+					overlappingNames.update(self._extractID(cursor.getTranscript()))
+				if firstOverlapLAddr.compare(parentCursor):
+					firstOverlapLAddr.copy(cursor)
+					nextDone = True
+				if done:
+					cursor.moveNext()
+				else:
+					if not cursor.hasChildren():
+						cursor.moveNext()
+						if cursor.isOut():
+							break
+					else:
+						cursor.moveDown()
+			#In case: Query is on the left of the RefInterval and does not overlap.		
+			else:
+				if firstOverlapLAddr.isOut() or firstOverlapLAddr.compare(parentCursor):
+					firstOverlapLAddr.copy(cursor)
+					nextDone = False # new
+				break
+			
+			done = False
+			if cursor.isOut():
+				break
+		written = self._writeIntervalInNewGFF3(overlappingNames)
+		return firstOverlapLAddr, nextDone, not written if self._invert else not matched
+	
+	def isOverlapping(self, refTranscript):
+		"""Interval test against the extended query: 0 = overlap, 1 = reference
+		is downstream of the query, -1 = reference is upstream."""
+		if (self._currentExQueryTranscript.getStart() <= refTranscript.getEnd() and self._currentExQueryTranscript.getEnd() >= refTranscript.getStart()):
+			return 0   
+		if self._currentExQueryTranscript.getEnd() < refTranscript.getStart():
+			return 1
+		return -1
+
+	def checkIndex(self, cursor):
+		"""Using the reference index, return an L-index to fast-forward the cursor
+		to, or None when indexing is off or no forward jump is possible."""
+		if not self._index:
+			return None
+		if cursor.isOut():
+			return None
+		chromosome = self._currentExQueryTranscript.getChromosome()
+		nextLIndex = self._indices[REFERENCE][chromosome].getIndex(self._currentExQueryTranscript)
+		if nextLIndex == None:
+			return None
+		ncList		 = self._ncLists[REFERENCE][chromosome]
+		nextGffAddress = ncList.getRefGffAddr(nextLIndex)
+		thisGffAddress = cursor.getGffAddress()
+		# only jump forward, never backwards
+		if nextGffAddress > thisGffAddress:
+			return nextLIndex
+		return None
+		
+	def _writeIntervalInNewGFF3(self, names):
+		"""Write the current query with its overlap tags; return True when written.
+
+		'names' maps overlapping reference IDs to their element counts.  Whether
+		the query is written depends on the invert/notOverlapping settings.
+		"""
+		nbOverlaps = 0
+		for cpt in names.values():
+			nbOverlaps += cpt
+		# statistics are counted on a match, or on a miss when inverted
+		self._nbOverlappingQueries += 1		      if Utils.xor(names, self._invert) else 0
+		self._nbOverlaps		   += nbOverlaps  if Utils.xor(names, self._invert) else 0
+		if names:
+			self._currentQueryTranscript.setTagValue("overlapWith", ",".join(names))
+			self._currentQueryTranscript.setTagValue("nbOverlaps", nbOverlaps)
+			if self._invert:
+				return False
+		else:
+			if self._outputNotOverlapping:
+				self._currentQueryTranscript.setTagValue("nbOverlaps", 0)
+			elif not self._invert:
+				return False
+		self._iWriter.addTranscript(self._currentQueryTranscript)
+		self._iWriter.write()
+		return True
+		
+	def _extractID(self, transcript):
+		"""Return {ID: element count} for a reference transcript, falling back to
+		its unique name and a count of 1 when the tags are absent."""
+		id		 = transcript.getTagValue("ID")		      if "ID"		  in transcript.getTagNames() else transcript.getUniqueName()
+		nbElements = transcript.getTagValue("nbElements") if "nbElements" in transcript.getTagNames() else 1
+		return {id: float(nbElements)}
+
+	def _checkOverlap(self, refTranscript):
+		"""Apply all overlap filters (distance, size, strand, inclusion, exons)
+		between the current query and one reference; return True on acceptance."""
+		if self._currentQueryTranscript.getDistance(refTranscript) > self._distance:
+			return False
+		minOverlap = self._minOverlap
+		if self._pcOverlap != None:
+			minOverlap = max(self._minOverlap, self._currentQueryTranscript.getSize() / 100.0 * self._pcOverlap)
+		if not self._currentQueryTranscript.overlapWith(refTranscript, minOverlap):
+			return False
+		if self._antisense and self._currentQueryTranscript.getDirection() == refTranscript.getDirection():
+			return False
+		if self._colinear and self._currentQueryTranscript.getDirection() != refTranscript.getDirection():
+			return False
+		if self._included and not refTranscript.include(self._currentQueryTranscript):
+			return False
+		if self._including and not self._currentQueryTranscript.include(refTranscript):
+			return False
+		if self._introns:
+			return True
+		# default mode requires the overlap to hit an exon
+		return self._currentQueryTranscript.overlapWithExon(refTranscript, minOverlap)
+		
+	def run(self):
+		"""Full pipeline: pre-process the reference, build NC-lists, compare,
+		close the writer, and print summary statistics."""
+		self.createTmpRefFile()
+		self.createNCLists()
+		self.compare()
+		self.close()
+		if self._verbosity > 0:
+			print "# queries: %d" % (self._nbLines[QUERY])
+			print "# refs:	  %d" % (self._nbLines[REFERENCE])
+			print "# written: %d (%d overlaps)" % (self._nbOverlappingQueries, self._nbOverlaps)
+			print "time: 	  %ds" % (self._timeSpent)
+
+
+if __name__ == "__main__":
+	# Command-line entry point: parse the options and wire them into a
+	# CompareOverlapping instance.  Note that -i/-f is the QUERY and -j/-g the
+	# REFERENCE, and that options are forwarded even when None (the setters
+	# treat None as "not set").
+	description = "Compare Overlapping v1.0.4: Get the data which overlap with a reference set. [Category: Data Comparison]"
+
+	parser = OptionParser(description = description)
+	parser.add_option("-i", "--input1",		      dest="inputFileName1", action="store",					 type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
+	parser.add_option("-f", "--format1",		  dest="format1",		 action="store",					 type="string", help="format of file 1 [compulsory] [format: transcript file format]")
+	parser.add_option("-j", "--input2",		      dest="inputFileName2", action="store",					 type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
+	parser.add_option("-g", "--format2",		  dest="format2",		 action="store",					 type="string", help="format of file 2 [compulsory] [format: transcript file format]")
+	parser.add_option("-o", "--output",		      dest="output",		 action="store",	  default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+	parser.add_option("-D", "--index",	          dest="index",	         action="store_true", default=False,	            help="add an index to the reference file (faster but more memory) [format: boolean] [default: False]")
+	parser.add_option("-r", "--sorted",	          dest="sorted",	     action="store_true", default=False,	            help="input files are already sorted [format: boolean] [default: False]")
+	parser.add_option("-S", "--start1",		      dest="start1",		 action="store",	  default=None,  type="int",	help="only consider the n first nucleotides of the transcripts in file 1 (do not use it with -U) [format: int]")
+	parser.add_option("-s", "--start2",		      dest="start2",		 action="store",	  default=None,  type="int",	help="only consider the n first nucleotides of the transcripts in file 2 (do not use it with -u) [format: int]")
+	parser.add_option("-U", "--end1",			  dest="end1",		     action="store",	  default=None,  type="int",	help="only consider the n last nucleotides of the transcripts in file 1 (do not use it with -S) [format: int]")
+	parser.add_option("-u", "--end2",			  dest="end2",		     action="store",	  default=None,  type="int",	help="only consider the n last nucleotides of the transcripts in file 2 (do not use it with -s) [format: int]")
+	parser.add_option("-t", "--intron",		      dest="introns",		 action="store_true", default=False,				help="also report introns [format: bool] [default: false]")
+	parser.add_option("-E", "--5primeExtension1", dest="fivePrime1",	 action="store",	  default=None,  type="int",	help="extension towards 5' in file 1 [format: int]")
+	parser.add_option("-e", "--5primeExtension2", dest="fivePrime2",	 action="store",	  default=None,  type="int",	help="extension towards 5' in file 2 [format: int]")
+	parser.add_option("-N", "--3primeExtension1", dest="threePrime1",	 action="store",	  default=None,  type="int",	help="extension towards 3' in file 1 [format: int]")
+	parser.add_option("-n", "--3primeExtension2", dest="threePrime2",	 action="store",	  default=None,  type="int",	help="extension towards 3' in file 2 [format: int]")
+	parser.add_option("-c", "--colinear",		  dest="colinear",		 action="store_true", default=False,				help="colinear only [format: bool] [default: false]")
+	parser.add_option("-a", "--antisense",		  dest="antisense",		 action="store_true", default=False,				help="antisense only [format: bool] [default: false]")
+	parser.add_option("-d", "--distance",		  dest="distance",	     action="store",	  default=0,	 type="int",	help="accept some distance between query and reference [format: int]")
+	parser.add_option("-k", "--included",		  dest="included",	     action="store_true", default=False,				help="keep only elements from file 1 which are included in an element of file 2 [format: bool] [default: false]")
+	parser.add_option("-K", "--including",		  dest="including",	     action="store_true", default=False,				help="keep only elements from file 2 which are included in an element of file 1 [format: bool] [default: false]")
+	parser.add_option("-m", "--minOverlap",		  dest="minOverlap",	 action="store",	  default=1,	 type="int",	help="minimum number of nucleotides overlapping to declare an overlap [format: int] [default: 1]")
+	parser.add_option("-p", "--pcOverlap",		  dest="pcOverlap",	     action="store",	  default=None,  type="int",	help="minimum percentage of nucleotides to overlap to declare an overlap [format: int]")
+	parser.add_option("-O", "--notOverlapping",   dest="notOverlapping", action="store_true", default=False,				help="also output not overlapping data [format: bool] [default: false]")
+	parser.add_option("-x", "--exclude",		  dest="exclude",		 action="store_true", default=False,				help="invert the match [format: bool] [default: false]")
+	parser.add_option("-v", "--verbosity",		  dest="verbosity",		 action="store",	  default=1,	 type="int",	help="trace level [format: int]")
+	(options, args) = parser.parse_args()
+
+	co = CompareOverlapping(options.verbosity)
+	co.setInput(options.inputFileName1, options.format1, QUERY)
+	co.setInput(options.inputFileName2, options.format2, REFERENCE)
+	co.setOutput(options.output)
+	co.setSorted(options.sorted)
+	co.setIndex(options.index)
+	co.restrictToStart(options.start1, QUERY)
+	co.restrictToStart(options.start2, REFERENCE)
+	co.restrictToEnd(options.end1, QUERY)
+	co.restrictToEnd(options.end2, REFERENCE)
+	co.extendFivePrime(options.fivePrime1, QUERY)
+	co.extendFivePrime(options.fivePrime2, REFERENCE)
+	co.extendThreePrime(options.threePrime1, QUERY)
+	co.extendThreePrime(options.threePrime2, REFERENCE)
+	co.acceptIntrons(options.introns)
+	co.getAntisenseOnly(options.antisense)
+	co.getColinearOnly(options.colinear)
+	co.getInvert(options.exclude)
+	co.setMaxDistance(options.distance)
+	co.setMinOverlap(options.minOverlap)
+	co.setPcOverlap(options.pcOverlap)
+	co.setIncludedOnly(options.included)
+	co.setIncludingOnly(options.including)
+	co.includeNotOverlapping(options.notOverlapping)
+	co.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CompareOverlappingSmallQuery.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,261 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Mapping import Mapping
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+MINBIN = 3
+MAXBIN = 7
+REFERENCE = 0
+QUERY = 1
+
+def getBin(start, end):
+	for i in range(MINBIN, MAXBIN + 1):
+		binLevel = 10 ** i
+		if int(start / binLevel) == int(end / binLevel):
+			return int(i * 10 ** (MAXBIN + 1) + int(start / binLevel))
+	return int((MAXBIN + 1) * 10 ** (MAXBIN + 1))
+
+def getOverlappingBins(start, end):
+	array	= []
+	bigBin = int((MAXBIN + 1) * 10 ** (MAXBIN + 1))
+	for i in range(MINBIN, MAXBIN + 1):
+		binLevel = 10 ** i
+		array.append((int(i * 10 ** (MAXBIN + 1) + int(start / binLevel)), int(i * 10 ** (MAXBIN + 1) + int(end / binLevel))))
+	array.append((bigBin, bigBin))
+	return array
+
+
+class CompareOverlappingSmallQuery(object):
+	"""Find query elements overlapping a reference set, optimized for a small query set.
+
+	All queries are loaded in memory into genomic bins; the (possibly large)
+	reference file is then streamed once, probing only the bins its elements
+	overlap.  Matching queries are written as GFF3 with 'nbOverlaps' and
+	'overlapsWith' tags.
+	"""
+
+	def __init__(self, verbosity):
+		# verbosity: trace level; filters default to off and are enabled via setters
+		self.verbosity      = verbosity
+		self.tableNames     = {}
+		self.nbQueries      = 0
+		self.nbRefs	        = 0
+		self.nbWritten      = 0
+		self.nbOverlaps     = 0
+		self.distance       = None
+		self.invert         = False
+		self.antisense      = False
+		self.collinear      = False
+		self.pcOverlapQuery = False
+		self.pcOverlapRef   = False
+		self.minOverlap     = False
+		self.included       = False
+		self.including      = False
+		# bins: {chromosome: {bin id: [query transcripts]}}
+		self.bins	        = {}
+		# overlaps: {query transcript: {reference name: element count}}
+		self.overlaps       = {}
+		self.notOverlapping = False
+
+	def setReferenceFile(self, fileName, format):
+		"""Open a parser on the reference input file."""
+		chooser = ParserChooser(self.verbosity)
+		chooser.findFormat(format)
+		self.refParser = chooser.getParser(fileName)
+
+	def setQueryFile(self, fileName, format):
+		"""Open a parser on the query input file."""
+		chooser = ParserChooser(self.verbosity)
+		chooser.findFormat(format)
+		self.queryParser = chooser.getParser(fileName)
+
+	def setOutputFile(self, fileName):
+		"""Open the GFF3 output writer."""
+		self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
+
+	def setDistance(self, distance):
+		"""Accept up to 'distance' nucleotides between query and reference."""
+		self.distance = distance
+
+	def setInvert(self, boolean):
+		"""Invert the match: report queries that do NOT overlap."""
+		self.invert = boolean
+
+	def setCollinear(self, boolean):
+		"""Keep only same-strand overlaps."""
+		self.collinear = boolean
+
+	def setAntisense(self, boolean):
+		"""Keep only opposite-strand overlaps."""
+		self.antisense = boolean
+
+	def setMinPercentOverlap(self, pcOverlapQuery, pcOverlapRef):
+		"""Minimum overlap as a percentage of the query and/or of the reference."""
+		self.pcOverlapQuery = pcOverlapQuery
+		self.pcOverlapRef   = pcOverlapRef
+
+	def setMinOverlap(self, minOverlap):
+		"""Minimum number of overlapping nucleotides."""
+		self.minOverlap = minOverlap
+
+	def setInclude(self, included, including):
+		"""Restrict to queries nested in a reference (included) and/or containing one (including)."""
+		self.included  = included
+		self.including = including
+
+	def includeNotOverlapping(self, boolean):
+		"""Also write queries that overlap nothing."""
+		self.notOverlapping = boolean
+
+	def loadQuery(self):
+		"""Read all queries into memory, grouped by chromosome and genomic bin.
+
+		When non-overlapping queries must be reported (or matches inverted),
+		every query is pre-seeded into self.overlaps with an empty match dict.
+		"""
+		progress = UnlimitedProgress(10000, "Reading queries", self.verbosity)
+		for transcript in self.queryParser.getIterator():
+			if transcript.__class__.__name__ == "Mapping":
+				transcript = transcript.getTranscript()
+			chromosome = transcript.getChromosome()
+			bin		   = getBin(transcript.getStart(), transcript.getEnd())
+			if chromosome not in self.bins:
+				self.bins[chromosome] = {}
+			if bin not in self.bins[chromosome]:
+				self.bins[chromosome][bin] = []
+			self.bins[chromosome][bin].append(transcript)
+			if self.notOverlapping or self.invert:
+				self.overlaps[transcript] = {}
+			self.nbQueries += 1
+			progress.inc()
+		progress.done()
+
+	def _compareTwoTranscripts(self, queryTranscript, refTranscript):
+		"""Return True when the pair passes every configured overlap filter."""
+		if not queryTranscript.overlapWithExon(refTranscript):
+			return False
+		if self.collinear and queryTranscript.getDirection() != refTranscript.getDirection():
+			return False
+		if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection():
+			return False
+		if self.included and not refTranscript.include(queryTranscript):
+			return False
+		if self.including and not queryTranscript.include(refTranscript):
+			return False
+		querySize = queryTranscript.getSize()
+		if self.pcOverlapQuery and not queryTranscript.overlapWithExon(refTranscript, int(querySize * self.pcOverlapQuery / 100.0)):
+			return False
+		refSize = refTranscript.getSize()
+		if self.pcOverlapRef and not queryTranscript.overlapWithExon(refTranscript, int(refSize * self.pcOverlapRef / 100.0)):
+			return False
+		if self.minOverlap and not queryTranscript.overlapWithExon(refTranscript, self.minOverlap):
+			return False
+		return True
+
+	def _alterTranscript(self, transcript, type):
+		"""Extend reference exons by the accepted distance.
+
+		NOTE(review): the CLI default for distance is 0 (not None), so
+		extendExons(0) is called on every reference — confirm it is a no-op.
+		"""
+		if type == REFERENCE:
+			if self.distance != None:
+				transcript.extendExons(self.distance)
+		return transcript
+
+	def _compareTranscript(self, refTranscript):
+		"""Probe the query bins with one reference and record matches in self.overlaps.
+
+		NOTE(review): returns [] when the chromosome has no queries and None
+		otherwise; callers ignore the return value, so only the side effect matters.
+		"""
+		refChromosome = refTranscript.getChromosome()
+		if refChromosome not in self.bins:
+			return []
+		refStart = refTranscript.getStart()
+		refEnd   = refTranscript.getEnd()
+		bins	 = getOverlappingBins(refStart, refEnd)
+		for binRange in bins:
+			for bin in range(binRange[0], binRange[1]+1):
+				if bin not in self.bins[refChromosome]:
+					continue
+				for queryTranscript in self.bins[refChromosome][bin]:
+					if self._compareTwoTranscripts(queryTranscript, refTranscript):
+						if queryTranscript not in self.overlaps:
+							self.overlaps[queryTranscript] = {}
+						# NOTE(review): the nbElements expression is computed twice (next two lines)
+						nbElements = int(float(refTranscript.getTagValue("nbElements"))) if "nbElements" in refTranscript.getTagNames() else 1
+						self.overlaps[queryTranscript][refTranscript.getName()] = int(float(refTranscript.getTagValue("nbElements"))) if "nbElements" in refTranscript.getTagNames() else 1
+						self.nbOverlaps += nbElements
+
+	def _updateTranscript(self, queryTranscript):
+		"""Tag a query with its overlap count and (truncated) list of reference names."""
+		overlaps = self.overlaps[queryTranscript]
+		queryTranscript.setTagValue("nbOverlaps", sum(overlaps.values()))
+		if overlaps:
+			# the joined name list is capped at 100 characters
+			queryTranscript.setTagValue("overlapsWith", "--".join(overlaps.keys())[:100])
+		return queryTranscript
+
+	def compare(self):
+		"""Stream the reference file and compare each element against the query bins."""
+		progress = UnlimitedProgress(10000, "Comparing references", self.verbosity)
+		for refTranscript in self.refParser.getIterator():
+			if refTranscript.__class__.__name__ == "Mapping":
+				refTranscript = refTranscript.getTranscript()
+			refTranscript = self._alterTranscript(refTranscript, REFERENCE)
+			self._compareTranscript(refTranscript)
+			self.nbRefs += 1
+			progress.inc()
+		progress.done()
+
+	def printResults(self):
+		"""Write the selected queries (matched, or unmatched when inverted) and close the writer."""
+		for transcript in self.overlaps:
+			if not self.invert or not self.overlaps[transcript]:
+				if not self.invert:
+					transcript = self._updateTranscript(transcript)
+				self.writer.addTranscript(transcript)
+				self.nbWritten += 1
+		self.writer.close()
+
+	def displayResults(self):
+		"""Print summary statistics when verbosity is on."""
+		if self.verbosity:
+			print "# queries:  %d" % (self.nbQueries)
+			print "# refs:     %d" % (self.nbRefs)
+			print "# written:  %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps)
+
+	def run(self):
+		"""Full pipeline: load queries, compare references, write output, print stats."""
+		self.loadQuery()
+		self.compare()
+		self.printResults()
+		self.displayResults()
+
+if __name__ == "__main__":
+	# Command-line entry point: -i/-f is the QUERY (kept in memory),
+	# -j/-g the REFERENCE (streamed).
+	
+	description = "Compare Overlapping Small Query v1.0.1: Provide the queries that overlap with a reference, when the query is small. [Category: Data Comparison]"
+
+	parser = OptionParser(description = description)
+	parser.add_option("-i", "--input1",	        dest="inputFileName1", action="store",			           type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
+	parser.add_option("-f", "--format1",        dest="format1",		  action="store",			           type="string", help="format of previous file [compulsory] [format: transcript file format]")
+	parser.add_option("-j", "--input2",	        dest="inputFileName2", action="store",			           type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
+	parser.add_option("-g", "--format2",        dest="format2",		  action="store",			           type="string", help="format of previous file [compulsory] [format: transcript file format]")
+	parser.add_option("-o", "--output",	        dest="outputFileName", action="store",			           type="string", help="output file [format: output file in GFF3 format]")
+	parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False,				 help="also output not overlapping data [format: bool] [default: false]")
+	parser.add_option("-d", "--distance",		dest="distance",	   action="store",	    default=0,	   type="int",	 help="accept some distance between query and reference [format: int]")
+	parser.add_option("-c", "--collinear",		dest="collinear",	   action="store_true", default=False,			 	 help="provide collinear features [format: bool] [default: false]")
+	parser.add_option("-a", "--antisense",		dest="antisense",	   action="store_true", default=False,			 	 help="provide antisense features [format: bool] [default: false]")
+	parser.add_option("-m", "--minOverlap",	    dest="minOverlap",     action="store",      default=False, type="int",	 help="min. #nt overlap [format: bool] [default: false]")
+	parser.add_option("-p", "--pcOverlapQuery",	dest="pcOverlapQuery", action="store",      default=False, type="int",	 help="min. % overlap of the query [format: bool] [default: false]")
+	parser.add_option("-P", "--pcOverlapRef",	dest="pcOverlapRef",   action="store",      default=False, type="int",   help="min. % overlap of the reference [format: bool] [default: false]")
+	parser.add_option("-k", "--included",		dest="included",	   action="store_true", default=False,			 	 help="provide query elements which are nested in reference elements [format: bool] [default: false]")
+	parser.add_option("-K", "--including",		dest="including",	   action="store_true", default=False,			 	 help="provide query elements in which reference elements are nested [format: bool] [default: false]")
+	parser.add_option("-x", "--exclude",		dest="exclude",		   action="store_true", default=False,			 	 help="invert the match [format: bool] [default: false]")
+	parser.add_option("-v", "--verbosity",      dest="verbosity",	   action="store",      default=1,     type="int",	 help="trace level [format: int]")
+	(options, args) = parser.parse_args()
+
+	cosq = CompareOverlappingSmallQuery(options.verbosity)
+	cosq.setQueryFile(options.inputFileName1, options.format1)
+	cosq.setReferenceFile(options.inputFileName2, options.format2)
+	cosq.setOutputFile(options.outputFileName)
+	cosq.includeNotOverlapping(options.notOverlapping)
+	cosq.setDistance(options.distance)
+	cosq.setCollinear(options.collinear)
+	cosq.setAntisense(options.antisense)
+	cosq.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef)
+	cosq.setMinOverlap(options.minOverlap)
+	cosq.setInclude(options.included, options.including)
+	cosq.setInvert(options.exclude)
+	cosq.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CompareOverlappingSmallRef.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,250 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Mapping import Mapping
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
MINBIN = 3
MAXBIN = 7
REFERENCE = 0
QUERY = 1

def getBin(start, end):
	"""Return the finest genomic bin fully containing [start, end].

	A bin is encoded as level * 10**(MAXBIN+1) + offset, where the bin
	width is 10**level and offset is the bin index at that width.  An
	interval that straddles a boundary at every level falls into the
	single catch-all top-level bin.
	"""
	for level in range(MINBIN, MAXBIN + 1):
		width    = 10 ** level
		startBin = int(start / width)
		if startBin == int(end / width):
			return int(level * 10 ** (MAXBIN + 1) + startBin)
	return int((MAXBIN + 1) * 10 ** (MAXBIN + 1))

def getOverlappingBins(start, end):
	"""Return, for each bin level, the (first, last) encoded bins that could
	hold an interval overlapping [start, end], plus the catch-all top bin."""
	ranges = []
	for level in range(MINBIN, MAXBIN + 1):
		width = 10 ** level
		base  = level * 10 ** (MAXBIN + 1)
		ranges.append((int(base + int(start / width)), int(base + int(end / width))))
	topBin = int((MAXBIN + 1) * 10 ** (MAXBIN + 1))
	ranges.append((topBin, topBin))
	return ranges
+
+
class CompareOverlappingSmallRef(object):
	"""Compare a set of query transcripts against a small reference set.

	The whole reference is loaded in memory, indexed by chromosome and
	genomic bin (see getBin/getOverlappingBins); queries are then streamed
	and matched only against the bins they may overlap.
	"""

	def __init__(self, verbosity):
		self.verbosity      = verbosity
		self.tableNames     = {}
		self.nbQueries      = 0
		self.nbRefs         = 0
		self.nbWritten      = 0
		self.nbOverlaps     = 0
		self.invert         = False  # report queries with NO overlap instead
		self.antisense      = False  # require opposite strands
		self.collinear      = False  # require same strand
		self.distance       = None   # extend reference exons by this much
		self.minOverlap     = False  # min. number of overlapping nucleotides
		self.pcOverlapQuery = False  # min. % of the query that must overlap
		self.pcOverlapRef   = False  # min. % of the reference that must overlap
		self.included       = False  # query must be nested in reference
		self.including      = False  # reference must be nested in query
		self.bins           = {}     # chromosome -> bin -> [reference transcripts]
		self.notOverlapping = False  # also output non-overlapping queries

	def setReferenceFile(self, fileName, format):
		# Reference transcripts; fully loaded in memory by loadRef().
		chooser = ParserChooser(self.verbosity)
		chooser.findFormat(format)
		self.refParser = chooser.getParser(fileName)

	def setQueryFile(self, fileName, format):
		# Query transcripts; streamed one by one in compare().
		chooser = ParserChooser(self.verbosity)
		chooser.findFormat(format)
		self.queryParser = chooser.getParser(fileName)

	def setOutputFile(self, fileName):
		self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)

	def setDistance(self, distance):
		self.distance = distance

	def setCollinear(self, boolean):
		self.collinear = boolean

	def setAntisense(self, boolean):
		self.antisense = boolean

	def setInvert(self, boolean):
		self.invert = boolean

	def setMinOverlap(self, minOverlap):
		# BUGFIX: this setter was missing although the command-line entry
		# point (and CountLoci) call it, which raised an AttributeError.
		self.minOverlap = minOverlap

	def setMinPercentOverlap(self, pcOverlapQuery, pcOverlapRef):
		self.pcOverlapQuery = pcOverlapQuery
		self.pcOverlapRef   = pcOverlapRef

	def setInclude(self, included, including):
		self.included  = included
		self.including = including

	def includeNotOverlapping(self, boolean):
		self.notOverlapping = boolean

	def loadRef(self):
		"""Load every reference transcript into per-chromosome, per-bin lists."""
		progress = UnlimitedProgress(10000, "Reading references", self.verbosity)
		for transcript in self.refParser.getIterator():
			if transcript.__class__.__name__ == "Mapping":
				transcript = transcript.getTranscript()
			transcript = self._alterTranscript(transcript, REFERENCE)
			chromosome = transcript.getChromosome()
			bin        = getBin(transcript.getStart(), transcript.getEnd())
			if chromosome not in self.bins:
				self.bins[chromosome] = {}
			if bin not in self.bins[chromosome]:
				self.bins[chromosome][bin] = []
			self.bins[chromosome][bin].append(transcript)
			self.nbRefs += 1
			progress.inc()
		progress.done()

	def _alterTranscript(self, transcript, type):
		# Reference exons are extended by 'distance' so that close-by (not
		# strictly overlapping) queries are also matched.
		if type == REFERENCE:
			if self.distance != None:
				transcript.extendExons(self.distance)
		return transcript

	def _compareTwoTranscripts(self, queryTranscript, refTranscript):
		"""Return True when the pair passes every configured overlap filter."""
		if not queryTranscript.overlapWithExon(refTranscript):
			return False
		if self.collinear and queryTranscript.getDirection() != refTranscript.getDirection():
			return False
		if self.antisense and queryTranscript.getDirection() == refTranscript.getDirection():
			return False
		if self.included and not queryTranscript.isIncluded(refTranscript):
			return False
		if self.including and not refTranscript.isIncluded(queryTranscript):
			return False
		querySize = queryTranscript.getSize()
		if self.pcOverlapQuery and not queryTranscript.overlapWithExon(refTranscript, int(querySize * self.pcOverlapQuery / 100.0)):
			return False
		refSize = refTranscript.getSize()
		if self.pcOverlapRef and not queryTranscript.overlapWithExon(refTranscript, int(refSize * self.pcOverlapRef / 100.0)):
			return False
		if self.minOverlap and not queryTranscript.overlapWithExon(refTranscript, self.minOverlap):
			return False
		return True

	def _compareTranscript(self, queryTranscript):
		"""Return {reference name: nbElements} for every matching reference."""
		queryChromosome = queryTranscript.getChromosome()
		if queryChromosome not in self.bins:
			return []
		queryStart = queryTranscript.getStart()
		queryEnd   = queryTranscript.getEnd()
		bins       = getOverlappingBins(queryStart, queryEnd)
		overlaps   = {}
		for binRange in bins:
			for bin in range(binRange[0], binRange[1]+1):
				if bin not in self.bins[queryChromosome]:
					continue
				for refTranscript in self.bins[queryChromosome][bin]:
					if self._compareTwoTranscripts(queryTranscript, refTranscript):
						# A clustered reference counts for its 'nbElements' tag
						# (computed once, instead of twice as before).
						nbElements = int(float(refTranscript.getTagValue("nbElements"))) if "nbElements" in refTranscript.getTagNames() else 1
						overlaps[refTranscript.getName()] = nbElements
						self.nbOverlaps += nbElements
		return overlaps

	def _updateTranscript(self, queryTranscript, overlaps):
		queryTranscript.setTagValue("nbOverlaps", sum(overlaps.values()))
		if overlaps:
			# The tag value is truncated to 100 characters.
			queryTranscript.setTagValue("overlapsWith", "--".join(overlaps.keys())[:100])
		return queryTranscript

	def compare(self):
		"""Stream the queries, compare each against the reference bins, write."""
		progress = UnlimitedProgress(10000, "Comparing queries", self.verbosity)
		for queryTranscript in self.queryParser.getIterator():
			if queryTranscript.__class__.__name__ == "Mapping":
				queryTranscript = queryTranscript.getTranscript()
			progress.inc()
			self.nbQueries += 1
			overlaps = self._compareTranscript(queryTranscript)
			if self.notOverlapping or (overlaps and not self.invert) or (not overlaps and self.invert):
				if not self.invert:
					queryTranscript = self._updateTranscript(queryTranscript, overlaps)
				self.writer.addTranscript(queryTranscript)
				self.nbWritten += 1
		progress.done()
		self.writer.close()

	def displayResults(self):
		if self.verbosity > 0:
			print("# queries:  %d" % (self.nbQueries))
			print("# refs:     %d" % (self.nbRefs))
			print("# written:  %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps))

	def run(self):
		self.loadRef()
		self.compare()
		self.displayResults()
+
if __name__ == "__main__":

	description = "Compare Overlapping Small Reference v1.0.1: Provide the queries that overlap with a reference, when the reference is small. [Category: Data Comparison]"

	# Declare the command-line interface.
	parser = OptionParser(description = description)
	parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
	parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
	parser.add_option("-j", "--input2", dest="inputFileName2", action="store", type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
	parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of previous file [compulsory] [format: transcript file format]")
	parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
	parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]")
	parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int", help="accept some distance between query and reference [format: int]")
	parser.add_option("-c", "--collinear", dest="collinear", action="store_true", default=False, help="provide collinear features [format: bool] [default: false]")
	parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="provide antisense features [format: bool] [default: false]")
	parser.add_option("-m", "--minOverlap", dest="minOverlap", action="store", default=False, type="int", help="min. #nt overlap [format: bool] [default: false]")
	parser.add_option("-p", "--pcOverlapQuery", dest="pcOverlapQuery", action="store", default=False, type="int", help="min. % overlap of the query [format: bool] [default: false]")
	parser.add_option("-P", "--pcOverlapRef", dest="pcOverlapRef", action="store", default=False, type="int", help="min. % overlap of the reference [format: bool] [default: false]")
	parser.add_option("-k", "--included", dest="included", action="store_true", default=False, help="provide query elements which are nested in reference elements [format: bool] [default: false]")
	parser.add_option("-K", "--including", dest="including", action="store_true", default=False, help="provide query elements in which reference elements are nested [format: bool] [default: false]")
	parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]")
	parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
	(options, args) = parser.parse_args()

	# Configure and run the comparison.
	cosr = CompareOverlappingSmallRef(options.verbosity)
	cosr.setQueryFile(options.inputFileName1, options.format1)
	cosr.setReferenceFile(options.inputFileName2, options.format2)
	cosr.setOutputFile(options.outputFileName)
	cosr.includeNotOverlapping(options.notOverlapping)
	cosr.setDistance(options.distance)
	# BUGFIX: -c/--collinear was parsed but never applied (the sibling
	# CompareOverlappingSmallQuery entry point does call setCollinear).
	cosr.setCollinear(options.collinear)
	cosr.setAntisense(options.antisense)
	cosr.setInclude(options.included, options.including)
	cosr.setInvert(options.exclude)
	cosr.setMinOverlap(options.minOverlap)
	cosr.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef)
	cosr.run()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/ComputeCoverage.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,142 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os, random
+from optparse import OptionParser, OptionGroup
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.Progress import Progress
+from commons.core.writer.Gff3Writer import Gff3Writer
+
+
class CoverageComputer(object):
	"""Compute the proportion of each query element covered by a reference set.

	The reference is read first and every covered nucleotide position is
	recorded per chromosome; the query is then read and each of its
	nucleotides is checked against this coverage map.
	"""

	def __init__(self, verbosity = 0):
		self.verbosity       = verbosity
		self.queryReader     = None
		self.referenceReader = None
		self.outputWriter    = None   # optional: only set when an output file is given
		self.introns         = False  # when True, whole transcript spans are used
		self.nbNucleotides   = 0      # #nt seen in the query
		self.nbCovered       = 0      # #nt of the query covered by the reference

	def setInputQueryFile(self, fileName, format):
		self.queryReader = TranscriptContainer(fileName, format, self.verbosity-1)

	def setInputReferenceFile(self, fileName, format):
		self.referenceReader = TranscriptContainer(fileName, format, self.verbosity-1)

	def includeIntrons(self, boolean):
		# When set, exons are stripped so whole transcript spans (introns
		# included) are considered instead of exons only.
		self.introns = boolean

	def setOutputFileName(self, fileName, title="S-MART", feature="transcript", featurePart="exon"):
		self.outputWriter = Gff3Writer(fileName, self.verbosity-1)
		self.outputWriter.setTitle(title)
		self.outputWriter.setFeature(feature)
		self.outputWriter.setFeaturePart(featurePart)

	def readReference(self):
		"""Mark every nucleotide position covered by the reference set."""
		self.coveredRegions = {}
		progress = Progress(self.referenceReader.getNbTranscripts(), "Reading reference file", self.verbosity-1)
		for transcript in self.referenceReader.getIterator():
			regions = self.coveredRegions.setdefault(transcript.getChromosome(), {})
			if self.introns:
				transcript.removeExons()
			for exon in transcript.getExons():
				for position in range(exon.getStart(), exon.getEnd()+1):
					regions[position] = 1
			progress.inc()
		progress.done()

	def readQuery(self):
		"""Count query nucleotides and how many of them are covered."""
		progress = Progress(self.queryReader.getNbTranscripts(), "Reading query file", self.verbosity-1)
		for transcript in self.queryReader.getIterator():
			progress.inc()
			regions = self.coveredRegions.get(transcript.getChromosome())
			if regions is None:
				# Chromosome absent from the reference: nothing can be covered.
				continue
			if self.introns:
				transcript.removeExons()
			for exon in transcript.getExons():
				for position in range(exon.getStart(), exon.getEnd()+1):
					self.nbNucleotides += 1
					self.nbCovered     += regions.get(position, 0)
		progress.done()

	def write(self):
		"""Write the query back, tagging each element with its % coverage."""
		progress = Progress(self.queryReader.getNbTranscripts(), "Writing output file", self.verbosity-1)
		for transcript in self.queryReader.getIterator():
			# BUGFIX: use .get() so a query chromosome absent from the
			# reference no longer raises a KeyError (readQuery already
			# skips such chromosomes silently).
			regions = self.coveredRegions.get(transcript.getChromosome(), {})
			if self.introns:
				transcript.removeExons()
			size     = transcript.getSize()
			coverage = 0
			for exon in transcript.getExons():
				for position in range(exon.getStart(), exon.getEnd()+1):
					coverage += regions.get(position, 0)
			transcript.setTagValue("coverage", 0 if size == 0 else float(coverage) / size * 100)
			self.outputWriter.addTranscript(transcript)
			progress.inc()
		progress.done()

	def sumUp(self):
		print("%d nucleotides in query, %d (%.f%%) covered" % (self.nbNucleotides, self.nbCovered, 0 if self.nbNucleotides == 0 else float(self.nbCovered) / self.nbNucleotides * 100))

	def run(self):
		self.readReference()
		self.readQuery()
		if self.outputWriter != None:
			self.write()
		self.sumUp()
+
+
if __name__ == "__main__":

	# parse command line
	description = "Compute Coverage v1.0.1: Compute the coverage of a set with respect to another set. [Category: Personal]"

	parser = OptionParser(description = description)
	parser.add_option("-i", "--input1",	   dest="inputFileName1", action="store",                     type="string", help="input query file [compulsory] [format: file in transcript format given by -f]")
	parser.add_option("-f", "--format1",   dest="format1",        action="store",                     type="string", help="format of the first file [compulsory] [format: transcript file format]")
	parser.add_option("-j", "--input2",	   dest="inputFileName2", action="store",                     type="string", help="input reference file [compulsory] [format: file in transcript format given by -f]")
	parser.add_option("-g", "--format2",   dest="format2",        action="store",                     type="string", help="format of the second file [compulsory] [format: transcript file format]")
	parser.add_option("-t", "--introns",   dest="introns",        action="store_true", default=False,                help="also include introns [format: boolean] [default: false]")
	parser.add_option("-o", "--output",	   dest="outputFileName", action="store",	   default=None,  type="string", help="output file [format: output file in GFF3 format]")
	parser.add_option("-v", "--verbosity", dest="verbosity",	  action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
	(options, args) = parser.parse_args()

	computer = CoverageComputer(options.verbosity)
	computer.setInputQueryFile(options.inputFileName1, options.format1)
	computer.setInputReferenceFile(options.inputFileName2, options.format2)
	computer.includeIntrons(options.introns)
	# BUGFIX: the output file is optional (default None) and run() already
	# handles the writer-less case; do not build a Gff3Writer on None.
	if options.outputFileName != None:
		computer.setOutputFileName(options.outputFileName)
	computer.run()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CountLoci.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,230 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2012
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os, os.path, random
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.parsing.GffParser import GffParser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from SMART.Java.Python.cleanGff import CleanGff
+from SMART.Java.Python.CompareOverlappingSmallRef import CompareOverlappingSmallRef
+from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
+from SMART.Java.Python.GetUpDownStream import GetUpDownStream
+
+REFERENCE = 0
+QUERY = 1
+
class CountLoci(object):
    """Assign each input element to a genomic locus category.

    The input is compared successively against CDS, 5' UTR, 3' UTR, ncRNA,
    transposable elements, introns (mRNA spans) and gene vicinity; at each
    step the overlapping elements are written to the output with a 'locus'
    tag and the remaining elements are passed on to the next comparison.
    """

    def __init__(self, verbosity = 1):
        self.verbosity    = verbosity
        self.tmpFileNames = []  # temporary files, removed in __del__

    def __del__(self):
        # Remove every temporary file created during the run.
        for fileName in self.tmpFileNames:
            if os.path.exists(fileName):
                os.remove(fileName)

    def setInputFile(self, fileName, format):
        self.inputFileName = fileName
        self.inputFormat   = format
        self.parser        = TranscriptContainer(fileName, format, self.verbosity-1)
        if self.verbosity > 0:
            print("%d elements in input" % (self.parser.getNbTranscripts()))

    def setReference(self, fileName):
        self.referenceFileName = fileName

    def setDistance(self, distance):
        # Distance used for the up/downstream vicinity regions.
        self.distance = distance

    def setOutputFileName(self, fileName):
        self.outputFileName = fileName
        self.writer         = Gff3Writer(fileName, self.verbosity-1)
        # Randomized prefix shared by all temporary files.
        self.outputBase     = "%s_%d_" % (os.path.splitext(fileName)[0], random.randint(0, 10000))

    def _writeTmpRef(self, tags, outputFileName):
        # Extract from the reference the features whose type is in 'tags'.
        cleanGff = CleanGff(self.verbosity-1)
        cleanGff.setInputFileName(self.referenceFileName)
        cleanGff.setOutputFileName(outputFileName)
        cleanGff.setAcceptedTypes(tags)
        cleanGff.run()

    def _getReferenceFiles(self):
        # One temporary reference file per category ('vic' is only built
        # later, by _getVicinity).
        self.referenceFiles = {"CDS":                       "%scds.gff3"      % (self.outputBase), \
                               "five_prime_UTR":            "%sfive.gff3"     % (self.outputBase), \
                               "three_prime_UTR":           "%sthree.gff3"    % (self.outputBase), \
                               "mRNA":                      "%smrna.gff3"     % (self.outputBase), \
                               "ncRNA":                     "%sncRNA.gff3"    % (self.outputBase), \
                               "transposable_element_gene": "%sTE.gff3"       % (self.outputBase), \
                               "vic":                       "%svicinity.gff3" % (self.outputBase)}
        self.tmpFileNames.extend(self.referenceFiles.values())
        for tag, fileName in self.referenceFiles.items():
            if tag == "ncRNA":
                # All non-coding RNA types are pooled together.
                self._writeTmpRef(["miRNA", "ncRNA", "rRNA", "snoRNA", "snRNA", "tRNA"], fileName)
            elif tag == "vic":
                continue
            else:
                self._writeTmpRef([tag], fileName)

    def _compare(self, queryFileName, queryFormat, referenceFileName, referenceFormat, outputFileName, exclusion = False):
        """Write to outputFileName the query elements that overlap (or, when
        'exclusion' is set, do not overlap) the reference; return their count."""
        co = CompareOverlappingSmallRef(self.verbosity-1)
        co.setQueryFile(queryFileName, queryFormat)
        co.setReferenceFile(referenceFileName, referenceFormat)
        co.setOutputFile(outputFileName)
        if exclusion:
            co.setInvert(True)
        co.run()
        return co.nbWritten

    def _copy(self, inputFile, tag):
        # Append the elements of 'inputFile' to the final output, tagged
        # with their locus category.
        parser = GffParser(inputFile, self.verbosity-1)
        for transcript in parser.getIterator():
            transcript.setTagValue("locus", tag)
            self.writer.addTranscript(transcript)

    def _splitOnCategory(self, inputFileName, inputFormat, refKey, outputFileName, outputNoFileName, tag, message):
        """Split the input into elements overlapping self.referenceFiles[refKey]
        (written to the final output with locus=tag) and the others; return the
        name of the file holding the non-overlapping elements.

        Factors out the logic previously duplicated in _getCds, _getFivePrime,
        _getThreePrime, _getNcRna, _getTe and _getIntron.
        """
        self.tmpFileNames.extend([outputFileName, outputNoFileName])
        nbOverlaps = self._compare(inputFileName, inputFormat, self.referenceFiles[refKey], "gff3", outputFileName)
        self._compare(inputFileName, inputFormat, self.referenceFiles[refKey], "gff3", outputNoFileName, True)
        self._copy(outputFileName, tag)
        if self.verbosity > 0:
            print("%d overlaps in %s" % (nbOverlaps, message))
        return outputNoFileName

    def _getCds(self):
        return self._splitOnCategory(self.inputFileName, self.inputFormat, "CDS", "%sin_cds.gff3" % (self.outputBase), "%sin_nocds.gff3" % (self.outputBase), "CDS", "CDS")

    def _getFivePrime(self, inputFileName):
        return self._splitOnCategory(inputFileName, "gff3", "five_prime_UTR", "%sin_five.gff3" % (self.outputBase), "%sin_nofive.gff3" % (self.outputBase), "five_prime_UTR", "5' UTR")

    def _getThreePrime(self, inputFileName):
        return self._splitOnCategory(inputFileName, "gff3", "three_prime_UTR", "%sin_three.gff3" % (self.outputBase), "%sin_nothree.gff3" % (self.outputBase), "three_prime_UTR", "3' UTR")

    def _getNcRna(self, inputFileName):
        return self._splitOnCategory(inputFileName, "gff3", "ncRNA", "%sin_ncRna.gff3" % (self.outputBase), "%sin_noNcRna.gff3" % (self.outputBase), "ncRNA", "ncRNA")

    def _getTe(self, inputFileName):
        return self._splitOnCategory(inputFileName, "gff3", "transposable_element_gene", "%sin_te.gff3" % (self.outputBase), "%sin_noTe.gff3" % (self.outputBase), "TE", "TE")

    def _getIntron(self, inputFileName):
        # Elements overlapping a mRNA but none of the previous (exonic)
        # categories are, by elimination, intronic.
        return self._splitOnCategory(inputFileName, "gff3", "mRNA", "%sin_intron.gff3" % (self.outputBase), "%sin_nointron.gff3" % (self.outputBase), "intron", "introns")

    def _getVicinity(self, inputFileName):
        # Build the vicinity reference (up/downstream of mRNAs) and split the
        # remaining elements into 'vicinity' and 'intergenic'.
        guds = GetUpDownStream(self.verbosity-1)
        guds.setInputFile(self.referenceFiles["mRNA"], "gff3")
        guds.setOutputFile(self.referenceFiles["vic"])
        guds.setDistances(self.distance, self.distance)
        guds.run()
        outputFileName   = "%sout_vicinity.gff3" % (self.outputBase)
        outputNoFileName = "%sout_novicinity.gff3" % (self.outputBase)
        self.tmpFileNames.extend([outputFileName, outputNoFileName])
        nbOverlaps   = self._compare(inputFileName, "gff3", self.referenceFiles["vic"], "gff3", outputFileName)
        nbNoOverlaps = self._compare(inputFileName, "gff3", self.referenceFiles["vic"], "gff3", outputNoFileName, True)
        self._copy(outputFileName, "vicinity")
        self._copy(outputNoFileName, "intergenic")
        if self.verbosity > 0:
            print("%d overlaps in vicinity" % (nbOverlaps))
            print("%d elsewhere" % (nbNoOverlaps))

    def run(self):
        """Run all the comparisons in order, narrowing the input each time."""
        self._getReferenceFiles()
        outputFileName = self._getCds()
        outputFileName = self._getFivePrime(outputFileName)
        outputFileName = self._getThreePrime(outputFileName)
        outputFileName = self._getNcRna(outputFileName)
        outputFileName = self._getTe(outputFileName)
        outputFileName = self._getIntron(outputFileName)
        self._getVicinity(outputFileName)
+
+
+
if __name__ == "__main__":

    # parse command line
    description = "Count Loci v1.0.0: Count input elements with respect to CDS, 5' UTR, 3' UTR, intron, downstream, upstream. [Category: Personal]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",            type="string", help="input file              [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format",    dest="format",         action="store",            type="string", help="format of the input     [compulsory] [format: transcript file format]")
    parser.add_option("-r", "--reference", dest="reference",      action="store",            type="string", help="reference file          [compulsory] [format: file in GFF format]")     
    parser.add_option("-o", "--output",    dest="outputFileName", action="store",            type="string", help="output file             [compulsory] [format: output file in GFF3 format]")
    # BUGFIX: the help text wrongly described the distance as an output file.
    parser.add_option("-d", "--distance",  dest="distance",       action="store",            type="int",    help="distance up/down stream [compulsory] [format: int]")
    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1, type="int",    help="trace level                          [format: int]")
    (options, args) = parser.parse_args()

    cl = CountLoci(options.verbosity)
    cl.setInputFile(options.inputFileName, options.format)
    cl.setDistance(options.distance)
    cl.setReference(options.reference)
    cl.setOutputFileName(options.outputFileName)
    cl.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/CountReadGCPercent.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import FastaParser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.Progress import Progress
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+from Gnome_tools.CountGCPercentBySlidingWindow import CountGCPercentBySlidingWindow
+
+
class CountReadGCPercent(object):
    """Compute the GC and N percentages of each transcript of a GFF3
    annotation against a reference genome, and write the annotation back
    with two extra tags ("GCpercent", "NPercent")."""

    def __init__(self):
        self.referenceReader = None  # path to the reference FASTA file
        self.gffReader       = None  # TranscriptContainer over the GFF3 input
        self.outputWriter    = None  # Gff3Writer for the annotated output
        self.verbose         = 0

    def setInputReferenceFile(self, fileName):
        """Register the reference genome file name (parsed lazily in write())."""
        self.referenceReader = fileName

    def setInputGffFile(self, fileName):
        """Open the GFF3 annotation input."""
        self.gffReader = TranscriptContainer(fileName, 'gff3', self.verbose)

    def setOutputFileName(self, fileName):
        """Open the GFF3 output writer."""
        self.outputWriter = Gff3Writer(fileName, self.verbose)

    def readGffAnnotation(self):
        """Record every genomic position covered by an exon, per chromosome.

        NOTE(review): self.coveredRegions is filled here but never read by
        write(); presumably kept for a later processing step — confirm.
        """
        self.coveredRegions = {}
        progress = Progress(self.gffReader.getNbTranscripts(), "Reading gff3 annotation file", self.verbose)
        for transcript in self.gffReader.getIterator():
            chromosome = transcript.getChromosome()
            if chromosome not in self.coveredRegions:
                self.coveredRegions[chromosome] = {}
            for exon in transcript.getExons():
                for position in range(exon.getStart(), exon.getEnd()+1):
                    self.coveredRegions[chromosome][position] = 1
            progress.inc()
        progress.done()

    def write(self):
        """Compute GC/N percentages per transcript and write the output.

        The tags receive the values computed on the LAST exon whose
        chromosome exists in the reference (original behavior kept).
        """
        iParser = FastaParser(self.referenceReader)
        iParser.setTags()
        iGetGCPercentBySW = CountGCPercentBySlidingWindow()
        progress = Progress(self.gffReader.getNbTranscripts(), "Writing output file", self.verbose)
        for transcript in self.gffReader.getIterator():
            chromosome = transcript.getChromosome()
            GCpercent = 0
            nPercent = 0
            for exon in transcript.getExons():
                # Direct membership test instead of scanning every sequence
                # name and skipping the non-matching ones.
                if chromosome in iParser.getTags():
                    subSequence = iParser.getSubSequence(chromosome, exon.getStart(), exon.getEnd(), 1)
                    GCpercent, nPercent = iGetGCPercentBySW.getGCPercentAccordingToNAndNPercent(subSequence)
                    print("GCpercent = %f, nPercent = %f" % (GCpercent, nPercent))
            transcript.setTagValue("GCpercent", GCpercent)
            transcript.setTagValue("NPercent", nPercent)
            self.outputWriter.addTranscript(transcript)
            progress.inc()
        progress.done()

    def run(self):
        """Read the annotation, then write the annotated output if an
        output writer was configured."""
        self.readGffAnnotation()
        if self.outputWriter is not None:
            self.write()
+            
if __name__ == "__main__":
    # Command-line entry point: parse the options and launch the counter.
    description = "Count GC percent for each read against a genome."
    usage = "CountReadGCPercent.py -i <fasta file> -j <gff3 file> -o <output gff3 file> -v <verbose> -h]"
    examples = ("\nExample: \n"
                "\t$ python CountReadGCPercent.py -i file.fasta -j annotation.gff -o output.gff3"
                "\n\n")
    parser = RepetOptionParser(description=description, usage=usage, version="v1.0", epilog=examples)
    parser.add_option('-i', '--inputGenome', dest='fastaFile', help='fasta file [compulsory]', default=None)
    parser.add_option('-j', '--inputAnnotation', dest='gffFile', help='gff3 file [compulsory]', default=None)
    parser.add_option('-o', '--output', dest='outputFile', help='output gff3 file [compulsory]', default=None)
    parser.add_option('-v', '--verbose', dest='verbose', help='verbosity level (default=0/1)', type="int", default=0)
    options = parser.parse_args()[0]

    readGCPercent = CountReadGCPercent()
    readGCPercent.setInputReferenceFile(options.fastaFile)
    readGCPercent.setInputGffFile(options.gffFile)
    readGCPercent.setOutputFileName(options.outputFile)
    readGCPercent.run()
+        
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/FindOverlapsOptim.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,343 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2012
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+
+import os, struct, time, shutil
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.ncList.NCList import NCList
+from SMART.Java.Python.ncList.ConvertToNCList import ConvertToNCList
+from SMART.Java.Python.ncList.NCListParser import NCListParser
+from SMART.Java.Python.ncList.NCListCursor import NCListCursor
+from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle
+from SMART.Java.Python.ncList.NCListHandler import NCListHandler
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+try:
+   import cPickle as pickle
+except:
+   import pickle
+
REFERENCE = 0
QUERY = 1
TYPES = (REFERENCE, QUERY)
TYPETOSTRING = {0: "reference", 1: "query"}

class FindOverlapsOptim(object):
    """Find the overlaps between a query and a reference interval file
    using nested containment lists (NCLists); every overlapping query
    interval is written to a GFF3 file with 'overlapsWith' and
    'nbOverlaps' tags."""

    def __init__(self, verbosity = 1):
        self._parsers                 = {}
        self._sortedFileNames         = {}
        self._outputFileName          = "outputOverlaps.gff3"
        self._iWriter                 = None
        self._inputFileNames          = {REFERENCE: None,  QUERY: None}
        self._convertedFileNames      = {REFERENCE: False, QUERY: False}
        self._inputFileFormats        = {REFERENCE: None,  QUERY: None}
        self._converted               = {REFERENCE: False, QUERY: False}
        self._ncListHandlers          = {REFERENCE: None,  QUERY: None}
        self._splittedFileNames       = {REFERENCE: {},    QUERY: {}}
        self._nbOverlappingQueries    = 0
        self._nbOverlaps              = 0
        self._nbLines                 = {REFERENCE: 0, QUERY: 0}
        self._sorted                  = False
        self._index                   = False
        self._verbosity               = verbosity
        self._ncLists                 = {}
        self._cursors                 = {}
        self._nbElementsPerChromosome = {}
        self._tmpDirectories          = {REFERENCE: False, QUERY: False}

    def close(self):
        """Close the output writer and remove temporary sorted/converted files."""
        self._iWriter.close()
        for fileName in self._sortedFileNames.values():
            if os.path.exists(fileName):
                os.remove(fileName)
        for fileName in self._convertedFileNames.values():
            if fileName:
                os.remove(fileName)

    def setRefFileName(self, fileName, format):
        """Register the reference file and its format."""
        self.setFileName(fileName, format, REFERENCE)

    def setQueryFileName(self, fileName, format):
        """Register the query file and its format."""
        self.setFileName(fileName, format, QUERY)

    def setFileName(self, fileName, format, type):
        """Register one input file; any non-NCList format will be
        converted in createNCLists()."""
        self._inputFileNames[type] = fileName
        self._inputFileFormats[type] = format
        if format.lower() != "nclist":
            self._converted[type] = True

    def setOutputFileName(self, outputFileName):
        """Set the output file name and open the GFF3 writer on it."""
        self._outputFileName = outputFileName
        self._iWriter = Gff3Writer(self._outputFileName)

    def setSorted(self, sorted):
        # True when the input files are already sorted (skips the sort step).
        self._sorted = sorted

    def setIndex(self, index):
        # True to index the reference NCList (faster, but uses more memory).
        self._index = index

    def createNCLists(self):
        """Load (converting first when necessary) both inputs as NCLists,
        and position a cursor at the start of each chromosome list."""
        startTime = time.time()
        if self._verbosity > 1:
            print("Building database")
        self._ncLists = dict([eltType, {}] for eltType in TYPES)
        self._indices = dict([eltType, {}] for eltType in TYPES)
        self._cursors = dict([eltType, {}] for eltType in TYPES)
        for eltType in TYPES:
            self._ncListHandlers[eltType] = NCListHandler(self._verbosity-3)
            if self._converted[eltType]:
                # Convert flat transcript files into the binary NCList format.
                self._convertedFileNames[eltType] = "%s_%d.ncl" % (os.path.splitext(self._inputFileNames[eltType])[0], eltType)
                ncLists = ConvertToNCList(self._verbosity-3)
                ncLists.setInputFileName(self._inputFileNames[eltType], self._inputFileFormats[eltType])
                ncLists.setSorted(self._sorted)
                ncLists.setOutputFileName(self._convertedFileNames[eltType])
                if eltType == REFERENCE and self._index:
                    ncLists.setIndex(True)
                ncLists.run()
                self._ncListHandlers[eltType].setFileName(self._convertedFileNames[eltType])
            else:
                self._ncListHandlers[eltType].setFileName(self._inputFileNames[eltType])
            self._ncListHandlers[eltType].loadData()
            self._nbLines[eltType]                 = self._ncListHandlers[eltType].getNbElements()
            self._nbElementsPerChromosome[eltType] = self._ncListHandlers[eltType].getNbElementsPerChromosome()
            self._ncLists[eltType]                 = self._ncListHandlers[eltType].getNCLists()
            for chromosome, ncList in self._ncLists[eltType].iteritems():
                self._cursors[eltType][chromosome] = NCListCursor(None, ncList, 0, self._verbosity)
                if eltType == REFERENCE and self._index:
                    self._indices[REFERENCE][chromosome] = ncList.getIndex()
        endTime = time.time()
        if self._verbosity > 1:
            print("done (%.2gs)" % (endTime - startTime))

    def compare(self):
        """Walk the query NCList chromosome by chromosome against the
        reference NCList, reporting every overlap found."""
        nbSkips, nbMoves   = 0, 0
        previousChromosome = None
        done               = False
        startTime          = time.time()
        progress           = Progress(len(self._ncLists[QUERY].keys()), "Checking overlap", self._verbosity)
        for chromosome, queryNCList in self._ncLists[QUERY].iteritems():
            queryParser = self._ncListHandlers[QUERY].getParser(chromosome)
            queryCursor = self._cursors[QUERY][chromosome]
            if chromosome != previousChromosome:
                previousChromosome = chromosome
                if chromosome not in self._ncLists[REFERENCE]:
                    # no reference interval on this chromosome: nothing to do
                    continue
                refNCList = self._ncLists[REFERENCE][chromosome]
                refCursor = self._cursors[REFERENCE][chromosome]
            while True:
                queryTranscript = queryCursor.getTranscript()
                newRefLaddr = self.checkIndex(queryTranscript, refCursor)
                if newRefLaddr is not None:
                    nbMoves += 1
                    refCursor.setLIndex(newRefLaddr)
                    done = False
                refCursor, done, unmatched = self.findOverlapIter(queryTranscript, refCursor, done)
                if refCursor.isOut():
                    break
                if unmatched or not queryCursor.hasChildren():
                    queryCursor.moveNext()
                    nbSkips += 1
                else:
                    queryCursor.moveDown()
                if queryCursor.isOut():
                    break
            progress.inc()
        progress.done()
        endTime = time.time()
        self._timeSpent = endTime - startTime
        if self._verbosity >= 10:
            print("# skips:   %d" % (nbSkips))
            print("# moves:   %d" % (nbMoves))

    def findOverlapIter(self, queryTranscript, cursor, done):
        """Collect the reference intervals overlapping queryTranscript,
        write the query (tagged) when it overlaps, and return the triple
        (nextRefCursor, nextDone, unmatched).

        BUGFIX: the two early-exit paths used to return 2-tuples, whereas
        compare() always unpacks 3 values; they now return 3-tuples.
        """
        chromosome = queryTranscript.getChromosome()
        if chromosome not in self._ncLists[REFERENCE]:
            # No reference list on this chromosome: nothing can overlap.
            return cursor, False, True
        ncList = self._ncLists[REFERENCE][chromosome]
        overlappingNames = {}
        nextDone = False
        firstOverlapLAddr = NCListCursor(cursor)
        firstOverlapLAddr.setLIndex(-1)
        if cursor.isOut():
            return firstOverlapLAddr, False, True
        parentCursor = NCListCursor(cursor)
        parentCursor.moveUp()
        firstParentAfter = False
        # First inspect the ancestors of the current reference interval.
        while not parentCursor.isOut():
            if self.isOverlapping(queryTranscript, parentCursor) == 0:
                overlappingNames.update(self._extractID(parentCursor.getTranscript()))
                if firstOverlapLAddr.isOut():
                    firstOverlapLAddr.copy(parentCursor)
                    nextDone = True
            elif self.isOverlapping(queryTranscript, parentCursor) == 1:
                firstParentAfter = NCListCursor(parentCursor)
            parentCursor.moveUp()
        if firstParentAfter:
            self._writeIntervalInNewGFF3(queryTranscript, overlappingNames)
            return firstParentAfter, False, not overlappingNames
        # This loop finds the overlaps with currentRefLAddr.
        while True:
            parentCursor = NCListCursor(cursor)
            parentCursor.moveUp()
            overlap = self.isOverlapping(queryTranscript, cursor)
            if overlap == -1:
                # Query is on the right of the reference interval: no overlap.
                cursor.moveNext()
            elif overlap == 0:
                # Query overlaps with the reference interval.
                overlappingNames.update(self._extractID(cursor.getTranscript()))
                if firstOverlapLAddr.compare(parentCursor):
                    firstOverlapLAddr.copy(cursor)
                    nextDone = True
                if done:
                    cursor.moveNext()
                else:
                    if not cursor.hasChildren():
                        cursor.moveNext()
                        if cursor.isOut():
                            break
                    else:
                        cursor.moveDown()
            else:
                # Query is on the left of the reference interval: stop here.
                if firstOverlapLAddr.isOut() or firstOverlapLAddr.compare(parentCursor):
                    firstOverlapLAddr.copy(cursor)
                    nextDone = False
                break

            done = False
            if cursor.isOut():
                break
        self._writeIntervalInNewGFF3(queryTranscript, overlappingNames)
        return firstOverlapLAddr, nextDone, not overlappingNames

    def isOverlapping(self, queryTranscript, refTranscript):
        """Return 0 when the two intervals overlap, 1 when the reference
        starts after the query ends, -1 when it ends before the query."""
        if (queryTranscript.getStart() <= refTranscript.getEnd() and queryTranscript.getEnd() >= refTranscript.getStart()):
            return 0
        if queryTranscript.getEnd() < refTranscript.getStart():
            return 1
        return -1

    def checkIndex(self, transcript, cursor):
        """Using the optional reference index, return the list index to
        jump to for this query transcript, or None to stay put."""
        if not self._index:
            return None
        chromosome = transcript.getChromosome()
        nextLIndex = self._indices[REFERENCE][chromosome].getIndex(transcript)
        if nextLIndex is None:
            return None
        ncList         = self._ncLists[REFERENCE][chromosome]
        nextGffAddress = ncList.getRefGffAddr(nextLIndex)
        thisGffAddress = cursor.getGffAddress()
        # only jump forward, never backwards
        if nextGffAddress > thisGffAddress:
            return nextLIndex
        return None

    def _writeIntervalInNewGFF3(self, transcript, names):
        """Tag the transcript with its overlappers and write it; no-op
        when 'names' is empty (no overlap)."""
        nbOverlaps = sum(names.values())
        if not names:
            return
        transcript.setTagValue("overlapsWith", "--".join(sorted(names.keys())))
        transcript.setTagValue("nbOverlaps", nbOverlaps)
        self._iWriter.addTranscript(transcript)
        self._iWriter.write()
        self._nbOverlappingQueries += 1
        self._nbOverlaps           += nbOverlaps

    def _extractID(self, transcript):
        """Map the transcript ID (or unique name) to its element count."""
        nbElements = float(transcript.getTagValue("nbElements")) if "nbElements" in transcript.getTagNames() else 1
        name       = transcript.getTagValue("ID")                if "ID"         in transcript.getTagNames() else transcript.getUniqueName()
        return {name: nbElements}

    def run(self):
        """Build the NCLists, compare them, clean up, and report stats."""
        self.createNCLists()
        self.compare()
        self.close()
        if self._verbosity > 0:
            print("# queries: %d" % (self._nbLines[QUERY]))
            print("# refs:    %d" % (self._nbLines[REFERENCE]))
            print("# written: %d (%d overlaps)" % (self._nbOverlappingQueries, self._nbOverlaps))
            print("time:      %.2gs" % (self._timeSpent))
+
+
if __name__ == "__main__":
    # Command-line entry point: parse options, then run the overlap search.
    description = "Find Overlaps Optim v1.0.0: Finds overlaps with several query intervals. [Category: Data Comparison]"

    parser = OptionParser(description=description)
    parser.add_option("-i", "--query", dest="inputQueryFileName", action="store", type="string", help="query input file [compulsory] [format: file in transcript or other format given by -f]")
    parser.add_option("-f", "--queryFormat", dest="queryFormat", action="store", type="string", help="format of previous file (possibly in NCL format) [compulsory] [format: transcript or other file format]")
    parser.add_option("-j", "--ref", dest="inputRefFileName", action="store", type="string", help="reference input file [compulsory] [format: file in transcript or other format given by -g]")
    parser.add_option("-g", "--refFormat", dest="refFormat", action="store", type="string", help="format of previous file (possibly in NCL format) [compulsory] [format: transcript or other file format]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in GFF3 format]")
    parser.add_option("-d", "--index", dest="index", action="store_true", default=False, help="add an index to the reference file (faster but more memory) [format: boolean] [default: False]")
    parser.add_option("-s", "--sorted", dest="sorted", action="store_true", default=False, help="input files are already sorted [format: boolean] [default: False]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="Trace level [format: int] [default: 1]")
    options = parser.parse_args()[0]

    overlapFinder = FindOverlapsOptim(options.verbosity)
    overlapFinder.setRefFileName(options.inputRefFileName, options.refFormat)
    overlapFinder.setQueryFileName(options.inputQueryFileName, options.queryFormat)
    overlapFinder.setOutputFileName(options.outputFileName)
    overlapFinder.setIndex(options.index)
    overlapFinder.setSorted(options.sorted)
    overlapFinder.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetDifferentialExpression.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,441 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Get the differential expression between 2 conditions (2 files), on regions defined by a third file"""
+
+import os, re
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from SMART.Java.Python.misc import Utils
+from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
+from SMART.Java.Python.structure.Transcript import Transcript
+
+class GetDifferentialExpression(object):
+    
    def __init__(self, verbosity = 1):
        """Prepare the comparison of two samples over reference regions.

        :param verbosity: trace level (0 = silent)
        """
        # core I/O state: two input samples, one reference, one GFF3 output
        self.verbosity              = verbosity
        self.mySqlConnection        = MySqlConnection(verbosity)
        self.inputs                 = (0, 1)
        self.transcriptContainers   = [None, None]
        self.transcriptContainerRef = None
        self.outputFileName         = None
        self.writer                 = None
        self.tables                 = [None, None]  # per-sample: chromosome -> SQL table
        self.nbElements             = [0, 0]        # per-sample element counts

        # per-region results: region string -> counts / name / p-values
        self.regionsToValues = {}
        self.regionsToNames  = {}
        self.valuesToPvalues = {}

        # normalization settings and the per-sample factors they produce
        self.oriented                      = True
        self.simpleNormalization           = False
        self.simpleNormalizationParameters = None
        self.adjustedNormalization         = False
        self.fixedSizeFactor               = None
        self.normalizationSize             = None
        self.normalizationFactors          = [1, 1]
        self.fdr                           = None
        self.fdrPvalue                     = None

        # optional scatter plot of sample 1 vs sample 2 counts
        self.plot    = False
        self.plotter = None
        self.plotterName = None
        self.points  = {}
+
+
+    def setInputFile(self, i, fileName, fileFormat):
+        self.transcriptContainers[i] = TranscriptContainer(fileName, fileFormat, self.verbosity)
+        self.transcriptContainers[i].mySqlConnection = self.mySqlConnection
+
+
+    def setReferenceFile(self, fileName, fileFormat):
+        self.transcriptContainerRef = TranscriptContainer(fileName, fileFormat, self.verbosity)
+        self.transcriptContainerRef.mySqlConnection = self.mySqlConnection
+
+
+    def setOutputFile(self, fileName):
+        self.outputFileName = fileName
+        self.writer         = Gff3Writer(fileName, self.verbosity)
+
+    
+    def setOriented(self, boolean):
+        self.oriented = boolean
+
+
+    def setSimpleNormalization(self, boolean):
+        self.simpleNormalization = boolean
+
+
+    def setSimpleNormalizationParameters(self, parameters):
+        if parameters != None:
+            self.simpleNormalization = True
+            self.simpleNormalizationParameters = [0, 0]
+            for i, splittedParameter in enumerate(parameters.split(",")):
+                self.simpleNormalizationParameters[i] = int(splittedParameter)
+
+
+    def setAdjustedNormalization(self, boolean):
+        self.adjustedNormalization = boolean
+
+
+    def setFixedSizeNormalization(self, value):
+        self.fixedSizeFactor = value
+
+
+    def setFdr(self, fdr):
+        self.fdr = fdr
+
+
+    def setPlot(self, boolean):
+        self.plot = boolean
+
+
+    def setPlotterName(self, plotterName):
+        self.plotterName = plotterName
+
+    def setPlotter(self):
+        self.plot    = True
+        self.plotter = RPlotter(self.plotterName, self.verbosity)
+        self.plotter.setPoints(True)
+        self.plotter.setLog("xy")
+        self.points = {}
+
+
    def readInput(self, i):
        """Store sample *i* into the database, index its tables, and count
        its elements (a clustered read stands for 'nbElements' reads)."""
        self.transcriptContainers[i].storeIntoDatabase()
        self.tables[i] = self.transcriptContainers[i].getTables()
        progress       = Progress(len(self.tables[i].keys()), "Adding indices", self.verbosity)
        for chromosome in self.tables[i]:
            # index exactly the columns used by retrieveCounts' SELECT
            if self.oriented:
                self.tables[i][chromosome].createIndex("iStartEndDir_%s_%d" % (chromosome, i), ("start", "end", "direction"))
            else:
                self.tables[i][chromosome].createIndex("iStartEnd_%s_%d" % (chromosome, i), ("start", "end"))
            progress.inc()
        progress.done()

        progress = Progress(self.transcriptContainers[i].getNbTranscripts(), "Reading sample %d" % (i +1), self.verbosity)
        for chromosome in self.tables[i]:
            for transcript in self.tables[i][chromosome].getIterator():
                # one transcript counts for its 'nbElements' tag when present
                self.nbElements[i] += 1 if "nbElements" not in transcript.getTagNames() else transcript.getTagValue("nbElements")
                progress.inc()
        progress.done()
        if self.verbosity > 0:
            print "%d elements in sample %d" % (self.nbElements[i], i+1)
+
+
    def computeSimpleNormalizationFactors(self):
        """Scale each sample to the average library size (or to the counts
        given via setSimpleNormalizationParameters)."""
        nbElements = self.nbElements
        if self.simpleNormalizationParameters != None:
            print "Using provided normalization parameters: %s" % (", ".join([str(parameter) for parameter in self.simpleNormalizationParameters]))
            nbElements = self.simpleNormalizationParameters
        avgNbElements = int(float(sum(nbElements)) / len(nbElements))
        for i in self.inputs:
            # factor > 1 scales the smaller sample up, and conversely
            self.normalizationFactors[i] = float(avgNbElements) / nbElements[i]
            self.nbElements[i]          *= self.normalizationFactors[i]
        if self.verbosity > 1:
            print "Normalizing to average # reads: %d" % (avgNbElements)
            if self.simpleNormalizationParameters != None:
                print "# reads: %s" % (", ".join([str(nbElement) for nbElement in self.nbElements]))
+
    def __del__(self):
        """Drop the temporary database when the object is reclaimed."""
        self.mySqlConnection.deleteDatabase()
+
+    def regionToString(self, transcript):
+        return "%s:%d-%d(%s)" % (transcript.getChromosome(), transcript.getStart(), transcript.getEnd(), "+" if transcript.getDirection() == 1 else "-")
+
+    def stringToRegion(self, region):
+        m = re.search(r"^(\S+):(\d+)-(\d+)\((\S)\)$", region)
+        if m == None:
+            raise Exception("Internal format error: cannot parse region '%s'" % (region))
+        transcript = Transcript()
+        transcript.setChromosome(m.group(1))
+        transcript.setStart(int(m.group(2)))
+        transcript.setEnd(int(m.group(3)))
+        transcript.setDirection(m.group(4))
+        return transcript
+
    def computeMinimumSize(self):
        """Set self.normalizationSize to the smallest reference span
        (end - start; NOTE(review): getMinimumReferenceSize uses +1 —
        confirm which convention is intended)."""
        self.normalizationSize = 1000000000
        progress = Progress(self.transcriptContainerRef.getNbTranscripts(), "Getting minimum reference size", self.verbosity)
        for transcriptRef in self.transcriptContainerRef.getIterator():
            self.normalizationSize = min(self.normalizationSize, transcriptRef.getEnd() - transcriptRef.getStart())
            progress.inc()
        progress.done()
        if self.verbosity > 1:
            print "Minimum reference size: %d" % (self.normalizationSize+1)
+
    def useFixedSizeNormalization(self, start, end, starts):
        """Smooth the read counts of [start, end] over sliding windows of
        self.normalizationSize positions and scale by self.fixedSizeFactor.

        :param starts: dict position -> number of reads starting there
                       (mutated in place)
        NOTE(review): 'sum' shadows the builtin; reads starting before
        'start' are folded onto 'start' itself.
        """
        currentNb = 0
        sum       = 0
        if not starts:
            return 0
        # make sure every position of the extended window exists in the dict
        for i in range(start - self.normalizationSize, end + 1 + self.normalizationSize):
            if i not in starts:
                starts[i] = 0
        # fold reads starting before the region onto its first position
        for i, s in starts.iteritems():
            if i < start:
                starts[start] += s
                starts[i]      = 0
        # slide a window of normalizationSize and accumulate its coverage
        for i in range(start - self.normalizationSize, end + 1):
            currentNb += starts[i+self.normalizationSize] - starts[i]
            sum       += currentNb
        return (float(sum) / self.normalizationSize) * (self.fixedSizeFactor / (end - start + 1))
+
    def retrieveCounts(self, transcriptRef, i):
        """Count the sample-*i* reads contained in the exons of transcriptRef.

        :return: (rawCount, normalizedCount); the two differ only when a
                 fixed-size normalization factor is set.
        """
        if transcriptRef.getChromosome() not in self.tables[i]:
            return (0, 0)
        cumulatedCount           = 0
        cumulatedNormalizedCount = 0
        for exon in transcriptRef.getExons():
            count   = 0
            starts  = {}
            # reads entirely contained in the exon (same strand if oriented)
            command = "SELECT start, tags FROM '%s' WHERE start >= %d AND end <= %d" % (self.tables[i][exon.getChromosome()].getName(), exon.getStart(), exon.getEnd())
            if self.oriented:
                command += " AND direction = %d" % (exon.getDirection())
            query = self.mySqlConnection.executeQuery(command)
            for line in query.getIterator():
                # each row counts for its 'nbElements' tag (default 1)
                nb   = 1
                tags = line[1].split(";")
                for tag in tags:
                    key, value = tag.split("=")
                    if key == "nbElements":
                        nb = int(float(value))
                count += nb
                starts[int(line[0])] = nb
            normalizedCount = count if self.fixedSizeFactor == None else self.useFixedSizeNormalization(exon.getStart(), exon.getEnd(), starts)
            cumulatedCount           += count
            cumulatedNormalizedCount += normalizedCount
        return (cumulatedCount, cumulatedNormalizedCount)
+
+    def getAllCounts(self):
+        progress = Progress(self.transcriptContainerRef.getNbTranscripts(), "Getting counts", self.verbosity)
+        for cpt, transcriptRef in enumerate(self.transcriptContainerRef.getIterator()):
+            if "ID" in transcriptRef.getTagNames():
+                self.regionsToNames[self.regionToString(transcriptRef)] = transcriptRef.getTagValue("ID")
+            elif transcriptRef.getName() != None:
+                self.regionsToNames[self.regionToString(transcriptRef)] = transcriptRef.getName()
+            else:
+                self.regionsToNames[self.regionToString(transcriptRef)] = "region_%d" % (cpt)
+            values           = [None, None]
+            normalizedValues = [None, None]
+            for i in self.inputs:
+                values[i], normalizedValues[i] = self.retrieveCounts(transcriptRef, i)
+                normalizedValues[i]            = int(self.normalizationFactors[i] * normalizedValues[i])
+            if sum(values) != 0:
+                self.regionsToValues[self.regionToString(transcriptRef)] = (normalizedValues[0], normalizedValues[1], values[0], values[1])
+            progress.inc()
+        progress.done()
+
    def computeAdjustedNormalizationFactors(self):
        """Refine the normalization factors using only the regions whose
        average (corrected) count lies in the interquartile range, then
        rescale both samples accordingly."""
        nbElements = len(self.regionsToValues.keys())
        avgValues  = []
        progress   = Progress(nbElements, "Normalization step 1", self.verbosity)
        for values in self.regionsToValues.values():
            correctedValues = [values[i] * self.normalizationFactors[i] for i in self.inputs]
            avgValues.append(float(sum(correctedValues)) / len(correctedValues))
            progress.inc()
        progress.done()

        # keep only the regions between the first and third quartiles
        # (Python 2 integer division on purpose)
        sortedAvgValues = sorted(avgValues)
        minAvgValues    = sortedAvgValues[nbElements / 4]
        maxAvgValues    = sortedAvgValues[nbElements * 3 / 4]
        sums            = [0, 0]
        progress        = Progress(nbElements, "Normalization step 2", self.verbosity)
        for values in self.regionsToValues.values():
            correctedValues = [values[i] * self.normalizationFactors[i] for i in self.inputs]
            avgValue        = float(sum(correctedValues)) / len(correctedValues)
            if minAvgValues <= avgValue and avgValue <= maxAvgValues:
                for i in self.inputs:
                    sums[i] += values[i]
            progress.inc()
        progress.done()

        # scale each sample so both interquartile sums match their mean
        avgSums = float(sum(sums)) / len(sums)
        for i in self.inputs:
            if self.verbosity > 1:
                print "Normalizing sample %d: %s to" % ((i+1), self.nbElements[i]),
            self.normalizationFactors[i] *= float(avgSums) / sums[i]
            self.nbElements[i]           *= self.normalizationFactors[i]
            if self.verbosity > 1:
                print "%s" % (int(self.nbElements[i]))
+                
+    def getMinimumReferenceSize(self):
+        self.normalizationSize = 1000000000
+        progress               = Progress(self.transcriptContainerRef.getNbTranscripts(), "Reference element sizes", self.verbosity)
+        for transcriptRef in self.transcriptContainerRef.getIterator():
+            self.normalizationSize = min(self.normalizationSize, transcriptRef.getEnd() - transcriptRef.getStart() + 1)
+            progress.inc()
+        progress.done()
+        if self.verbosity > 1:
+            print "Minimum reference size: %d" % (self.normalizationSize)
+
+    def computePvalues(self):
+        """Round the counts with the final factors and compute Fisher exact p-values.
+
+        Distinct contingency tuples are collected in a set so the (costly) Fisher
+        test runs only once per distinct table. Results are stored in
+        self.valuesToPvalues, keyed by the normalized count pair.
+        """
+        normalizedValues = set()
+        progress         = Progress(len(self.regionsToValues.keys()), "Normalizing counts", self.verbosity)
+        for region in self.regionsToValues:
+            values                       = self.regionsToValues[region]
+            normalizedValues0            = int(round(values[0] * self.normalizationFactors[0]))
+            normalizedValues1            = int(round(values[1] * self.normalizationFactors[1]))
+            # Keep (normalized0, normalized1, raw0, raw1) per region.
+            self.regionsToValues[region] = (normalizedValues0, normalizedValues1, self.regionsToValues[region][2], self.regionsToValues[region][3])
+            # Contingency row: counts in the region vs. counts outside it, per sample.
+            normalizedValues.add((normalizedValues0, normalizedValues1, self.nbElements[0] - normalizedValues0, self.nbElements[1] - normalizedValues1, self.regionsToValues[region][2], self.regionsToValues[region][3]))
+            progress.inc()
+        progress.done()
+
+        if self.verbosity > 1:
+            print "Computing p-values..."
+        self.valuesToPvalues = Utils.fisherExactPValueBulk(list(normalizedValues))
+        if self.verbosity > 1:
+            print "... done"
+
+    def setTagValues(self, transcript, values, pValue):
+        for tag in transcript.getTagNames():
+            transcript.deleteTag(tag)
+        transcript.removeExons()
+        transcript.setTagValue("pValue", str(pValue))
+        transcript.setTagValue("nbReadsCond1", str(values[0]))
+        transcript.setTagValue("nbReadsCond2", str(values[1]))
+        transcript.setTagValue("nbUnnormalizedReadsCond1", str(values[2]))
+        transcript.setTagValue("nbUnnormalizedReadsCond2", str(values[3]))
+        if (values[0] == values[1]) or (self.fdr != None and pValue > self.fdrPvalue):
+            transcript.setTagValue("regulation", "equal")
+        elif values[0] < values[1]:
+            transcript.setTagValue("regulation", "up")
+        else:
+            transcript.setTagValue("regulation", "down")
+        return transcript
+
+    def computeFdr(self):
+        """Find the Benjamini-Hochberg p-value cut-off for the requested FDR (self.fdr).
+
+        Walks the p-values from largest to smallest and stops at the first one
+        satisfying p <= fdr * k / m; that p-value becomes self.fdrPvalue.
+        NOTE(review): the rank used is k = m - 1 - i, while standard B-H uses
+        k = m - i for the i-th largest p-value — looks like an off-by-one;
+        confirm before relying on exact FDR control. If no p-value qualifies,
+        self.fdrPvalue is never set.
+        """
+        pValues   = []
+        nbRegions = len(self.regionsToValues.keys())
+        progress  = Progress(nbRegions, "Computing FDR", self.verbosity)
+        for values in self.regionsToValues.values():
+            pValues.append(self.valuesToPvalues[values[0:2]])
+            progress.inc()
+        progress.done()
+        
+        for i, pValue in enumerate(reversed(sorted(pValues))):
+            if pValue <= self.fdr * (nbRegions - 1 - i) / nbRegions:
+                self.fdrPvalue = pValue
+                if self.verbosity > 1:
+                    print "FDR: %f, k: %i, m: %d" % (pValue, nbRegions - 1 - i, nbRegions)
+                return
+
+    def writeDifferentialExpression(self):
+        if self.plot:
+            self.setPlotter()
+
+        cpt = 1
+        progress = Progress(len(self.regionsToValues.keys()), "Writing output", self.verbosity)
+        for region, values in self.regionsToValues.iteritems():
+            transcript = self.stringToRegion(region)
+            pValue     = self.valuesToPvalues[values[0:2]]
+            transcript.setName(self.regionsToNames[region])
+            transcript = self.setTagValues(transcript, values, pValue)
+            self.writer.addTranscript(transcript)
+            cpt += 1
+
+            if self.plot:
+                self.points[region] = (values[0], values[1])
+        progress.done()
+        self.writer.write()
+        self.writer.close()
+
+        if self.plot:
+            self.plotter.addLine(self.points)
+            self.plotter.plot()
+
+    def getDifferentialExpression(self):
+        """Run the whole pipeline: read inputs, normalize, count, test, write output."""
+        for i in self.inputs:
+            self.readInput(i)
+
+        # Optional normalization by library size (possibly user-provided counts).
+        if self.simpleNormalization:
+            self.computeSimpleNormalizationFactors()
+        if self.fixedSizeFactor != None:
+            self.computeMinimumSize()
+
+        self.getAllCounts()
+
+        # Optional refinement using only mid-expressed regions.
+        if self.adjustedNormalization:
+            self.computeAdjustedNormalizationFactors()
+
+        self.computePvalues()
+
+        if self.fdr != None:
+            self.computeFdr()
+            
+        self.writeDifferentialExpression()
+
+
+if __name__ == "__main__":
+    
+    # Command-line entry point: parse options, then drive GetDifferentialExpression.
+    # parse command line
+    description = "Get Differential Expression v1.0.1: Get the differential expression between 2 conditions using Fisher's exact test, on regions defined by a third file. [Category: Data Comparison]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input1",           dest="inputFileName1",    action="store",                     type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format1",          dest="format1",           action="store",                     type="string", help="format of file 1 [compulsory] [format: transcript file format]")
+    parser.add_option("-j", "--input2",           dest="inputFileName2",    action="store",                     type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
+    parser.add_option("-g", "--format2",          dest="format2",           action="store",                     type="string", help="format of file 2 [compulsory] [format: transcript file format]")
+    parser.add_option("-k", "--reference",        dest="referenceFileName", action="store",                     type="string", help="reference file [compulsory] [format: file in transcript format given by -l]")
+    parser.add_option("-l", "--referenceFormat",  dest="referenceFormat",   action="store",                     type="string", help="format of reference file [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output",           dest="outputFileName",    action="store",                     type="string", help="output file [format: output file in gff3 format]")
+    parser.add_option("-n", "--notOriented",      dest="notOriented",       action="store_true", default=False,                help="if the reads are not oriented [default: False] [format: bool]")
+    parser.add_option("-s", "--simple",           dest="simple",            action="store_true", default=False,                help="normalize using the number of reads in each condition [format: bool]")
+    parser.add_option("-S", "--simpleParameters", dest="simpleParameters",  action="store",      default=None,  type="string", help="provide the number of reads [format: bool]")
+    parser.add_option("-a", "--adjusted",         dest="adjusted",          action="store_true", default=False,                help="normalize using the number of reads of 'mean' regions [format: bool]")
+    parser.add_option("-x", "--fixedSizeFactor",  dest="fixedSizeFactor",   action="store",      default=None,  type="int",    help="give the magnification factor for the normalization using fixed size sliding windows in reference regions (leave empty for no such normalization) [format: int]")
+    parser.add_option("-d", "--fdr",              dest="fdr",               action="store",      default=None,  type="float",  help="use FDR [format: float]")
+    parser.add_option("-p", "--plot",             dest="plotName",          action="store",      default=None,  type="string", help="plot cloud plot [format: output file in PNG format]")
+    parser.add_option("-v", "--verbosity",        dest="verbosity",         action="store",      default=1,     type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+
+        
+    # Wire the options into the pipeline object, then run it.
+    differentialExpression = GetDifferentialExpression(options.verbosity)
+    differentialExpression.setInputFile(0, options.inputFileName1, options.format1)
+    differentialExpression.setInputFile(1, options.inputFileName2, options.format2)
+    differentialExpression.setReferenceFile(options.referenceFileName, options.referenceFormat)
+    differentialExpression.setOutputFile(options.outputFileName)
+    if options.plotName != None :
+        differentialExpression.setPlotterName(options.plotName)
+        differentialExpression.setPlotter()
+    differentialExpression.setOriented(not options.notOriented)
+    differentialExpression.setSimpleNormalization(options.simple)
+    differentialExpression.setSimpleNormalizationParameters(options.simpleParameters)
+    differentialExpression.setAdjustedNormalization(options.adjusted)
+    differentialExpression.setFixedSizeNormalization(options.fixedSizeFactor)
+    differentialExpression.setFdr(options.fdr)
+    differentialExpression.getDifferentialExpression()
+    # Clean up the temporary MySQL working database.
+    differentialExpression.mySqlConnection.deleteDatabase()
+    
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetDistribution.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,362 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2012
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.parsing.FastaParser import FastaParser
+from SMART.Java.Python.structure.Transcript import Transcript
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from SMART.Java.Python.misc.MultipleRPlotter import MultipleRPlotter
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+from SMART.Java.Python.misc.Progress import Progress
+
+TWOSTRANDS = {True: [1, -1], False: [0]}
+STRANDTOSTR = {1: "(+)", -1: "(-)", 0: ""}
+
+class GetDistribution(object):
+
+	def __init__(self, verbosity):
+		self.verbosity     = verbosity
+		self.sizes         = None
+		self.twoStrands    = False
+		self.start         = 1
+		self.names         = ["nbElements"]
+		self.average       = False
+		self.nbValues      = {}
+		self.height        = 300
+		self.width         = 600
+		self.colors        = None
+		self.gffFileName   = None
+		self.csvFileName   = None
+		self.yMin          = None
+		self.yMax          = None
+		self.chromosome    = None
+		self.merge         = False
+		self.nbTranscripts = None
+
+	def setInputFile(self, fileName, format):
+		chooser = ParserChooser(self.verbosity)
+		chooser.findFormat(format)
+		self.parser = chooser.getParser(fileName)
+
+	def setReferenceFile(self, fileName):
+		if fileName == None:
+			return
+		fastaParser = FastaParser(fileName, self.verbosity)
+		self.chromosomes = fastaParser.getRegions()
+		self.sizes       = dict([region, fastaParser.getSizeOfRegion(region)] for region in self.chromosomes)
+		self.maxSize     = max(self.sizes.values())
+
+	def setRegion(self, chromosome, start, end):
+		if chromosome == None:
+			return
+		self.maxSize     = options.end
+		self.sizes       = {chromosome: end}
+		self.chromosomes = [chromosome]
+		self.chromosome  = chromosome
+		self.start       = start
+		self.end         = end
+
+	def setOutputFile(self, fileName):
+		self.outputFileName = fileName
+
+	def setNbBins(self, nbBins):
+		self.nbBins = nbBins
+
+	def set2Strands(self, twoStrands):
+		self.twoStrands = twoStrands
+
+	def setNames(self, names):
+		self.names = names
+
+	def setAverage(self, average):
+		self.average = average
+
+	def setNormalization(self, normalization):
+		self.normalization = normalization
+	
+	def setImageSize(self, height, width):
+		self.height = height
+		self.width  = width
+
+	def setYLimits(self, yMin, yMax):
+		self.yMin = yMin
+		self.yMax = yMax
+
+	def setColors(self, colors):
+		self.colors = colors
+
+	def writeGff(self, fileName):
+		self.gffFileName = fileName
+
+	def writeCsv(self, fileName):
+		self.csvFileName = fileName
+
+	def mergePlots(self, merge):
+		self.merge = merge
+
+	def _estimateSizes(self):
+		progress = UnlimitedProgress(10000, "Reading input for chromosome size estimate", self.verbosity)
+		self.sizes = {}
+		for self.nbTranscripts, transcript in enumerate(self.parser.getIterator()):
+			chromosome = transcript.getChromosome()
+			start      = transcript.getStart()
+			self.sizes[chromosome] = max(start, self.sizes.get(chromosome, 0))
+			progress.inc()
+		progress.done()
+
+	def _computeSliceSize(self):
+		if self.nbBins == 0:
+			return
+		tmp1           = int(max(self.sizes.values()) / float(self.nbBins))
+		tmp2           = 10 ** (len("%d" % (tmp1))-2)
+		self.sliceSize = max(1, int((tmp1 / tmp2) * tmp2))
+		if self.verbosity > 0:
+			print "choosing bin size of %d" % (self.sliceSize)
+
+	def _initBins(self):
+		self.bins = {}
+		for chromosome in self.sizes:
+			self.bins[chromosome] = {}
+			for name in self.names:
+				self.bins[chromosome][name] = {}
+				for strand in TWOSTRANDS[self.twoStrands]:
+					if self.nbBins == 0:
+						self.bins[chromosome][name][strand] = {}
+					else:
+						self.bins[chromosome][name][strand] = dict([(i * self.sliceSize + 1, 0.0) for i in range(self.start / self.sliceSize, self.sizes[chromosome] / self.sliceSize + 1)])
+
+	def _populateBins(self):
+		if self.nbTranscripts == None:
+			progress = UnlimitedProgress(10000, "Counting data", self.verbosity)
+		else:
+			progress = Progress(self.nbTranscripts, "Counting data", self.verbosity)
+		for transcript in self.parser.getIterator():
+			if transcript.__class__.__name__ == "Mapping":
+				transcript = transcript.getTranscript()
+			progress.inc()
+			chromosome = transcript.getChromosome()
+			start      = transcript.getStart()
+			if self.chromosome and (chromosome != self.chromosome or start < self.start or start > self.end):
+				continue
+			strand = transcript.getDirection() if self.twoStrands else 0
+			if self.nbBins != 0:
+				bin = (start / self.sliceSize) * self.sliceSize + 1
+			else:
+				bin = start
+			for name in self.names:
+				value = float(transcript.tags.get(name, 1))
+				self.bins[chromosome][name][strand][bin] = self.bins[chromosome][name][strand].get(bin, 0) + value
+				self.nbValues[name] = self.nbValues.get(name, 0) + value
+		progress.done()
+
+	def _normalize(self):
+		average = float(sum(self.nbValues)) / len(self.nbValues.keys())
+		factors = dict([name, float(average) / self.nbValues[name]] for name in self.nbValues)
+		for chromosome in self.bins:
+			for name in self.bins[chromosome]:
+				for strand in self.bins[chromosome][name]:
+					for bin in self.bins[chromosome][name][strand]:
+						self.bins[chromosome][name][strand][bin] *= factors[name]
+
+	def _computeAverage(self):
+		for chromosome in self.bins:
+			for name in self.bins[chromosome]:
+				for strand in self.bins[chromosome][name]:
+					for bin in self.bins[chromosome][name][strand]:
+						self.bins[chromosome][name][strand][bin] = float(self.bins[chromosome][name][strand][bin]) / self.sliceSize
+
+	def _getPlotter(self, chromosome):
+		plot = RPlotter("%s_%s.png" % (os.path.splitext(self.outputFileName)[0], chromosome), self.verbosity)
+		plot.setImageSize(self.width, self.height)
+		if self.sizes[chromosome] <= 1000:
+			unit  = "nt."
+			ratio = 1.0
+		elif self.sizes[chromosome] <= 1000000:
+			unit  = "kb"
+			ratio = 1000.0
+		else:
+			unit  = "Mb"
+			ratio = 1000000.0
+		if self.yMin != None:
+			plot.setMinimumY(self.yMin)
+		if self.yMax != None:
+			plot.setMaximumY(self.yMax)
+		plot.setXLabel("Position on %s (in %s)" % (chromosome.replace("_", " "), unit))
+		plot.setLegend(True)
+		for i, name in enumerate(self.bins[chromosome]):
+			for strand in self.bins[chromosome][name]:
+				fullName = "%s %s" % (name.replace("_", " ")[:6], STRANDTOSTR[strand])
+				factor = 1 if strand == 0 else strand
+				correctedLine = dict([(key / ratio, value * factor) for key, value in self.bins[chromosome][name][strand].iteritems()])
+				plot.addLine(correctedLine, fullName, self.colors[i] if self.colors else None)
+		return plot
+
+	def _plot(self):
+		if self.merge:
+			multiplePlot = MultipleRPlotter(self.outputFileName, self.verbosity)
+			multiplePlot.setImageSize(self.width, self.height * len(self.bins.keys()))
+		progress = Progress(len(self.bins.keys()), "Plotting", options.verbosity)
+		for chromosome in sorted(self.bins.keys()):
+			plot = self._getPlotter(chromosome)
+			if self.merge:
+				multiplePlot.addPlot(plot)
+			else:
+				plot.plot()
+			progress.inc()
+		if self.merge:
+			multiplePlot.plot()
+		progress.done()
+
+	def _writeCsv(self):
+		if self.verbosity > 1:
+			print "Writing CSV file..."
+		csvHandle = open(self.csvFileName, "w")
+		csvHandle.write("chromosome;tag;strand")
+		if self.nbBins != 0:
+			xValues = range(self.start / self.sliceSize, max(self.sizes.values()) / self.sliceSize + 1)
+			for value in xValues:
+				csvHandle.write(";%d-%d" % (value * self.sliceSize + 1, (value+1) * self.sliceSize))
+			csvHandle.write("\n")
+		else:
+			xValues = []
+			for chromosome in self.bins:
+				for name in self.bins[chromosome]:
+					for strand in self.bins[chromosome][name]:
+						for bin in self.bins[chromosome][name][strand]:
+							xValues.extend(self.bins[chromosome][name][strand].keys())
+			xValues = sorted(list(set(xValues)))
+			for value in xValues:
+				csvHandle.write(";%d" % (value))
+			csvHandle.write("\n")
+		for chromosome in self.bins:
+			csvHandle.write("%s" % (chromosome))
+			for name in self.bins[chromosome]:
+				csvHandle.write(";%s" % (name))
+				for strand in self.bins[chromosome][name]:
+					csvHandle.write(";%s" % (STRANDTOSTR[strand]))
+					for bin in xValues:
+						csvHandle.write(";%.2f" % (self.bins[chromosome][name][strand].get(bin, 0)))
+					csvHandle.write("\n")
+				csvHandle.write(";")
+			csvHandle.write(";")
+		csvHandle.close()
+		if self.verbosity > 1:
+			print "...done"
+		
+	def _writeGff(self):
+		if self.verbosity > 1:
+			print "Writing GFF file..."
+		writer = Gff3Writer(self.gffFileName, self.verbosity)
+		cpt    = 1
+		for chromosome in self.bins:
+			for name in self.bins[chromosome]:
+				for strand in self.bins[chromosome][name]:
+					for bin in self.bins[chromosome][name][strand]:
+						transcript = Transcript()
+						transcript.setChromosome(chromosome)
+						transcript.setStart(bin)
+						if self.nbBins > 0:
+							transcript.setEnd(bin + self.sliceSize)
+						else:
+							transcript.setEnd(self.start)
+						transcript.setDirection(1 if strand == 0 else strand)
+						transcript.setTagValue("ID", "region%d" % (cpt))
+						cpt += 1
+		writer.write()
+		if self.verbosity > 1:
+			print "...done"
+
+	def run(self):
+		if self.sizes == None:
+			self._estimateSizes()
+		self._computeSliceSize()
+		self._initBins()
+		self._populateBins()
+		if self.normalization:
+			self._normalize()
+		if self.average:
+			self._computeAverage()
+		self._plot()
+		if self.csvFileName != None:
+			self._writeCsv()
+		if self.gffFileName != None:
+			self._writeGff()
+
+
+if __name__ == "__main__":
+
+	description = "Get Distribution v1.0.2: Get the distribution of the genomic coordinates on a genome. [Category: Visualization]"
+
+	parser = OptionParser(description = description)
+	parser.add_option("-i", "--input",       dest="inputFileName",     action="store",                            type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+	parser.add_option("-f", "--format",      dest="format",            action="store",                            type="string", help="format of the input file [compulsory] [format: transcript file format]")
+	parser.add_option("-o", "--output",      dest="outputFileName",    action="store",                            type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+	parser.add_option("-r", "--reference",   dest="referenceFileName", action="store",      default=None,         type="string", help="file containing the genome [format: file in FASTA format]")
+	parser.add_option("-b", "--nbBins",      dest="nbBins",            action="store",      default=1000,         type="int",    help="number of bins [default: 1000] [format: int]")
+	parser.add_option("-2", "--bothStrands", dest="bothStrands",       action="store_true", default=False,                       help="plot one curve per strand [format: bool] [default: false]")
+	parser.add_option("-c", "--chromosome",  dest="chromosome",        action="store",      default=None,         type="string", help="plot only a chromosome [format: string]")
+	parser.add_option("-s", "--start",       dest="start",             action="store",      default=None,         type="int",    help="start from a given region [format: int]")
+	parser.add_option("-e", "--end",         dest="end",               action="store",      default=None,         type="int",    help="end from a given region [format: int]")
+	parser.add_option("-y", "--yMin",        dest="yMin",              action="store",      default=None,         type="int",    help="minimum value on the y-axis to plot [format: int]")
+	parser.add_option("-Y", "--yMax",        dest="yMax",              action="store",      default=None,         type="int",    help="maximum value on the y-axis to plot [format: int]")
+	parser.add_option("-x", "--csv",         dest="csv",               action="store",      default=None,                        help="write a .csv file [format: output file in CSV format] [default: None]")
+	parser.add_option("-g", "--gff",         dest="gff",               action="store",      default=None,                        help="also write GFF3 file [format: output file in GFF format] [default: None]")
+	parser.add_option("-H", "--height",      dest="height",            action="store",      default=300,          type="int",    help="height of the graphics [format: int] [default: 300]")
+	parser.add_option("-W", "--width",       dest="width",             action="store",      default=600,          type="int",    help="width of the graphics [format: int] [default: 1000]")
+	parser.add_option("-a", "--average",     dest="average",           action="store_true", default=False,                       help="plot average (instead of sum) [default: false] [format: boolean]")
+	parser.add_option("-n", "--names",       dest="names",             action="store",      default="nbElements", type="string", help="name for the tags (separated by commas and no space) [default: None] [format: string]")
+	parser.add_option("-l", "--color",       dest="colors",            action="store",      default=None,         type="string", help="color of the lines (separated by commas and no space) [format: string]")
+	parser.add_option("-z", "--normalize",   dest="normalize",         action="store_true", default=False,                       help="normalize data (when panels are different) [format: bool] [default: false]")
+	parser.add_option("-m", "--merge",       dest="mergePlots",        action="store_true", default=False,                       help="merge all plots in one figure [format: bool] [default: false]")
+	parser.add_option("-v", "--verbosity",   dest="verbosity",         action="store",      default=1,            type="int",    help="trace level [default: 1] [format: int]")
+	(options, args) = parser.parse_args()
+
+	gt = GetDistribution(options.verbosity)
+	gt.setInputFile(options.inputFileName, options.format)
+	gt.setOutputFile(options.outputFileName)
+	gt.setReferenceFile(options.referenceFileName)
+	gt.setNbBins(int(options.nbBins))
+	gt.set2Strands(options.bothStrands)
+	gt.setRegion(options.chromosome, options.start, options.end)
+	gt.setNormalization(options.normalize)
+	gt.setAverage(options.average)
+	gt.setYLimits(options.yMin, options.yMax)
+	gt.writeCsv(options.csv)
+	gt.writeGff(options.gff)
+	gt.setImageSize(options.height, options.width)
+	gt.setNames(options.names.split(","))
+	gt.setColors(None if options.colors == None else options.colors.split(","))
+	gt.setNormalization(options.normalize)
+	gt.mergePlots(options.mergePlots)
+	gt.run()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetFlanking.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,233 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.misc.Progress import Progress
+
+QUERY        = 0
+REFERENCE    = 1
+INPUTS       = (QUERY, REFERENCE)
+STRANDS      = (-1, 1)
+TAG_DISTANCE = "distance_"
+TAG_SENSE    = "_sense"
+TAG_REGION   = "_region"
+TAGS_REGION  = {-1: "_upstream", 0: "", 1: "_downstream"}
+TAGS_RREGION = {-1: "upstream", 0: "overlapping", 1: "downstream"}
+TAGS_SENSE   = {-1: "antisense", 0: "", 1: "collinear"}
+STRANDSTOSTR = {-1: "(-)", 0: "", 1: "(+)"}
+
+
+def getOrderKey(transcript, direction, input):
+	if direction == 1:
+		if input == QUERY:
+			return (transcript.getEnd(), -transcript.getStart())
+		return (transcript.getStart(), -transcript.getEnd())
+	if input == QUERY:
+		return (-transcript.getStart(), transcript.getEnd())
+	return (-transcript.getEnd(), transcript.getStart())
+
+
+class GetFlanking(object):
+
+	def __init__(self, verbosity):
+		"""Find, for each query transcript, its closest flanking reference transcript."""
+		self.verbosity   = verbosity
+		# Per-input (QUERY/REFERENCE) transcripts, bucketed by chromosome.
+		self.transcripts = dict([id, {}] for id in INPUTS)
+		self.directions  = []     # directions to report: -1 upstream, 1 downstream
+		self.noOverlap   = False  # reject reference candidates overlapping the query
+		self.colinear    = False  # keep only same-strand candidates
+		self.antisense   = False  # keep only opposite-strand candidates
+		self.distance    = None   # NOTE(review): appears unused; min/max below are the active filters
+		self.minDistance = None
+		self.maxDistance = None
+		self.tagName     = "flanking"  # prefix of the tags added to output transcripts
+
+	def setInputFile(self, fileName, format, id):
+		chooser = ParserChooser(self.verbosity)
+		chooser.findFormat(format)
+		parser = chooser.getParser(fileName)
+		for transcript in parser.getIterator():
+			chromosome = transcript.getChromosome()
+			if chromosome not in self.transcripts[id]:
+				self.transcripts[id][chromosome] = []
+			self.transcripts[id][chromosome].append(transcript)
+
+	def setOutputFile(self, fileName):
+		# Output is always written in GFF3 format, whatever the input formats are.
+		self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
+
+	def addUpstreamDirection(self, upstream):
+		# Register the upstream direction (-1) when requested.
+		if upstream:
+			self.directions.append(-1)
+
+	def addDownstreamDirection(self, downstream):
+		# Register the downstream direction (1) when requested.
+		if downstream:
+			self.directions.append(1)
+
+	def setColinear(self, colinear):
+		# Keep only reference candidates on the same strand as the query.
+		self.colinear = colinear
+
+	def setAntisense(self, antisense):
+		# Keep only reference candidates on the opposite strand of the query.
+		self.antisense = antisense
+
+	def setNoOverlap(self, noOverlap):
+		# Reject reference candidates that overlap the query.
+		self.noOverlap = noOverlap
+
+	def setMinDistance(self, distance):
+		# Minimum query/reference distance (None disables the check).
+		self.minDistance = distance
+
+	def setMaxDistance(self, distance):
+		# Maximum query/reference distance (None disables the check).
+		self.maxDistance = distance
+
+	def setNewTagName(self, tagName):
+		# Prefix for the tags copied onto the output transcripts (default: "flanking").
+		self.tagName = tagName
+
+	def match(self, transcriptQuery, transcriptRef, direction):
+		"""Return True when transcriptRef is an acceptable flanking candidate.
+
+		'direction' is the sweep direction: 1 rejects references ending before
+		the query starts, -1 rejects references starting after the query ends.
+		The remaining guards apply the user's overlap/strand/distance filters.
+		"""
+		#print "comparing", transcriptQuery, "with", transcriptRef, "on direction", direction
+		if direction == 1 and transcriptRef.getEnd() < transcriptQuery.getStart():
+			return False
+		if direction == -1 and transcriptQuery.getEnd() < transcriptRef.getStart():
+			return False
+		if self.noOverlap and transcriptRef.overlapWith(transcriptQuery):
+			return False
+		if self.colinear and transcriptRef.getDirection() != transcriptQuery.getDirection():
+			return False
+		if self.antisense and transcriptRef.getDirection() == transcriptQuery.getDirection():
+			return False
+		# Distance filters are only computed when at least one bound is set.
+		if self.minDistance != None or self.maxDistance != None:
+			distance = transcriptRef.getDistance(transcriptQuery)
+			if self.minDistance != None and distance < self.minDistance:
+				return False
+			if self.maxDistance != None and distance > self.maxDistance:
+				return False
+		return True
+
+	def getFlanking(self, chromosome, direction):
+		"""Record, for every query on 'chromosome', the nearest reference in 'direction'.
+
+		Both inputs are sorted with getOrderKey so a single forward sweep with
+		one shared reference index finds each query's first acceptable
+		reference; results go into self.flankings[query][direction].
+		NOTE(review): assumes self.flankings already holds an entry per query —
+		presumably initialized by a caller outside this view; confirm.
+		"""
+		if chromosome not in self.transcripts[REFERENCE]:
+			return
+		sortedTranscripts = dict([id, {}] for id in INPUTS)
+		for id in INPUTS:
+			sortedTranscripts[id] = sorted(self.transcripts[id][chromosome], key = lambda t: getOrderKey(t, direction, id))
+		refIndex = 0
+		progress = Progress(len(sortedTranscripts[QUERY]), "Reading chr %s %s" % (chromosome, STRANDSTOSTR[direction]), self.verbosity)
+		for query in sortedTranscripts[QUERY]:
+			#print "Q: ", query
+			#print "R1: ", sortedTranscripts[REFERENCE][refIndex]
+			# Advance the shared reference cursor until it matches this query;
+			# sorting guarantees it never needs to move backwards.
+			while not self.match(query, sortedTranscripts[REFERENCE][refIndex], direction):
+				refIndex += 1
+				if refIndex == len(sortedTranscripts[REFERENCE]):
+					progress.done()
+					#print "done"
+					return
+				#print "R2: ", sortedTranscripts[REFERENCE][refIndex]
+			self.flankings[query][direction] = sortedTranscripts[REFERENCE][refIndex]
+			progress.inc()
+		progress.done()
+
	def setTags(self, query, reference, direction):
		# Annotate the query transcript with tags describing its flanking
		# reference: name, distance, relative strand, and a copy of the
		# reference's own tags (except "quality" and "feature").
		refName = reference.getTagValue("ID")
		if refName == None:
			refName = reference.getName()
		if refName == None:
			# Last resort: use the string representation as a name.
			refName = reference.__str__()
		query.setTagValue("%s%s" % (self.tagName, TAGS_REGION[direction*query.getDirection()]), refName)
		query.setTagValue("%s_%s%s" % (TAG_DISTANCE, self.tagName, TAGS_REGION[direction*query.getDirection()]), query.getDistance(reference))
		query.setTagValue("%s_%s" % (TAG_SENSE, self.tagName), TAGS_SENSE[query.getDirection() * reference.getDirection()])
		if direction == 0:
			# No direction requested: record on which side the reference lies
			# (NOTE: cmp() is Python 2 only).
			query.setTagValue("%s_%s" % (TAG_REGION, self.tagName), TAGS_RREGION[cmp(query.getRelativeDistance(reference), 0)])
		for tag in reference.getTagNames():
			if tag not in ("quality", "feature"):
				query.setTagValue("%s%s_%s" % (self.tagName, TAGS_REGION[direction*query.getDirection()], tag), reference.getTagValue(tag))
		return query
+
+	def write(self):
+		progress = Progress(len(self.flankings.keys()), "Printing data", self.verbosity)
+		for transcriptQuery in self.flankings.keys():
+			if not self.flankings[transcriptQuery]:
+				self.writer.addTranscript(transcriptQuery)
+			elif self.directions:
+				for direction in self.directions:
+					#relativeDirection = direction if transcriptQuery.getDirection() == 1 else - direction
+					relativeDirection = direction * transcriptQuery.getDirection()
+					if relativeDirection in self.flankings[transcriptQuery]:
+						transcriptRef = self.flankings[transcriptQuery][relativeDirection]
+						transcriptQuery = self.setTags(transcriptQuery, transcriptRef, relativeDirection)
+				self.writer.addTranscript(transcriptQuery)
+			else:
+				transcriptRef = sorted(self.flankings[transcriptQuery].values(), key = lambda transcriptRef: transcriptQuery.getDistance(transcriptRef))[0]
+				self.writer.addTranscript(self.setTags(transcriptQuery, transcriptRef, 0))
+			progress.inc()
+		progress.done()
+
+	def run(self):
+		for chromosome in sorted(self.transcripts[QUERY].keys()):
+			self.flankings = dict([query, {}] for query in self.transcripts[QUERY][chromosome])
+			for direction in STRANDS:
+				#print "comparison", chromosome, direction
+				self.getFlanking(chromosome, direction)
+			self.write()
+		self.writer.close()
+
if __name__ == "__main__":
	
	# Command-line entry point: parse the options and drive GetFlanking.
	description = "Get Flanking v1.0.1: Get the flanking regions of a set of reference. [Category: Data Selection]"

	parser = OptionParser(description = description)
	parser.add_option("-i", "--input1",      dest="inputFileName1", action="store",                          type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
	parser.add_option("-f", "--format1",     dest="format1",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")
	parser.add_option("-j", "--input2",      dest="inputFileName2", action="store",                          type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
	parser.add_option("-g", "--format2",     dest="format2",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")
	parser.add_option("-5", "--upstream",    dest="upstream",       action="store_true", default=False,                     help="output upstream elements [format: boolean] [default: False]")
	parser.add_option("-3", "--downstream",  dest="downstream",     action="store_true", default=False,                     help="output downstream elements [format: boolean] [default: False]")
	parser.add_option("-c", "--colinear",    dest="colinear",       action="store_true", default=False,                     help="find first colinear element [format: boolean] [default: False]")
	parser.add_option("-a", "--antisense",   dest="antisense",      action="store_true", default=False,                     help="find first anti-sense element [format: boolean] [default: False]")
	parser.add_option("-e", "--noOverlap",   dest="noOverlap",      action="store_true", default=False,                     help="do not consider elements which are overlapping reference elements [format: boolean] [default: False]")
	parser.add_option("-d", "--minDistance", dest="minDistance",    action="store",      default=None,       type="int",    help="minimum distance between 2 elements [format: int]")
	parser.add_option("-D", "--maxDistance", dest="maxDistance",    action="store",      default=None,       type="int",    help="maximum distance between 2 elements [format: int]")
	parser.add_option("-t", "--tag",         dest="tagName",        action="store",      default="flanking", type="string", help="name of the new tag [format: string] [default: flanking]")
	parser.add_option("-o", "--output",      dest="outputFileName", action="store",                          type="string", help="output file [format: output file in GFF3 format]")
	parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,          type="int",    help="trace level [format: int]")
	(options, args) = parser.parse_args()

	gf = GetFlanking(options.verbosity)
	gf.setInputFile(options.inputFileName1, options.format1, QUERY)
	gf.setInputFile(options.inputFileName2, options.format2, REFERENCE)
	gf.setOutputFile(options.outputFileName)
	gf.addUpstreamDirection(options.upstream)
	gf.addDownstreamDirection(options.downstream)
	gf.setColinear(options.colinear)
	gf.setAntisense(options.antisense)
	gf.setNoOverlap(options.noOverlap)
	gf.setMinDistance(options.minDistance)
	gf.setMaxDistance(options.maxDistance)
	gf.setNewTagName(options.tagName)
	gf.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetIntersection.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,164 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Mapping import Mapping
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
MINBIN = 3
MAXBIN = 7
REFERENCE = 0
QUERY = 1

def getBin(start, end):
	"""Return the identifier of the smallest bin containing both start and end."""
	for exponent in range(MINBIN, MAXBIN + 1):
		binSize = 10 ** exponent
		if start // binSize == end // binSize:
			return exponent * 10 ** (MAXBIN + 1) + start // binSize
	# The interval spans several of the largest bins: use the catch-all bin.
	return (MAXBIN + 1) * 10 ** (MAXBIN + 1)
+
def getOverlappingBins(start, end):
	"""Return the list of (first, last) bin-id ranges that may contain
	elements overlapping the interval [start, end]."""
	bigBin = (MAXBIN + 1) * 10 ** (MAXBIN + 1)
	ranges = []
	for exponent in range(MINBIN, MAXBIN + 1):
		binSize = 10 ** exponent
		offset  = exponent * 10 ** (MAXBIN + 1)
		ranges.append((offset + start // binSize, offset + end // binSize))
	# The catch-all bin must always be inspected.
	ranges.append((bigBin, bigBin))
	return ranges
+
+
class GetIntersection(object):
	"""Shrink each query transcript so that it only keeps the bases also
	covered by the reference data set, and write the shrunk transcripts."""

	def __init__(self, verbosity):
		# verbosity: trace level
		self.verbosity = verbosity
		self.nbQueries = 0	# number of query transcripts read
		self.nbRefs	   = 0	# number of reference transcripts read
		self.nbWritten = 0	# number of intersected transcripts written
		self.bins	   = {}	# chromosome -> bin id -> [reference transcripts]

	def setReferenceFile(self, fileName, format):
		"""Select the parser for the reference file."""
		chooser = ParserChooser(self.verbosity)
		chooser.findFormat(format)
		self.refParser = chooser.getParser(fileName)

	def setQueryFile(self, fileName, format):
		"""Select the parser for the query file."""
		chooser = ParserChooser(self.verbosity)
		chooser.findFormat(format)
		self.queryParser = chooser.getParser(fileName)

	def setOutputFile(self, fileName):
		"""Open the GFF3 output writer."""
		self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)

	def loadRef(self):
		"""Load every reference transcript into per-chromosome genomic bins."""
		progress = UnlimitedProgress(10000, "Reading references", self.verbosity)
		for transcript in self.refParser.getIterator():
			if transcript.__class__.__name__ == "Mapping":
				# Mappings are unwrapped to their transcript part.
				transcript = transcript.getTranscript()
			chromosome = transcript.getChromosome()
			bin		   = getBin(transcript.getStart(), transcript.getEnd())
			if chromosome not in self.bins:
				self.bins[chromosome] = {}
			if bin not in self.bins[chromosome]:
				self.bins[chromosome][bin] = []
			self.bins[chromosome][bin].append(transcript)
			self.nbRefs += 1
			progress.inc()
		progress.done()

	def _compareTranscript(self, queryTranscript):
		"""Return the merged intersection of the query with all overlapping
		references, or None when nothing overlaps."""
		queryChromosome = queryTranscript.getChromosome()
		if queryChromosome not in self.bins:
			return None
		queryStart = queryTranscript.getStart()
		queryEnd   = queryTranscript.getEnd()
		bins	   = getOverlappingBins(queryStart, queryEnd)
		overlaps   = []
		for binRange in bins:
			for bin in range(binRange[0], binRange[1]+1):
				if bin not in self.bins[queryChromosome]:
					continue
				for refTranscript in self.bins[queryChromosome][bin]:
					newTranscript = queryTranscript.getIntersection(refTranscript)
					if newTranscript != None:
						overlaps.append(newTranscript)
		if not overlaps:
			return None
		# Merge all partial intersections into a single transcript.
		newTranscript = overlaps[0]
		for transcript in overlaps[1:]:
			newTranscript.merge(transcript)
		return newTranscript

	def compare(self):
		"""Intersect every query with the loaded references and write the result."""
		progress = UnlimitedProgress(10000, "Comparing queries", self.verbosity)
		for queryTranscript in self.queryParser.getIterator():
			if queryTranscript.__class__.__name__ == "Mapping":
				queryTranscript = queryTranscript.getTranscript()
			progress.inc()
			self.nbQueries += 1
			newTranscript = self._compareTranscript(queryTranscript)
			if newTranscript != None:
				# Bug fix: write the intersected (shrunk) transcript, not the
				# original query -- the merged result was computed and dropped.
				self.writer.addTranscript(newTranscript)
				self.nbWritten += 1
		progress.done()
		self.writer.close()

	def displayResults(self):
		"""Print read/written counters (parenthesized prints: Python 2 and 3)."""
		print("# queries:  %d" % (self.nbQueries))
		print("# refs:     %d" % (self.nbRefs))
		print("# written:  %d" % (self.nbWritten))

	def run(self):
		"""Load the references, intersect the queries, report the counts."""
		self.loadRef()
		self.compare()
		self.displayResults()
+
if __name__ == "__main__":
	
	# Command-line entry point: parse the options and drive GetIntersection.
	description = "Get Intersection v1.0.0: Shrink the first data set so that all bases covered by the first data set is also covered by the second data set. [Category: Data Comparison]"

	parser = OptionParser(description = description)
	parser.add_option("-i", "--input1",	        dest="inputFileName1", action="store",			           type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
	parser.add_option("-f", "--format1",        dest="format1",		  action="store",			           type="string", help="format of previous file [compulsory] [format: transcript file format]")
	parser.add_option("-j", "--input2",	        dest="inputFileName2", action="store",			           type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
	parser.add_option("-g", "--format2",        dest="format2",		  action="store",			           type="string", help="format of previous file [compulsory] [format: transcript file format]")
	parser.add_option("-o", "--output",	        dest="outputFileName", action="store",			           type="string", help="output file [format: output file in GFF3 format]")
	parser.add_option("-v", "--verbosity",      dest="verbosity",	   action="store",      default=1,     type="int",	 help="trace level [format: int]")
	(options, args) = parser.parse_args()

	gi = GetIntersection(options.verbosity)
	gi.setQueryFile(options.inputFileName1, options.format1)
	gi.setReferenceFile(options.inputFileName2, options.format2)
	gi.setOutputFile(options.outputFileName)
	gi.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetRandomSubset.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,96 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2011
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import random
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.misc.Progress import Progress
+
class GetRandomSubset(object):
    """Select a random subset of the transcripts of an input file and write
    them to a GFF3 file."""

    def __init__(self, verbosity):
        # verbosity: trace level
        self.verbosity = verbosity

    def setInputFile(self, fileName, format):
        """Select the parser matching the given transcript file format."""
        chooser = ParserChooser(self.verbosity)
        chooser.findFormat(format)
        self.parser = chooser.getParser(fileName)

    def setNumber(self, number, percent):
        """Set the number of elements to keep, either directly or as a
        percentage of the input size.  Raises when neither is given."""
        if number != None:
            # Bug fix: the option is parsed as a string; random.sample needs
            # an int, so convert explicitly (int-typed callers are unaffected).
            self.number = int(number)
        elif percent != None:
            self.number = int(float(percent) / 100 * self.parser.getNbTranscripts())
        else:
            raise Exception("Error! Number of elements to output is not given!")

    def setOutputFile(self, fileName):
        """Open the GFF3 output writer."""
        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)

    def chooseElements(self):
        # Draw self.number distinct indices; a set makes the per-transcript
        # membership test in run() O(1) instead of O(number).
        self.randomIndices = set(random.sample(range(self.parser.getNbTranscripts()), self.number))

    def run(self):
        """Read the input and write the randomly chosen transcripts."""
        self.chooseElements()
        progress  = Progress(self.parser.getNbTranscripts(), "Reading input file", self.verbosity)
        nbWritten = 0
        for cpt1, transcript in enumerate(self.parser.getIterator()):
            if cpt1 in self.randomIndices:
                self.writer.addTranscript(transcript)
                nbWritten += 1
            progress.inc()
        self.writer.write()
        self.writer.close()
        progress.done()
        if self.verbosity > 1:
            # Parenthesized single-argument prints work in Python 2 and 3.
            print("%d transcripts read" % (self.parser.getNbTranscripts()))
            print("%d transcripts written" % (nbWritten))
+
+
if __name__ == "__main__":
    
    # Command-line entry point: parse the options and drive GetRandomSubset.
    description = "Get Random Subset v1.0.1: Get a random sub-set of a list of genomic coordinates. [Category: Personal]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of file [compulsory] [format: transcript file format]")
    # Bug fix: -n and -p are numeric (the help text already said "[format:
    # int]") but were declared type="string"; parse them as ints.
    parser.add_option("-n", "--number",    dest="number",         action="store", default=None, type="int",    help="number of elements to output [format: int]")
    parser.add_option("-p", "--percent",   dest="percent",        action="store", default=None, type="int",    help="percentage of elements to output (between 0 and 100) [format: int]")
    parser.add_option("-o", "--output",    dest="outputFileName", action="store",               type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    grs = GetRandomSubset(options.verbosity)
    grs.setInputFile(options.inputFileName, options.format)
    grs.setNumber(options.number, options.percent)
    grs.setOutputFile(options.outputFileName)
    grs.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetReadDistribution.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,303 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import random, os, glob, subprocess
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.parsing.GffParser import GffParser
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc import Utils
+from commons.core.LoggerFactory import LoggerFactory
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+
LOG_DEPTH      = "smart"
DEFAULT_REGION = "_all_"
# Axis-label suffix per position divisor.  Bug fix: 10**6 bp is megabases
# ("Mbp"), not "Gbp", as the plot divides positions by this value.
MULTIPLE_STR   = {1: "", 1000: " (in kbp)", 1000000: " (in Mbp)"}

class GetReadDistribution(object):
	"""Compute the binned genomic distribution of one or several sets of
	reads and plot it with R/ggplot2, optionally restricted to regions."""

	def __init__(self, verbosity = 0):
		self.xLab         = ""
		self.yLab         = "# reads"
		self.verbosity    = verbosity
		# Random tag used to name the temporary .dat/.R files.
		self.number       = random.randint(0, 100000)
		self.log          = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)
		self.parsers      = {}    # sample name -> parser
		self.distribution = {}    # region -> name -> chromosome -> strand -> bin -> count
		self.factors      = {}    # sample name -> normalization factor
		self.regions      = None  # optional regions of interest
		self.tmpDatName   = None  # temporary data file for R
		self.tmpRName     = None  # temporary R script
		self.quorum       = 1
		self.strands      = False
		self.width        = 800
		self.height       = 300
		self.arial        = False

	def setNames(self, names):
		"""Set the sample names (one per input file)."""
		self.names = names

	def setInputFiles(self, fileNames, format):
		"""Create one parser per input file; all files share the same format."""
		chooser = ParserChooser(self.verbosity)
		chooser.findFormat(format)
		for cpt, fileName in enumerate(fileNames):
			self.parsers[self.names[cpt]] = chooser.getParser(fileName)

	def setOutputFileName(self, fileName):
		"""Set the output PNG file name."""
		self.outputFileName = fileName

	def setLabs(self, xLab, yLab):
		"""Set the x- and y-axis labels."""
		self.xLab = xLab
		self.yLab = yLab

	def setBinSize(self, binSize):
		"""Set the bin size (in bp) used to accumulate read counts."""
		self.binSize = binSize

	def setColors(self, colors):
		"""Set the bar colors (None: default ggplot2 'Set1' palette)."""
		self.colors = colors

	def setFactors(self, factors):
		"""Set the per-sample normalization factors (None: all 1.0)."""
		if factors == None:
			self.factors = dict([name, 1.0] for name in self.names)
		else:
			self.factors = dict(zip(self.names, factors))

	def setMultiple(self, boolean):
		"""Use human readable positions (kbp/Mbp) on the x-axis when True."""
		self.multiple = boolean
	
	def setImageSize(self, width, height):
		"""Set the PNG size; None keeps the current default."""
		if width != None:
			self.width = width
		if height != None:
			self.height = height

	def setQuorum(self, quorum):
		"""Minimum bin count needed to plot a region (None: always plot)."""
		self.quorum = quorum

	def setRegionsFile(self, fileName):
		"""Optionally restrict the plots to the regions of a GFF file."""
		if fileName != None:
			self._loadRegions(fileName)

	def setBothStrands(self, strands):
		"""Plot minus-strand counts below the x-axis when True."""
		self.strands = strands

	def setArial(self, arial):
		"""Use the Arial font in the plots when True."""
		self.arial = arial

	def _checkOptions(self):
		if not self.parsers:
			# Bug fix: this called self.logAndRaise, but the method is named
			# _logAndRaise, so the intended error was masked by AttributeError.
			self._logAndRaise("ERROR: Missing input file names")

	def _logAndRaise(self, errorMsg):
		"""Log the message at error level, then raise it."""
		self.log.error(errorMsg)
		raise Exception(errorMsg)

	def _loadRegions(self, fileName):
		"""Load the regions of interest: chromosome -> start -> end -> [names]."""
		self.regions = {}
		parser       = GffParser(fileName, self.verbosity)
		for transcript in parser.getIterator():
			chromosome = transcript.getChromosome()
			start      = transcript.getStart()
			end        = transcript.getEnd()
			name       = transcript.getName()
			if chromosome not in self.regions:
				self.regions[chromosome] = {}
			if start not in self.regions[chromosome]:
				self.regions[chromosome][start] = {}
			if end not in self.regions[chromosome][start]:
				self.regions[chromosome][start][end] = []
			self.regions[chromosome][start][end].append(name)

	def _getRegions(self, transcript):
		"""Return the names of the loaded regions overlapping the transcript,
		or the catch-all region when no region file was given."""
		if self.regions == None:
			return [DEFAULT_REGION]
		chromosome = transcript.getChromosome()
		start      = transcript.getStart()
		end        = transcript.getEnd()
		if chromosome not in self.regions:
			return []
		names = []
		for loadedStart in sorted(self.regions[chromosome].keys()):
			# Starts are scanned in increasing order: stop early.
			if loadedStart > end:
				return names
			for loadedEnd in reversed(sorted(self.regions[chromosome][loadedStart].keys())):
				if loadedEnd < start:
					break
				names.extend(self.regions[chromosome][loadedStart][loadedEnd])
		return names

	def _parse(self, name):
		"""Accumulate the binned coverage of one sample into self.distribution."""
		progress = UnlimitedProgress(10000, "Reading file '%s'" % (name), self.verbosity)
		for transcript in self.parsers[name].getIterator():
			if transcript.__class__.__name__ == "Mapping":
				transcript = transcript.getTranscript()
			regions = self._getRegions(transcript)
			for region in regions:
				if region not in self.distribution:
					self.distribution[region] = {}
				if name not in self.distribution[region]:
					self.distribution[region][name] = {}
				chromosome  = transcript.getChromosome()
				# A read may stand for several collapsed elements.
				nbElements  = float(transcript.getTagValue("nbElements")) if "nbElements" in transcript.getTagNames() else 1
				nbElements *= self.factors.get(name, 1)
				strand      = transcript.getDirection() if self.strands else 1
				if chromosome not in self.distribution[region][name]:
					self.distribution[region][name][chromosome] = {}
				if strand not in self.distribution[region][name][chromosome]:
					self.distribution[region][name][chromosome][strand] = {}
				previousBin = None
				for exon in transcript.getExons():
					for pos in range(exon.getStart(), exon.getEnd()+1):
						# Explicit integer division (same as py2 "/" on ints):
						# count each transcript once per touched bin.
						bin = pos // self.binSize
						if bin != previousBin:
							self.distribution[region][name][chromosome][strand][bin] = self.distribution[region][name][chromosome][strand].get(bin, 0) + nbElements
							previousBin = bin
			progress.inc()
		progress.done()

	def _checkQuorum(self, region):
		"""Tell whether the highest bin count of the region reaches the quorum."""
		if self.quorum == None:
			return True
		# Bug fix: the raw maximum was returned, so any non-empty region
		# passed even when its counts were below a quorum > 1.
		return max([max([max([max(self.distribution[region][name][chromosome][strand].values()) for strand in self.distribution[region][name][chromosome]]) for chromosome in self.distribution[region][name]]) for name in self.distribution[region]]) >= self.quorum

	def _writeData(self, region):
		"""Write the distribution of one region to a tab-separated file for R."""
		self.tmpDatName = "tmpFile%d.dat" % (self.number)
		handle          = open(self.tmpDatName, "w")
		handle.write("Chr\tPos\tStrand\tCount\tSample\n")
		for name in self.distribution[region]:
			for chromosome in sorted(self.distribution[region][name].keys()):
				for strand in sorted(self.distribution[region][name][chromosome].keys()):
					for pos in sorted(self.distribution[region][name][chromosome][strand].keys()):
						# Minus-strand counts are negated so they plot downwards.
						handle.write("%s\t%d\t%d\t%d\t\"%s\"\n" % (chromosome, pos * self.binSize, strand, self.distribution[region][name][chromosome][strand].get(pos, 0) * strand, name))
		handle.close()

	def _findMultiple(self, region):
		"""Choose the position divisor (1, 1000 or 1000000) for readability."""
		if not self.multiple:
			return 1
		maxPosition = max([max([max([max(self.distribution[region][name][chromosome][strand].keys()) for strand in self.distribution[region][name][chromosome]]) for chromosome in self.distribution[region][name]]) for name in self.distribution[region]]) * self.binSize
		if maxPosition > 2000000:
			return 1000000
		elif maxPosition > 2000:
			return 1000
		return 1

	def _writeScript(self, region):
		"""Write the R/ggplot2 script that renders one region to PNG."""
		self.tmpRName = "tmpFile%d.R" % (self.number)
		fileName      = self.outputFileName if region == DEFAULT_REGION else "%s_%s.png" % (os.path.splitext(self.outputFileName)[0], region)
		colors        = "scale_fill_brewer(palette=\"Set1\") + scale_color_brewer(palette=\"Set1\")" if self.colors == None else "scale_fill_manual(values = c(%s)) + scale_color_manual(values = c(%s))" % (", ".join(["\"%s\"" % (color) for color in self.colors]), ", ".join(["\"%s\"" % (color) for color in self.colors]))
		title         = "" if region == DEFAULT_REGION else " + labs(title = \"Distribution of %s\") " % (region)
		facet         = "Sample ~ Chr" if region == DEFAULT_REGION else "Sample ~ ."
		handle        = open(self.tmpRName, "w")
		multiple      = self._findMultiple(region)
		arial         = ", text = element_text(family=\"Arial\", size=20)" if self.arial else ""
		if self.arial:
			handle.write("library(extrafont)\nloadfonts()\n")
		handle.write("library(ggplot2)\n")
		handle.write("data <- read.table(\"%s\", header = T)\n" % (self.tmpDatName))
		handle.write("data$Sample <- factor(data$Sample, levels=c(%s))\n" % (", ".join(["\"%s\"" % (name) for name in self.names])))
		handle.write("png(\"%s\", width = %d, height = %d)\n" % (fileName, self.width, self.height))
		handle.write("ggplot(data, aes(x = Pos/%d, y = Count, fill = Sample, color = Sample)) %s + geom_bar(stat = \"identity\") + facet_grid(%s, space=\"free\") + xlab(\"%s%s\") + ylab(\"%s\") + %s + theme(legend.position = \"none\", panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()%s)\n" % (multiple, title, facet, self.xLab, MULTIPLE_STR[multiple], self.yLab, colors, arial))
		handle.write("dev.off()\n")
		# Bug fix: close (and flush) the script before R executes it.
		handle.close()

	def _runR(self):
		"""Run the generated script with R ($SMARTRPATH overrides the binary)."""
		rCommand = os.environ["SMARTRPATH"] if "SMARTRPATH" in os.environ else "R"
		command  = "\"%s\" CMD BATCH %s" % (rCommand, self.tmpRName)
		status   = subprocess.call(command, shell=True)
		if status != 0:
			raise Exception("Problem with the execution of script file %s, status is: %s" % (self.tmpRName, status))

	def _plot(self):
		"""Plot every region that reaches the quorum."""
		progress = Progress(len(self.distribution), "Plotting data", self.verbosity)
		for region in self.distribution:
			if not self._checkQuorum(region):
				self.log.info("Not displaying '%s' for it contains insufficient data." % (region))
			else:
				self._writeData(region)
				self._writeScript(region)
				self._runR()
			progress.inc()
		progress.done()

	def _cleanFiles(self):
		"""Remove the temporary .dat/.R files and R's side products (.Rout...)."""
		for fileName in (self.tmpDatName, self.tmpRName):
			if fileName != None and os.path.exists(fileName):
				os.remove(fileName)
				for otherFileName in glob.glob("%s*" % (fileName)):
					os.remove(otherFileName)

	def run(self):
		"""Parse every sample, plot the distributions, and clean up."""
		LoggerFactory.setLevel(self.log, self.verbosity)
		self._checkOptions()
		self.log.info("START Get Read Distribution")
		for name in self.names:
			self._parse(name)
		self._plot()
		self._cleanFiles()
		self.log.info("END Get Read Distribution")
+
+
if __name__ == "__main__":
	# Command-line entry point: parse the options and drive GetReadDistribution.
	description = "Usage: GetReadDistribution.py [options]\n\nGet Read Distribution v1.0.1: Get the distribution of a set of reads. [Category: Personal]\n"
	epilog = ""
	parser = RepetOptionParser(description = description, epilog = epilog)
	parser.add_option("-i", "--input",     dest="inputFileNames",  action="store",      default=None,      type="string", help="input files, separated by commas [compulsory] [format: string]")
	parser.add_option("-f", "--format",    dest="format",          action="store",      default=None,      type="string", help="format of the input [compulsory] [format: transcript or sequence file format]")
	parser.add_option("-n", "--names",     dest="names",           action="store",      default=None,      type="string", help="name of the input data, separated by commas [compulsory] [format: string]")
	parser.add_option("-o", "--output",    dest="outputFileName",  action="store",      default=None,      type="string", help="output file [format: output file in PNG format]")
	parser.add_option("-s", "--binSize",   dest="binSize",         action="store",      default=10000,     type="int",    help="bin size [format: int] [default: 10000]")
	parser.add_option("-l", "--xLabel",    dest="xLab",            action="store",      default="",        type="string", help="x-axis label name [format: string]")
	parser.add_option("-L", "--yLabel",    dest="yLab",            action="store",      default="# reads", type="string", help="y-axis label name [format: string] [default: Reads]")
	parser.add_option("-c", "--colors",    dest="colors",          action="store",      default=None,      type="string", help="colors of the bars, separated by commas  [format: string]")
	parser.add_option("-a", "--factors",   dest="factors",         action="store",      default=None,      type="string", help="normalization factors, separated by commas  [format: string]")
	parser.add_option("-r", "--regions",   dest="regionsFileName", action="store",      default=None,      type="string", help="regions to plot [format: transcript file in GFF format]")
	parser.add_option("-2", "--strands",   dest="strands",         action="store_true", default=False,                    help="plot negative strands on the negative x-axis [format: boolean] [default: False]")
	parser.add_option("-m", "--multiple",  dest="multiple",        action="store_true", default=False,                    help="use human readable genomic positions (k, G) [format: boolean] [default: False]")
	parser.add_option("-q", "--quorum",    dest="quorum",          action="store",      default=1,         type="int",    help="minimum number of intervals to plot a region [format: int] [default: 1]")
	parser.add_option("-z", "--width",     dest="width",           action="store",      default=800,       type="int",    help="width of the image [format: int] [default: 800]")
	parser.add_option("-Z", "--height",    dest="height",          action="store",      default=300,       type="int",    help="height of the image [format: int] [default: 300]")
	parser.add_option("-A", "--arial",     dest="arial",           action="store_true", default=False,                    help="use Arial font [format: boolean] [default: false]")
	parser.add_option("-v", "--verbosity", dest="verbosity",       action="store",      default=1,         type="int",    help="trace level [format: int]")
	# RepetOptionParser.parse_args returns (options, args); only options is used.
	options = parser.parse_args()[0]
	iGetReadDistribution = GetReadDistribution(options.verbosity)
	iGetReadDistribution.setNames(options.names.split(","))
	iGetReadDistribution.setInputFiles(options.inputFileNames.split(","), options.format)
	iGetReadDistribution.setOutputFileName(options.outputFileName)
	iGetReadDistribution.setLabs(options.xLab, options.yLab)
	iGetReadDistribution.setBinSize(options.binSize)
	iGetReadDistribution.setColors(None if options.colors == None else options.colors.split(","))
	iGetReadDistribution.setFactors(None if options.factors == None else map(float, options.factors.split(",")))
	iGetReadDistribution.setRegionsFile(options.regionsFileName)
	iGetReadDistribution.setMultiple(options.multiple)
	iGetReadDistribution.setQuorum(options.quorum)
	iGetReadDistribution.setImageSize(options.width, options.height)
	iGetReadDistribution.setBothStrands(options.strands)
	iGetReadDistribution.setArial(options.arial)
	iGetReadDistribution.run()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetReadSizes.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,262 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import random, os, glob, subprocess
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.parsing.GffParser import GffParser
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc import Utils
+from commons.core.LoggerFactory import LoggerFactory
+from commons.core.utils.RepetOptionParser import RepetOptionParser
+
+LOG_DEPTH      = "smart"
+DEFAULT_REGION = "_all_"
+
+class GetReadSizes(object):
+
+	def __init__(self, verbosity = 0):
+		self.xLab       = "Size"
+		self.yLab       = "# reads"
+		self.verbosity  = verbosity
+		self.number     = random.randint(0, 100000)
+		self.log        = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)
+		self.parsers    = {}
+		self.sizes      = {}
+		self.factors    = {}
+		self.regions    = None
+		self.tmpDatName = None
+		self.tmpRName   = None
+		self.width      = 800
+		self.height     = 300
+		self.arial      = False
+
+	def setNames(self, names):
+		self.names = names
+
+	def setInputFiles(self, fileNames, format):
+		chooser = ParserChooser(self.verbosity)
+		chooser.findFormat(format)
+		for cpt, fileName in enumerate(fileNames):
+			self.parsers[self.names[cpt]] = chooser.getParser(fileName)
+
+	def setOutputFileName(self, fileName):
+		self.outputFileName = fileName
+
+	def setLabs(self, xLab, yLab):
+		self.xLab = xLab
+		self.yLab = yLab
+
+	def setSizes(self, minSize, maxSize):
+		self.minSize = minSize
+		self.maxSize = maxSize
+
+	def setColors(self, colors):
+		self.colors = colors
+
+	def setFactors(self, factors):
+		self.factors = dict(zip(self.names, factors))
+
+	def setRegionsFile(self, fileName):
+		if fileName != None:
+			self._loadRegions(fileName)
+
+	def setImageSize(self, width, height):
+		if width != None:
+			self.width = width
+		if height != None:
+			self.height = height
+
+	def setArial(self, arial):
+		self.arial = arial
+
+	def _checkOptions(self):
+		if not self.parsers:
+			self.logAndRaise("ERROR: Missing input file names")
+
+	def _logAndRaise(self, errorMsg):
+		self.log.error(errorMsg)
+		raise Exception(errorMsg)
+
+	def _loadRegions(self, fileName):
+		self.regions = {}
+		parser       = GffParser(fileName, self.verbosity)
+		for transcript in parser.getIterator():
+			chromosome = transcript.getChromosome()
+			start      = transcript.getStart()
+			end        = transcript.getEnd()
+			name       = transcript.getName()
+			if chromosome not in self.regions:
+				self.regions[chromosome] = {}
+			if start not in self.regions[chromosome]:
+				self.regions[chromosome][start] = {}
+			if end not in self.regions[chromosome][start]:
+				self.regions[chromosome][start][end] = []
+			self.regions[chromosome][start][end].append(name)
+
+	def _getRegions(self, transcript):
+		if self.regions == None:
+			return [DEFAULT_REGION]
+		chromosome = transcript.getChromosome()
+		start      = transcript.getStart()
+		end        = transcript.getEnd()
+		if chromosome not in self.regions:
+			return []
+		names = []
+		for loadedStart in sorted(self.regions[chromosome].keys()):
+			if loadedStart > end:
+				return names
+			for loadedEnd in reversed(sorted(self.regions[chromosome][loadedStart].keys())):
+				if loadedEnd < start:
+					break
+				names.extend(self.regions[chromosome][loadedStart][loadedEnd])
+		return names
+
+	def _parse(self, name):
+		progress = UnlimitedProgress(10000, "Reading file '%s'" % (name), self.verbosity)
+		for transcript in self.parsers[name].getIterator():
+			if transcript.__class__.__name__ == "Mapping":
+				transcript = transcript.getTranscript()
+			regions = self._getRegions(transcript)
+			for region in regions:
+				if region not in self.sizes:
+					self.sizes[region] = {}
+				if name not in self.sizes[region]:
+					self.sizes[region][name] = {}
+				size = transcript.getSize()
+				if (self.minSize == None or size >= self.minSize) and (self.maxSize == None or size <= self.maxSize):
+					nbElements                     = float(transcript.getTagValue("nbElements")) if "nbElements" in transcript.getTagNames() else 1
+					nbElements                    *= self.factors.get(name, 1)
+					self.sizes[region][name][size] = self.sizes[region][name].get(size, 0) + nbElements
+			progress.inc()
+		progress.done()
+		if self.minSize == None:
+			self.minSize = min([min(self.sizes[region][name].keys()) for name in self.names for region in region])
+		if self.maxSize == None:
+			self.maxSize = max([max(self.sizes[region][name].keys()) for name in self.names for region in region])
+
+	def _checkQuorum(self, region):
+		return (max([sum(self.sizes[region][name].values()) for name in self.sizes[region]]) > 0)
+
+	def _writeData(self, region):
+		self.tmpDatName = "tmpFile%d.dat" % (self.number)
+		handle          = open(self.tmpDatName, "w")
+		handle.write("Size\tCount\tSample\n")
+		for name in self.sizes[region]:
+			for size in sorted(self.sizes[region][name].keys()):
+				handle.write("%d\t%d\t\"%s\"\n" % (size, self.sizes[region][name].get(size, 0), name))
+		handle.close()
+
+	def _writeScript(self, region):
+		self.tmpRName = "tmpFile%d.R" % (self.number)
+		fileName      = self.outputFileName if region == DEFAULT_REGION else "%s_%s.png" % (os.path.splitext(self.outputFileName)[0], region)
+		colors        = "scale_fill_brewer(palette=\"Set1\")" if self.colors == None else "scale_fill_manual(values = c(%s))" % (", ".join(["\"%s\"" % (color) for color in self.colors]))
+		title         = "" if region == DEFAULT_REGION else " + labs(title = \"Sizes of %s\")" % (region)
+		handle        = open(self.tmpRName, "w")
+		arial         = ", text = element_text(family=\"Arial\", size=20)" if self.arial else ""
+		if self.arial:
+			handle.write("library(extrafont)\nloadfonts()\n")
+		handle.write("library(ggplot2)\n")
+		handle.write("data <- read.table(\"%s\", header = T)\n" % (self.tmpDatName))
+		handle.write("data$Sample <- factor(data$Sample, levels=c(%s))\n" % (", ".join(["\"%s\"" % (name) for name in self.names])))
+		handle.write("data$Size <- factor(data$Size, levels=c(%s))\n" % (", ".join(["%d" % (size) for size in range(self.minSize, self.maxSize+1)])))
+		handle.write("png(\"%s\", width = %d, height = %d)\n" % (fileName, self.width, self.height))
+		handle.write("ggplot(data, aes(x = Size, y = Count, fill = Size)) %s + geom_bar(stat = \"identity\") + facet_grid(. ~ Sample, space=\"free_x\") + xlab(\"%s\") + ylab(\"%s\") + %s + theme(legend.position = \"none\", panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()%s)\n" % (title, self.xLab, self.yLab, colors, arial))
+		handle.write("dev.off()\n")
+
+	def _runR(self):
+		rCommand = os.environ["SMARTRPATH"] if "SMARTRPATH" in os.environ else "R"
+		command  = "\"%s\" CMD BATCH %s" % (rCommand, self.tmpRName)
+		status   = subprocess.call(command, shell=True)
+		if status != 0:
+			raise Exception("Problem with the execution of script file %s, status is: %s" % (self.tmpRName, status))
+
+	def _plot(self):
+		progress = Progress(len(self.sizes), "Plotting data", self.verbosity)
+		for region in self.sizes:
+			if not self._checkQuorum(region):
+				self.log.info("Not displaying '%s' for it contains no data." % (region))
+			else:
+				self._writeData(region)
+				self._writeScript(region)
+				self._runR()
+			progress.inc()
+		progress.done()
+
+	def _cleanFiles(self):
+		for fileName in (self.tmpDatName, self.tmpRName):
+			if fileName != None and os.path.exists(fileName):
+				os.remove(fileName)
+				for otherFileName in glob.glob("%s*" % (fileName)):
+					os.remove(otherFileName)
+
+	def run(self):
+		LoggerFactory.setLevel(self.log, self.verbosity)
+		self._checkOptions()
+		self.log.info("START Get Read Sizes")
+		for name in self.names:
+			self._parse(name)
+		self._plot()
+		self._cleanFiles()
+		self.log.info("END Get Read Sizes")
+
+
+if __name__ == "__main__":
+	description = "Usage: GetReadSizes.py [options]\n\nGet Read Sizes v1.0.1: Get the sizes of a set of reads. [Category: Personal]\n"
+	epilog = ""
+	parser = RepetOptionParser(description = description, epilog = epilog)
+	parser.add_option("-i", "--input",     dest="inputFileNames",  action="store",      default=None,     type="string", help="input files, separated by commas [compulsory] [format: string]")
+	parser.add_option("-f", "--format",    dest="format",          action="store",      default=None,     type="string", help="format of the input [compulsory] [format: transcript or sequence file format]")
+	parser.add_option("-n", "--names",     dest="names",           action="store",      default=None,     type="string", help="name of the input data, separated by commas [compulsory] [format: string]")
+	parser.add_option("-o", "--output",    dest="outputFileName",  action="store",      default=None,      type="string", help="output file [format: output file in PNG format]")
+	parser.add_option("-s", "--minSize",   dest="minSize",         action="store",      default=None,      type="int",    help="minimum size [format: int]")
+	parser.add_option("-S", "--maxSize",   dest="maxSize",         action="store",      default=None,      type="int",    help="maximum size [format: int]")
+	parser.add_option("-l", "--xLabel",    dest="xLab",            action="store",      default="Size",    type="string", help="x-axis label name [format: string] [default: Size]")
+	parser.add_option("-L", "--yLabel",    dest="yLab",            action="store",      default="# reads", type="string", help="y-axis label name [format: string] [default: Reads]")
+	parser.add_option("-c", "--colors",    dest="colors",          action="store",      default=None,      type="string", help="colors of the bars, separated by commas  [format: string]")
+	parser.add_option("-a", "--factors",   dest="factors",         action="store",      default=None,      type="string", help="normalization factors, separated by commas  [format: string]")
+	parser.add_option("-r", "--regions",   dest="regionsFileName", action="store",      default=None,      type="string", help="regions to plot [format: transcript file in GFF format]")
+	parser.add_option("-z", "--width",     dest="width",           action="store",      default=800,       type="int",    help="width of the image [format: int] [default: 800]")
+	parser.add_option("-Z", "--height",    dest="height",          action="store",      default=300,       type="int",    help="height of the image [format: int] [default: 300]")
+	parser.add_option("-A", "--arial",     dest="arial",           action="store_true", default=False,                    help="use Arial font [format: boolean] [default: false]")
+	parser.add_option("-v", "--verbosity", dest="verbosity",       action="store",      default=1,         type="int",    help="trace level [format: int]")
+	options = parser.parse_args()[0]
+	iGetReadSizes = GetReadSizes(options.verbosity)
+	iGetReadSizes.setNames(options.names.split(","))
+	iGetReadSizes.setInputFiles(options.inputFileNames.split(","), options.format)
+	iGetReadSizes.setOutputFileName(options.outputFileName)
+	iGetReadSizes.setLabs(options.xLab, options.yLab)
+	iGetReadSizes.setSizes(options.minSize, options.maxSize)
+	iGetReadSizes.setColors(None if options.colors == None else options.colors.split(","))
+	iGetReadSizes.setFactors(None if options.factors == None else map(float, options.factors.split(",")))
+	iGetReadSizes.setRegionsFile(options.regionsFileName)
+	iGetReadSizes.setImageSize(options.width, options.height)
+	iGetReadSizes.setArial(options.arial)
+	iGetReadSizes.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/GetUpDownStream.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,152 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2012
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os
+from optparse import OptionParser, OptionGroup
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
+from SMART.Java.Python.ncList.FileSorter import FileSorter
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc import Utils
+
+
+class GetUpDownStream(object):
+
+    def __init__(self, verbosity = 0):
+        self.verbosity         = verbosity
+        self.inputReader       = None
+        self.outputWriter      = None
+        self.nbRead            = 0
+        self.nbWritten         = 0
+        self.nbMerges          = 0
+        self.splittedFileNames = {}
+
+    def __del__(self):
+        for fileName in self.splittedFileNames.values():
+            os.remove(fileName)
+            
+    def setInputFile(self, fileName, format):
+        parserChooser = ParserChooser(self.verbosity)
+        parserChooser.findFormat(format, "transcript")
+        self.parser = parserChooser.getParser(fileName)
+        self.sortedFileName = "%s_sorted.pkl" % (os.path.splitext(fileName)[0])
+
+    def setOutputFile(self, fileName):
+        self.outputWriter = Gff3Writer(fileName, self.verbosity)
+
+    def setDistances(self, up, down):
+        self.upDistance   = up
+        self.downDistance = down
+
+    def _sortFile(self):
+        fs = FileSorter(self.parser, self.verbosity-4)
+        fs.perChromosome(True)
+        fs.setOutputFileName(self.sortedFileName)
+        fs.sort()
+        self.splittedFileNames       = fs.getOutputFileNames()
+        self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()
+        self.nbRead                  = fs.getNbElements()
+
+    def _write(self, start, end, reference, after):
+        if start > end:
+            return
+        transcript = Transcript()
+        transcript.setChromosome(reference.getChromosome())
+        transcript.setStart(start)
+        transcript.setEnd(end)
+        transcript.setDirection("+")
+        transcript.setName("%s_%s" % ("up" if Utils.xor(reference.getDirection() == 1, after) else "down", reference.getName()))
+        self.outputWriter.addTranscript(transcript)
+        
+    def _getFlanking(self, chromosome):
+        progress    = Progress(self.nbElementsPerChromosome[chromosome], "Analyzing chromosome %s" % (chromosome), self.verbosity)
+        parser      = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)
+        previous    = None
+        for transcript in parser.getIterator():
+            progress.inc()
+            transcript.removeExons()
+            if previous == None:
+                distance = self.upDistance if transcript.getDirection() == 1 else self.downDistance
+                start    = max(1, transcript.getStart() - distance)
+                self._write(start, transcript.getStart()-1, transcript, False)
+                previous = transcript
+                continue
+            if previous.include(transcript):
+                continue
+            if transcript.overlapWith(previous):
+                previous = transcript
+                continue
+            distancePrevious = self.downDistance if previous.getDirection()   == 1 else self.upDistance
+            distanceCurrent  = self.upDistance   if transcript.getDirection() == 1 else self.downDistance
+            distance = transcript.getDistance(previous)
+            if distancePrevious + distanceCurrent == 0:
+                previous = transcript
+                continue
+            if distance >= distancePrevious + distanceCurrent:
+                endPrevious  = previous.getEnd() + distancePrevious
+                startCurrent = transcript.getStart() - distanceCurrent
+            else:
+                middle       = previous.getEnd() + int((distance-1) * float(distancePrevious) / (distancePrevious + distanceCurrent))
+                endPrevious  = middle
+                startCurrent = middle+1
+            self._write(previous.getEnd() + 1, endPrevious, previous, True)
+            self._write(startCurrent, transcript.getStart() - 1, transcript, False)
+            previous = transcript
+        distance = self.downDistance if previous.getDirection() == 1 else self.upDistance
+        self._write(previous.getEnd() + 1, previous.getEnd() + distance, previous, True)
+        progress.done()
+
+    def run(self):
+        self._sortFile()
+        for chromosome in sorted(self.nbElementsPerChromosome.keys()):
+            self._getFlanking(chromosome)
+        self.outputWriter.close()
+
+if __name__ == "__main__":
+    
+    # parse command line
+    description = "Get Up and Down Stream v1.0.0: Get the flanking regions of an annotation. [Category: Data Modification]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in mapping format given by -f]")
+    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of the file [compulsory] [format: mapping file format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-u", "--up",        dest="up",             action="store",      default=0,     type="int",    help="the upstream distance  [format: int]")
+    parser.add_option("-d", "--down",      dest="down",           action="store",      default=0,     type="int",    help="the downstream distance  [format: int]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
+    (options, args) = parser.parse_args()
+
+    guds = GetUpDownStream(options.verbosity)
+    guds.setInputFile(options.inputFileName, options.format)
+    guds.setOutputFile(options.outputFileName)
+    guds.setDistances(options.up, options.down)
+    guds.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/RestrictFromCoverage.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,224 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2012
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import os, struct, time, random
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.Interval import Interval
+from SMART.Java.Python.ncList.NCList import NCList
+from SMART.Java.Python.ncList.NCListCursor import NCListCursor
+from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle
+from SMART.Java.Python.ncList.FileSorter import FileSorter
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+from SMART.Java.Python.misc import Utils
+try:
+    import cPickle as pickle
+except:
+    import pickle
+
+REFERENCE = 0
+QUERY = 1
+TYPES = (REFERENCE, QUERY)
+TYPETOSTRING = {0: "reference", 1: "query"}
+
+class RestrictFromCoverage(object):
+
+    def __init__(self, verbosity = 1):
+        self._verbosity               = verbosity
+        self._randomNumber            = random.randint(0, 100000)
+        self._nbWritten               = 0
+        self._nbLines                 = dict([type, 0]  for type in TYPES)
+        self._splittedFileNames       = dict([type, {}] for type in TYPES)
+        self._nbElementsPerChromosome = dict([type, {}] for type in TYPES)
+        self._nbElements              = dict([type, 0]  for type in TYPES)
+        
+    def __del__(self):
+        pass
+
+    def _close(self):
+        self._writer.close()
+        
+    def setInputFileName(self, fileName, format, type):
+        chooser = ParserChooser(self._verbosity)
+        chooser.findFormat(format)
+        parser = chooser.getParser(fileName)
+        sortedFileName = "%s_%d_%d_sorted.pkl" % (os.path.splitext(fileName)[0], self._randomNumber, type)
+        if self._verbosity > 2:
+            print "Preparing %s file..." % (TYPETOSTRING[type])
+        startTime = time.time()
+        fs = FileSorter(parser, self._verbosity-1)
+        fs.perChromosome(True)
+        fs.setOutputFileName(sortedFileName)
+        fs.sort()
+        self._nbLines[type]                 = fs.getNbElements()
+        self._splittedFileNames[type]       = fs.getOutputFileNames()
+        self._nbElementsPerChromosome[type] = fs.getNbElementsPerChromosome()
+        self._nbElements[type]              = fs.getNbElements()
+        endTime = time.time()
+        if self._verbosity > 2:
+            print "    ...done (%ds)" % (endTime - startTime)
+            
+    def setOutputFileName(self, outputFileName):
+        self._writer = Gff3Writer(outputFileName)
+
+    def setPercent(self, minPercent, maxPercent):
+        self._minPercent = minPercent
+        self._maxPercent = maxPercent
+
+    def setNbNucleotides(self, minNb, maxNb):
+        self._minNucleotides = minNb
+        self._maxNucleotides = maxNb
+
+    def setOverlap(self, minOverlap, maxOverlap):
+        self._minOverlap = minOverlap
+        self._maxOverlap = maxOverlap
+
+    def setStrands(self, boolean):
+        self._twoStrands = boolean
+
+    def _compareChromosome(self, chromosome):
+        firstOverlap = 0
+        parser1      = NCListFileUnpickle(self._splittedFileNames[QUERY][chromosome],     self._verbosity)
+        parser2      = NCListFileUnpickle(self._splittedFileNames[REFERENCE][chromosome], self._verbosity)
+        progress     = Progress(self._nbElementsPerChromosome[QUERY][chromosome], "Analyzing %s" % (chromosome), self._verbosity)
+        for transcript1 in parser1.getIterator():
+            firstOverlap = self._compareList(transcript1, parser2)
+            parser2.setInitAddress(firstOverlap)
+            progress.inc()
+        progress.done()
+
+    def _compareList(self, transcript1, parser2):
+        values = []
+        for exon in transcript1.getExons():
+            values.append([0.0] * exon.getSize())
+        firstOverlap = None
+        for transcript2 in parser2.getIterator():
+            address       = parser2.getCurrentTranscriptAddress()
+            nbElements    = float(transcript2.getTagValue("nbElements"))    if "nbElements"    in transcript2.getTagNames() else 1.0
+            nbOccurrences = float(transcript2.getTagValue("nbOccurrences")) if "nbOccurrences" in transcript2.getTagNames() else 1.0
+            nbElements   /= nbOccurrences
+            if transcript2.getStart() > transcript1.getEnd():
+                if firstOverlap == None:
+                    firstOverlap = address
+                if self._checkValues(values):
+                    self._printTranscript(transcript1)
+                return firstOverlap
+            elif transcript1.overlapWith(transcript2):
+                if firstOverlap == None:
+                    firstOverlap = address
+                values = self._compareTranscript(transcript1, transcript2, values, nbElements)
+        if self._checkValues(values):
+            self._printTranscript(transcript1)
+            return firstOverlap
+    
+    def _compareTranscript(self, transcript1, transcript2, values, nbElements):
+        if not transcript1.overlapWith(transcript2) or ((self._twoStrands) and transcript1.getDirection() != transcript2.getDirection()):
+            return values
+        for id1, exon1 in enumerate(transcript1.getExons()):
+            for exon2 in transcript2.getExons():
+                values[id1] = map(sum, zip(values[id1], self._compareExon(exon1, exon2, nbElements)))
+        return values
+        
+    def _compareExon(self, exon1, exon2, nbElements):
+        array = [0.0] * exon1.getSize()
+        if not exon1.overlapWith(exon2) or ((self._twoStrands) and exon1.getDirection() != exon2.getDirection()):
+            return array
+        for pos in range(max(exon1.getStart(), exon2.getStart()) - exon1.getStart(), min(exon1.getEnd(), exon2.getEnd()) - exon1.getStart()+1):
+            array[pos] += nbElements
+        return array
+
+    def _filter(self, value):
+        if self._minOverlap and self._maxOverlap:
+            return self._minOverlap <= value <= self._maxOverlap
+        if self._minOverlap:
+            return self._minOverlap <= value
+        if self._maxOverlap:
+            return value <= self._maxOverlap
+        return True
+
+    def _checkValues(self, values):
+        nbValues    = sum(map(len, values))
+        nbPosValues = sum(map(len, [filter(self._filter, valuePart) for valuePart in values]))
+        ratio       = float(nbPosValues) / nbValues * 100
+        if self._minNucleotides and nbPosValues < self._minNucleotides:
+            return False
+        if self._maxNucleotides and nbPosValues > self._maxNucleotides:
+            return False
+        if self._minPercent and ratio < self._minPercent:
+            return False
+        if self._maxPercent and ratio > self._maxPercent:
+            return False
+        return True
+
+    def _printTranscript(self, transcript):
+        self._writer.addTranscript(transcript)
+        self._nbWritten += 1
+
+    def run(self):
+        for chromosome in sorted(self._splittedFileNames[QUERY].keys()):
+            self._compareChromosome(chromosome)
+        self._close()
+        if self._verbosity > 0:
+            print "# queries: %d" % (self._nbElements[QUERY])
+            print "# refs:    %d" % (self._nbElements[REFERENCE])
+            print "# written: %d (%d%%)" % (self._nbWritten, 0 if self._nbElements[QUERY] == 0 else round(float(self._nbWritten) / self._nbElements[QUERY] * 100))
+        
+
+if __name__ == "__main__":
+    description = "Restrict From Coverage v1.0.0: Select the elements from the first set which have a given coverage. [Category: Data Comparison]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input1",           dest="inputFileName1", action="store",                     type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--format1",          dest="format1",        action="store",                     type="string", help="format of file 1 [compulsory] [format: transcript file format]")
+    parser.add_option("-j", "--input2",           dest="inputFileName2", action="store",                     type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
+    parser.add_option("-g", "--format2",          dest="format2",        action="store",                     type="string", help="format of file 2 [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output",           dest="output",         action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-n", "--minNucleotides",   dest="minNucleotides", action="store",      default=None,  type="int",    help="minimum number of nucleotides overlapping to declare an overlap [format: int]")
+    parser.add_option("-N", "--maxNucleotides",   dest="maxNucleotides", action="store",      default=None,  type="int",    help="maximum number of nucleotides overlapping to declare an overlap [format: int]")
+    parser.add_option("-p", "--minPercent",       dest="minPercent",     action="store",      default=None,  type="int",    help="minimum percentage of nucleotides overlapping to declare an overlap [format: int]")
+    parser.add_option("-P", "--maxPercent",       dest="maxPercent",     action="store",      default=None,  type="int",    help="maximum percentage of nucleotides overlapping to declare an overlap [format: int]")
+    parser.add_option("-e", "--minOverlap",       dest="minOverlap",     action="store",      default=None,  type="int",    help="minimum number of elements from 2nd file to declare an overlap [format: int]")
+    parser.add_option("-E", "--maxOverlap",       dest="maxOverlap",     action="store",      default=None,  type="int",    help="maximum number of elements from 2nd file to declare an overlap [format: int]")
+    parser.add_option("-s", "--strands",          dest="strands",        action="store_true", default=False,                help="consider the two strands separately [format: bool] [default: false]")
+    parser.add_option("-v", "--verbosity",        dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    rfc = RestrictFromCoverage(options.verbosity)
+    rfc.setInputFileName(options.inputFileName1, options.format1, QUERY)
+    rfc.setInputFileName(options.inputFileName2, options.format2, REFERENCE)
+    rfc.setOutputFileName(options.output)
+    rfc.setNbNucleotides(options.minNucleotides, options.maxNucleotides)
+    rfc.setPercent(options.minPercent, options.maxPercent)
+    rfc.setOverlap(options.minOverlap, options.maxOverlap)
+    rfc.setStrands(options.strands)
+    rfc.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/SelectByTag.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,148 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Select the transcript such that a tag value is not less than a given threshold"""
+import os
+import sys
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer import MySqlTranscriptWriter
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.RPlotter import RPlotter
+
class SelectByTag(object):
    """Select the transcripts whose tag value matches a given value, or lies
    between two (optional) numeric thresholds.

    Accepted transcripts are written to a GFF3 file, and optionally to MySQL
    tables as well.
    """

    def __init__(self, verbosity = 1):
        # selection criteria (set by the caller before run())
        self.input     = None  # input file name
        self.format    = None  # format of the input file
        self.tag       = None  # name of the tag to test
        self.value     = None  # exact value to match (string), if any
        self.min       = None  # minimum threshold for the numeric comparison
        self.max       = None  # maximum threshold for the numeric comparison
        self.default   = None  # value used when the tag is missing
        self.output    = None  # output file name
        self.mysql     = None  # also write MySQL tables when True
        self.verbosity = verbosity

        # internal state
        self.parser      = None
        self.writer      = None
        self.mysqlWriter = None
        self.nbElements  = None  # number of transcripts read
        self.nbWritten   = 0     # number of transcripts kept


    def setParser(self):
        """Open the input file and count its transcripts."""
        self.parser     = TranscriptContainer(self.input, self.format, self.verbosity)
        self.nbElements = self.parser.getNbTranscripts()


    def setWriter(self):
        """Create the GFF3 writer, and the MySQL writer when requested."""
        self.writer = Gff3Writer(self.output, self.verbosity)
        if self.mysql:
            # NOTE(review): the module-level import only brings in the module
            # 'commons.core.writer.MySqlTranscriptWriter', not the class; this
            # call probably needs
            # 'from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter'
            # (as changeTagName.py does) -- confirm.
            self.mysqlWriter = MySqlTranscriptWriter(self.output, self.verbosity)


    def isAccepted(self, transcript):
        """Return True when the transcript's tag satisfies the criteria.

        When self.value is set, accept on an exact string match or a numeric
        match; otherwise convert the tag value to float and compare it against
        self.min and self.max. Raises an Exception when the tag is missing and
        no default value was provided.
        """
        value = transcript.getTagValue(self.tag)
        if value is None:
            if self.default is not None:
                value = self.default
            else:
                raise Exception("Error! Transcript %s has no tag called '%s'" % (transcript, self.tag))
        if self.value is not None:
            if self.value == str(value):
                return True
            # numeric match; the original used str.isdigit(), which rejected
            # values such as "1.5" -- parsing with float() accepts them too
            try:
                return value == float(self.value)
            except ValueError:
                return False
        value = float(value)
        return (self.min is None or self.min <= value) and (self.max is None or self.max >= value)


    def readInputFile(self):
        """Feed every accepted transcript of the input to the writer(s)."""
        progress = Progress(self.parser.getNbTranscripts(), "Writing transcripts", self.verbosity)
        for transcript in self.parser.getIterator():
            if self.isAccepted(transcript):
                self.writer.addTranscript(transcript)
                if self.mysql:
                    self.mysqlWriter.addTranscript(transcript)
                self.nbWritten += 1
            progress.inc()
        progress.done()


    def writeFile(self):
        """Flush the writer(s)."""
        self.writer.write()
        if self.mysql:
            self.mysqlWriter.write()


    def run(self):
        """Select the transcripts and print simple statistics."""
        self.setParser()
        self.setWriter()
        self.readInputFile()
        self.writeFile()
        if self.verbosity > 0:
            print("%d input" % (self.nbElements))
            if self.nbElements != 0:
                print("%d output (%.2f%%)" % (self.nbWritten, float(self.nbWritten) / self.nbElements * 100))
+
+
+
if __name__ == "__main__":
    
    # parse command line
    # (fixed typo in the original description: "such that a the value")
    description = "Select by Tag v1.0.2: Keep the genomic coordinates such that the value of a given tag is between two limits. [Category: Data Selection]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of the input [compulsory] [format: transcript file format]")
    parser.add_option("-g", "--tag", dest="tag", action="store", default=None, type="string", help="the tag [compulsory] [format: string]")     
    parser.add_option("-a", "--value", dest="value", action="store", default=None, type="string", help="the value to be found [format: string]")     
    parser.add_option("-m", "--min", dest="min", action="store", default=None, type="float", help="the minimum threshold [format: float]")     
    parser.add_option("-M", "--max", dest="max", action="store", default=None, type="float", help="the maximum threshold [format: float]")     
    parser.add_option("-d", "--default", dest="default", action="store", default=None, type="float", help="value if tag is not present [format: float]")     
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-y", "--mysql", dest="mysql", action="store_true", default=False, help="write output into MySQL tables [format: boolean] [default: False]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # transfer the command-line options onto the worker object and run it
    selectByTag         = SelectByTag(options.verbosity)
    selectByTag.input   = options.inputFileName
    selectByTag.format  = options.format
    selectByTag.tag     = options.tag
    selectByTag.value   = options.value
    selectByTag.min     = options.min
    selectByTag.max     = options.max
    selectByTag.default = options.default
    selectByTag.output  = options.outputFileName
    selectByTag.mysql   = options.mysql
    selectByTag.run()
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/WrappGetDistribution.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,96 @@
+#! /usr/bin/env python
+from optparse import OptionParser
+import tarfile
+import os
+import re
+import shutil
+import subprocess
+
+SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
+
def toTar(tarFileName, directory):
    """Pack into ``tarFileName`` every file of ``directory`` whose name
    contains the archive's base name (extension stripped).

    Files are added with paths relative to the current working directory; the
    caller is expected to have chdir'ed into ``directory`` beforehand.
    """
    prefix           = os.path.splitext(tarFileName)[0]
    fileNameBaseName = os.path.basename(prefix)
    tmpTarName       = prefix + ".tmp.tar"
    tmpTarBaseName   = os.path.basename(tmpTarName)
    archive          = tarfile.open(tmpTarName, "w")
    try:
        for entry in os.listdir(directory):
            # match the base name literally (re.escape) and skip the archive
            # itself, which the original version could add half-written
            if entry != tmpTarBaseName and re.search(re.escape(fileNameBaseName), entry):
                archive.add(entry)
    finally:
        # close BEFORE moving: the original moved the still-open archive
        # (possibly losing buffered data) and moved it to the global
        # 'options.outTarFileName' instead of the 'tarFileName' parameter
        archive.close()
    shutil.move(tmpTarName, tarFileName)
+    
+
if __name__ == "__main__":
    
    magnifyingFactor = 1000
    
    # parse command line
    description = "Get Distribution v1.0.1: Get the distribution of the genomic coordinates on a genome. [Category: Visualization]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",       dest="inputFileName",     action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format",      dest="format",            action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")
    parser.add_option("-o", "--output",      dest="outTarFileName",    action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
    parser.add_option("-r", "--reference",   dest="referenceFileName", action="store",      default=None,  type="string", help="file containing the genome [compulsory] [format: file in FASTA format]")
    parser.add_option("-n", "--nbBins",      dest="nbBins",            action="store",      default=1000,  type="int",    help="number of bins [default: 1000] [format: int]")
    parser.add_option("-2", "--bothStrands", dest="bothStrands",       action="store_true", default=False,                help="plot one curve per strand [format: bool] [default: false]")
    parser.add_option("-w", "--raw",         dest="raw",               action="store_true", default=False,                help="plot raw number of occurrences instead of density [format: bool] [default: false]")
    parser.add_option("-x", "--csv",         dest="csv",               action="store_true", default=False,                help="write a .csv file [format: bool]")
    parser.add_option("-c", "--chromosome",  dest="chromosome",        action="store",      default=None,  type="string", help="plot only a chromosome [format: string]")
    parser.add_option("-s", "--start",       dest="start",             action="store",      default=None,  type="int",    help="start from a given region [format: int]")
    parser.add_option("-e", "--end",         dest="end",               action="store",      default=None,  type="int",    help="end from a given region [format: int]")
    parser.add_option("-y", "--yMin",        dest="yMin",              action="store",      default=None,  type="int",    help="minimum value on the y-axis to plot [format: int]")
    parser.add_option("-Y", "--yMax",        dest="yMax",              action="store",      default=None,  type="int",    help="maximum value on the y-axis to plot [format: int]")
    parser.add_option("-g", "--gff",         dest="gff",               action="store_true", default=False,                help="also write GFF3 file [format: bool] [default: false]")
    parser.add_option("-H", "--height",      dest="height",            action="store",      default=None,  type="int",    help="height of the graphics [format: int] [default: 300]")
    parser.add_option("-W", "--width",       dest="width",             action="store",      default=None,  type="int",    help="width of the graphics [format: int] [default: 1000]")
    parser.add_option("-v", "--verbosity",   dest="verbosity",         action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
    parser.add_option("-l", "--log",         dest="log",               action="store_true", default=False,                help="write a log file [format: bool]")
    (options, args) = parser.parse_args()


    absPath = os.getcwd()
    print("the current path is : %s" % absPath)
    # work in a fixed temporary directory; it is removed once the results
    # have been packed into the output tar file
    directory = "/tmp/wrappGetDistribution"
    print("the dir path is : %s" % directory)
    if not os.path.exists(directory):
        os.makedirs(directory)
    os.chdir(directory)
    # -i, -f and -o are compulsory: stop with a clear message instead of
    # failing later with a NameError because 'cmd' was never defined
    if options.inputFileName == None or options.format == None or options.outTarFileName == None:
        parser.error("options -i, -f and -o are compulsory")
    outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
    cmd = "python %s/Java/Python/getDistribution.py -i %s -f %s -o %s -D %s" % (SMART_PATH, options.inputFileName, options.format, outputFileName, directory)
    # forward the optional parameters to getDistribution.py
    if options.referenceFileName != None :
        cmd += " -r %s" % options.referenceFileName
    if options.nbBins != None :
        cmd += " -n %s" % options.nbBins
    if options.chromosome :
        cmd += " -c %s" % options.chromosome 
    if options.start != None :
        cmd += " -s %s" % options.start
    if options.end != None :
        cmd += " -e %s" % options.end
    if options.yMin != None :
        cmd += " -y %s" % options.yMin
    if options.yMax != None :
        cmd += " -Y %s" % options.yMax
    if options.height != None :
        cmd += " -H %s" % options.height
    if options.width != None :
        cmd += " -W %s" % options.width
    if options.bothStrands :
        cmd += " -2" 
    if options.raw :
        cmd += " -w" 
    if options.csv :
        cmd += " -x" 
    if options.gff :
        cmd += " -g"
    if options.log :
        cmd += " -l" 
    print("cmd is:  %s" % cmd)
    status = subprocess.call(cmd, shell=True)
    if status != 0:
        raise Exception("Problem with the execution of command!")
    toTar(options.outTarFileName, directory)
    shutil.rmtree(directory)
+    
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/WrappGetReadDistribution.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,58 @@
+#! /usr/bin/env python
+from optparse import OptionParser
+import tarfile
+import os
+import re
+import shutil
+import subprocess
+
+SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
+
def toTar(tarFileName, directory):
    """Archive into ``tarFileName`` every file of ``directory`` whose name
    contains the archive's base name (extension stripped).

    Entries are added with paths relative to the current working directory;
    the caller is expected to have chdir'ed into ``directory`` beforehand.
    """
    prefix         = os.path.splitext(tarFileName)[0]
    baseName       = os.path.basename(prefix)
    tmpTarName     = prefix + ".tmp.tar"
    tmpTarBaseName = os.path.basename(tmpTarName)
    archive        = tarfile.open(tmpTarName, "w")
    try:
        for entry in os.listdir(directory):
            # treat the base name as a literal string (re.escape), and never
            # add the half-written temporary archive to itself
            if entry != tmpTarBaseName and re.search(re.escape(baseName), entry):
                archive.add(entry)
    finally:
        # close BEFORE moving (the original moved the still-open archive and
        # targeted the global 'options.outTarFileName' instead of the
        # 'tarFileName' parameter)
        archive.close()
    shutil.move(tmpTarName, tarFileName)
+    
+
if __name__ == "__main__":
    
    # parse command line
    description = "Get Read Distribution v1.0.1: Plot the number of identical reads and give the most represented. [Category: Visualization]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file sequence [compulsory] [format: file in sequence format given by -f]")
    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of the file [compulsory] [format: sequence file format]")
    parser.add_option("-n", "--number",    dest="number",         action="store", default=None, type="int",    help="keep the best n    [format: int]")
    parser.add_option("-p", "--percent",   dest="percent",        action="store", default=None, type="float",  help="keep the best n\% [format: float]")
    parser.add_option("-o", "--output",    dest="outTarFileName", action="store",               type="string", help="output file [compulsory] [format: zip]")

    (options, args) = parser.parse_args()


    absPath = os.getcwd()
    print("the current path is : %s" % absPath)
    # work in a fixed temporary directory; it is removed once the results
    # have been packed into the output tar file
    directory = "/tmp/wrappGetReadDistribution"
    print("the dir path is : %s" % directory)
    if not os.path.exists(directory):
        os.makedirs(directory)
    os.chdir(directory)
    # -i, -f and -o are compulsory: stop with a clear message instead of
    # failing later with a NameError because 'cmd' was never defined
    if options.inputFileName == None or options.format == None or options.outTarFileName == None:
        parser.error("options -i, -f and -o are compulsory")
    outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
    cmd = "python %s/Java/Python/getReadDistribution.py -i %s -f %s -o %s -D %s" % (SMART_PATH, options.inputFileName, options.format, outputFileName, directory)
    # forward the optional parameters to getReadDistribution.py
    if options.number != None :
        cmd += " -n %s" % options.number
    if options.percent != None :
        cmd += " -p %s" % options.percent
    print("cmd is:  %s" % cmd)
    status = subprocess.call(cmd, shell=True)
    if status != 0:
        raise Exception("Problem with the execution of command!")
    toTar(options.outTarFileName, directory)
    shutil.rmtree(directory)
+    
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/WrappPlotCoverage.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,89 @@
+#! /usr/bin/env python
+from optparse import OptionParser
+import tarfile
+import os
+import re
+import shutil
+import subprocess
+
+SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
+
def toTar(tarFileName, directory):
    """Pack into ``tarFileName`` every file of ``directory`` whose name
    contains the archive's base name (extension stripped).

    Entries are added with paths relative to the current working directory;
    the caller is expected to have chdir'ed into ``directory`` beforehand.
    """
    prefix         = os.path.splitext(tarFileName)[0]
    baseName       = os.path.basename(prefix)
    tmpTarName     = prefix + ".tmp.tar"
    tmpTarBaseName = os.path.basename(tmpTarName)
    archive        = tarfile.open(tmpTarName, "w")
    try:
        for entry in os.listdir(directory):
            # literal base-name match (re.escape); skip the archive itself,
            # which the original version could add half-written
            if entry != tmpTarBaseName and re.search(re.escape(baseName), entry):
                archive.add(entry)
    finally:
        # close BEFORE moving: the original moved the still-open archive and
        # moved it to the global 'options.outTarFileName' instead of the
        # 'tarFileName' parameter
        archive.close()
    shutil.move(tmpTarName, tarFileName)
+
+
+
if __name__ == "__main__":
    
    # parse command line
    description = "Plot Coverage v1.0.1: Plot the coverage of the first data with respect to the second one. [Category: Visualization]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1",       dest="inputFileName1", action="store",                       type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--inputFormat1", dest="inputFormat1",   action="store",                       type="string", help="format of input file 1 [compulsory] [format: transcript file format]")
    parser.add_option("-j", "--input2",       dest="inputFileName2", action="store",                       type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
    parser.add_option("-g", "--inputFormat2", dest="inputFormat2",   action="store",                       type="string", help="format of input file 2 [compulsory] [format: transcript file format]")
    parser.add_option("-q", "--sequence",     dest="inputSequence",  action="store",      default=None,    type="string", help="input sequence file [format: file in FASTA format] [default: None]")
    parser.add_option("-o", "--output",       dest="outTarFileName", action="store",                       type="string", help="output file [compulsory] [format: output file in zip format]")
    parser.add_option("-w", "--width",        dest="width",          action="store",      default=1500,    type="int",    help="width of the plots (in px) [format: int] [default: 1500]")
    parser.add_option("-e", "--height",       dest="height",         action="store",      default=1000,    type="int",    help="height of the plots (in px) [format: int] [default: 1000]")
    parser.add_option("-t", "--title",        dest="title",          action="store",      default="",      type="string", help="title of the plots [format: string]")
    parser.add_option("-x", "--xlab",         dest="xLabel",         action="store",      default="",      type="string", help="label on the x-axis [format: string]")
    parser.add_option("-y", "--ylab",         dest="yLabel",         action="store",      default="",      type="string", help="label on the y-axis [format: string]")
    parser.add_option("-p", "--plusColor",    dest="plusColor",      action="store",      default="red",   type="string", help="color for the elements on the plus strand [format: string] [default: red]")
    parser.add_option("-m", "--minusColor",   dest="minusColor",     action="store",      default="blue",  type="string", help="color for the elements on the minus strand [format: string] [default: blue]")
    parser.add_option("-s", "--sumColor",     dest="sumColor",       action="store",      default="black", type="string", help="color for 2 strands coverage line [format: string] [default: black]")
    parser.add_option("-l", "--lineColor",    dest="lineColor",      action="store",      default="black", type="string", help="color for the lines [format: string] [default: black]")
    parser.add_option("-1", "--merge",        dest="merge",          action="store_true", default=False,                  help="merge the 2 plots in 1 [format: boolean] [default: false]")
    parser.add_option("-v", "--verbosity",    dest="verbosity",      action="store",      default=1,       type="int",    help="trace level [format: int]")
    (options, args) = parser.parse_args()

    absPath = os.getcwd()
    # work in a fixed temporary directory; it is removed once the results
    # have been packed into the output tar file
    directory = "/tmp/wrappPlotCov"
    if not os.path.exists(directory):
        os.makedirs(directory)
    os.chdir(directory)
    # the five file/format options are compulsory: stop with a clear message
    # instead of failing later with a NameError because 'cmd' was never defined
    if options.inputFileName1 == None or options.inputFormat1 == None or options.inputFileName2 == None or options.inputFormat2 == None or options.outTarFileName == None:
        parser.error("options -i, -f, -j, -g and -o are compulsory")
    outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
    print('outputfile is : %s' % outputFileName)
    # NOTE(review): the command is run with shell=True and the option values
    # (title, labels, ...) are interpolated unquoted -- values containing
    # spaces or shell metacharacters will break or be interpreted by the shell
    cmd = "python %s/Java/Python/plotCoverage.py -i %s -f %s -j %s -g %s -o %s -D %s" % (SMART_PATH, options.inputFileName1, options.inputFormat1, options.inputFileName2, options.inputFormat2, outputFileName, directory)
    # forward the optional parameters to plotCoverage.py
    if options.inputSequence!= None:
        cmd += " -q %s" % options.inputSequence
    if options.width != None:
        cmd += " -w %s" % options.width
    if options.height != None:
        cmd += " -e %s" % options.height
    if options.title != None:
        cmd += " -t %s" % options.title
    if options.xLabel != None:
        cmd += " -x %s" % options.xLabel
    if options.yLabel != None:
        cmd += " -y %s" % options.yLabel
    if options.plusColor != None:
        cmd += " -p %s" % options.plusColor
    if options.minusColor != None:
        cmd += " -m %s" % options.minusColor
    if options.sumColor != None:
        cmd += " -s %s" % options.sumColor
    if options.lineColor != None:
        cmd += " -l %s" % options.lineColor
    if options.merge:
        cmd += " -1"
    status = subprocess.call(cmd, shell=True)
    if status != 0:
        raise Exception("Problem with the execution of command!")
    toTar(options.outTarFileName, directory)
    shutil.rmtree(directory)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/WrappPlotRepartition.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,71 @@
+#! /usr/bin/env python
+from optparse import OptionParser
+import tarfile
+import os
+import re
+import shutil
+import subprocess
+
# join with an explicit "/": the sibling wrapper scripts build this constant
# as "%s/SMART", and REPET_PATH is not guaranteed to end with a slash
SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
+
def toTar(tarFileName, directory):
    """Archive into ``tarFileName`` every file of ``directory`` whose name
    contains the archive's base name (extension stripped).

    Entries are added with paths relative to the current working directory;
    the caller is expected to have chdir'ed into ``directory`` beforehand.
    """
    prefix         = os.path.splitext(tarFileName)[0]
    baseName       = os.path.basename(prefix)
    tmpTarName     = prefix + ".tmp.tar"
    tmpTarBaseName = os.path.basename(tmpTarName)
    archive        = tarfile.open(tmpTarName, "w")
    try:
        for entry in os.listdir(directory):
            # literal base-name match (re.escape); skip the archive itself,
            # which the original version could add half-written
            if entry != tmpTarBaseName and re.search(re.escape(baseName), entry):
                archive.add(entry)
    finally:
        # close BEFORE moving: the original moved the still-open archive and
        # moved it to the global 'options.outTarFileName' instead of the
        # 'tarFileName' parameter
        archive.close()
    shutil.move(tmpTarName, tarFileName)
+    
+
if __name__ == "__main__":
    
    magnifyingFactor = 1000
    
    # parse command line
    description = "Plot the repartition of different data on a whole genome. (This tool uses 1 input file only, the different values being stored in the tags.    See documentation to know more about it.) [Category: Visualization]"


    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",dest="inputFileName",action="store",type="string",help="input file name [compulsory] [format: file in GFF3 format]")
    parser.add_option("-n", "--names",dest="names", action="store", type="string", help="name for the tags (separated by commas and no space) [compulsory] [format: string]")
    parser.add_option("-o", "--output",dest="outTarFileName",action="store",type="string", help="output file [compulsory] [format: output file tar format]")
    parser.add_option("-c", "--color",dest="colors",action="store",default=None,type="string", help="color of the lines (separated by commas and no space) [format: string]")
    parser.add_option("-f", "--format",dest="format",action="store",default="png",type="string", help="format of the output file [format: string] [default: png]")
    parser.add_option("-r", "--normalize",dest="normalize",action="store_true", default=False,help="normalize data (when panels are different) [format: bool] [default: false]")
    parser.add_option("-l", "--log",dest="log",action="store",default="",type="string", help="use log on x- or y-axis (write 'x', 'y' or 'xy') [format: string]")
    parser.add_option("-v", "--verbosity",dest="verbosity",action="store",default=1,type="int",help="trace level [format: int]")
    (options, args) = parser.parse_args()


    absPath = os.getcwd()
    print("the current path is : %s" % absPath)
    # work in a fixed temporary directory; it is removed once the results
    # have been packed into the output tar file
    directory = "/tmp/wrappPlotRepartition"
    print("the dir path is : %s" % directory)
    if not os.path.exists(directory):
        os.makedirs(directory)
    os.chdir(directory)
    # -i and -o are compulsory: stop with a clear message instead of failing
    # later with a NameError because 'cmd' was never defined
    if options.inputFileName == None or options.format == None or options.outTarFileName == None:
        parser.error("options -i and -o are compulsory")
    outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
    cmd = "python %s/Java/Python/plotRepartition.py -i %s -o %s -D %s" % (SMART_PATH, options.inputFileName, outputFileName, directory)
    # forward the optional parameters to plotRepartition.py
    if options.names != None :
        cmd += " -n %s" % options.names
    else: print("You must choose tag names !")
    if options.colors != None :
        cmd += " -c %s" % options.colors
    if options.format != None:
        cmd += " -f %s" % options.format
    if options.normalize :
        cmd += " -r " 
    if options.log != "" :
        cmd += " -l %s" % options.log
    
    print("cmd is:  %s" % cmd)
    status = subprocess.call(cmd, shell=True)
    if status != 0:
        raise Exception("Problem with the execution of command!")
    toTar(options.outTarFileName, directory)
    shutil.rmtree(directory)
+    
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/adaptorStripper.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,115 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Remove adaptors"""
+
+import os
+from optparse import OptionParser
+from SMART.Java.Python.structure.Sequence import Sequence
+from SMART.Java.Python.structure.SequenceList import SequenceList
+from commons.core.parsing.FastaParser import FastaParser
+from commons.core.writer.FastaWriter import FastaWriter
+from SMART.Java.Python.misc.Progress import Progress
+
+
def distance (string1, string2):
    """Return the Hamming distance between two equal-length strings, or None
    when their lengths differ."""
    if len(string1) != len(string2):
        return None
    return sum(1 for c1, c2 in zip(string1, string2) if c1 != c2)
+
+
+
if __name__ == "__main__":
    nbRemaining = 0
    
    # parse command line
    description = "Adaptor Stripper v1.0.1: Remove the adaptor of a list of reads. [Category: Personnal]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",         dest="inputFileName",      action="store",                     type="string", help="input file [compulsory] [format: file in FASTA format]")
    parser.add_option("-o", "--output",        dest="outputFileName",     action="store",                     type="string", help="output file [compulsory] [format: output file in FASTA format]")
    parser.add_option("-5", "--5primeAdaptor", dest="fivePrimeAdaptor",   action="store",                     type="string", help="five prime adaptor [format: string]")
    parser.add_option("-3", "--3primeAdaptor", dest="threePrimeAdaptor",  action="store",                     type="string", help="three prime adaptor [format: string]")
    parser.add_option("-d", "--5primeDist",    dest="fivePrimeDistance",  action="store",      default=3,     type="int",    help="five prime distance [format: int] [default: 3]")
    parser.add_option("-e", "--3primeDist",    dest="threePrimeDistance", action="store",      default=3,     type="int",    help="three prime distance [format: int [default: 3]]")
    parser.add_option("-m", "--3primeSize",    dest="threePrimeSize",     action="store",      default=10,    type="int",    help="three prime size [format: int] [default: 10]")
    parser.add_option("-v", "--verbosity",     dest="verbosity",          action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")
    parser.add_option("-l", "--log",           dest="log",                action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
    (options, args) = parser.parse_args()

    if options.log:
        logHandle = open(options.outputFileName + ".log", "w")


    writer         = FastaWriter(options.outputFileName + ".fas", options.verbosity)
    sequenceParser = FastaParser(options.inputFileName, options.verbosity)
    nbSequences    = sequenceParser.getNbSequences()

    # treat sequences
    progress = Progress(sequenceParser.getNbSequences(), "Analyzing " + options.inputFileName, options.verbosity)
    for sequence in sequenceParser.getIterator():
        # candidate adaptors: the read's prefix and suffix, cut to the
        # lengths of the expected adaptors
        fivePrimeAdaptor  = sequence.getSequence()[0:len(options.fivePrimeAdaptor)]
        threePrimeAdaptor = sequence.getSequence()[len(sequence.sequence)-len(options.threePrimeAdaptor):]

        # check 5' adaptor
        fivePrimeDistance = distance(fivePrimeAdaptor, options.fivePrimeAdaptor)
        # check 3' adaptor: best distance between a suffix of the read (at
        # least threePrimeSize long) and a prefix of the adaptor
        threePrimeDistance = len(threePrimeAdaptor)
        for i in range(options.threePrimeSize, len(threePrimeAdaptor)+1):
            # NOTE(review): distance() returns None when the two slices have
            # different lengths (i > len(options.threePrimeAdaptor)); under
            # Python 2, min() then silently yields None -- confirm the adaptor
            # is never shorter than the read suffix considered here
            threePrimeDistance = min(threePrimeDistance, distance(threePrimeAdaptor[-i:], options.threePrimeAdaptor[:i]))

        # sort candidates
        if fivePrimeDistance > options.fivePrimeDistance:
            if options.log:
                logHandle.write("Sequence %s does not start with the right adaptor (%s != %s)\n" % (sequence.getSequence(), fivePrimeAdaptor, options.fivePrimeAdaptor))
        elif threePrimeDistance > options.threePrimeDistance:
            if options.log:
                logHandle.write("Sequence %s does not end with the right adaptor (%s != %s)\n" % (sequence.getSequence(), threePrimeAdaptor, options.threePrimeAdaptor))
        else:
            # both adaptors recognized: strip them and keep the read
            nbRemaining += 1
            sequence.setSequence(sequence.getSequence()[len(options.fivePrimeAdaptor):len(sequence.getSequence())-len(options.threePrimeAdaptor)])
            writer.addSequence(sequence)

        progress.inc()

    progress.done()

    if options.log:
        logHandle.close()

    writer.write()

    # guard against an empty input file (the original divided by zero here)
    if nbSequences == 0:
        print("kept 0 over 0 (0%)")
    else:
        print("kept %i over %i (%.f%%)" % (nbRemaining, nbSequences, float(nbRemaining) / nbSequences * 100))
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/changeGffFeatures.sh	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,2 @@
+#!/bin/bash
+sed "s/\t$2\t/\t$3\t/g" $1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/changeTagName.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,90 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Change the name of a tag
+"""
+
+import os
+import random
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.Progress import Progress
+from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
+from commons.core.writer.Gff3Writer import Gff3Writer
+
+
+if __name__ == "__main__":
+    
+    # parse command line
+    description = "Change Tag Name v1.0.1: Change the name of tag of a list of transcripts. [Category: Data Modification]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                      type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",                      type="string", help="format of the input file [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                      type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-t", "--tag",         dest="tag",            action="store",                      type="string", help="name of the tag to change [compulsory] [format: string]")
+    parser.add_option("-n", "--name",        dest="name",           action="store",                      type="string", help="new name for the tag [compulsory] [format: string]")
+    parser.add_option("-y", "--mysql",       dest="mysql",          action="store_true", default=False,                 help="mySQL output [format: bool] [default: false]")    
+    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,      type="int",    help="trace level [format: int] [default: 1]")
+    parser.add_option("-l", "--log",         dest="log",            action="store_true", default=False,                 help="write a log file [format: bool] [default: false]")
+    (options, args) = parser.parse_args()
+
+    if options.log:
+        logHandle = open("%s.log" % options.outputFileName, "w")
+
+    # create parser and writer(s)
+    # NOTE: 'parser' is rebound here from the OptionParser to the transcript parser
+    parser      = TranscriptContainer(options.inputFileName, options.inputFormat, options.verbosity)
+    # write to a temporary GFF3 file first; it is renamed to the final name at the end
+    tmpFileName = "tmpTranscriptFile%d.gff3" % (random.randint(0, 100000))
+    writer      = Gff3Writer(tmpFileName, options.verbosity)
+    if options.mysql:
+        mysqlWriter = MySqlTranscriptWriter(options.outputFileName, options.verbosity)
+    outputData = {}
+        
+    # process transcripts
+    # for each transcript carrying tag '-t', move its value to tag '-n'
+    progress = Progress(parser.getNbTranscripts(), "Printing transcripts %s" % (options.inputFileName), options.verbosity)
+    for transcript in parser.getIterator():
+        if options.tag in transcript.tags:
+            value = transcript.tags[options.tag]
+            del transcript.tags[options.tag]
+            transcript.tags[options.name] = value
+        writer.addTranscript(transcript)
+        if options.mysql:
+            mysqlWriter.addTranscript(transcript)
+        progress.inc()
+    progress.done()
+    parser.transcriptListParser.close()
+
+    writer.write()
+
+    if options.mysql:
+        mysqlWriter.write()
+
+    # move the finished temporary file to the requested output name
+    os.rename(tmpFileName, options.outputFileName)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleanGff.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,195 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Clean a GFF file (as given by NCBI or TAIR) and outputs a GFF3 file.
+"""
+
+import os
+import re
+from optparse import OptionParser
+from commons.core.parsing.GffParser import *
+from SMART.Java.Python.misc.RPlotter import *
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+count = {}
+
+class ParsedLine(object):
+    """One GFF line: the raw text plus its parsed type, attributes, ID and parent IDs."""
+
+    def __init__(self, line, cpt):
+        # line: raw GFF line; cpt: 0-based line counter, used to forge an ID
+        # when the line provides none
+        self.line = line
+        self.cpt  = cpt
+        self.parse()
+
+    def parse(self):
+        """Split the line into the 9 GFF fields and extract type, attributes, ID, parents."""
+        self.line = self.line.strip()
+        self.splittedLine = self.line.split(None, 8)
+        if len(self.splittedLine) < 9:
+            raise Exception("Line '%s' has less than 9 fields.  Exiting..." % (self.line))
+        self.type = self.splittedLine[2]
+        self.parseOptions()
+        self.getId()
+        self.getParents()
+
+    def parseOptions(self):
+        """Parse the attribute (9th) field into self.parsedOptions, accepting both
+        GFF3 'key=value' and GFF2 'key value' syntaxes; a bare token becomes the ID."""
+        self.parsedOptions = {}
+        for option in self.splittedLine[8].split(";"):
+            option = option.strip()
+            if option == "": continue
+            posSpace = option.find(" ")
+            posEqual = option.find("=")
+            # '=' before any space -> GFF3 style; otherwise space-separated GFF2 style
+            if posEqual != -1 and (posEqual < posSpace or posSpace == -1):
+                key, value = option.split("=", 1)
+            elif posSpace != -1:
+                key, value = option.split(None, 1)
+            else:
+                key   = "ID"
+                value = option
+            self.parsedOptions[key.strip()] = value.strip(" \"")
+
+    def getId(self):
+        """Set self.id: the explicit ID if present, else '<parent>-<type>-<n>' where n
+        counts same-type siblings (via the module-level 'count'), else 'smart<line#>'."""
+        for key in self.parsedOptions:
+            if key.lower() == "id":
+                self.id = self.parsedOptions[key]
+                return
+        if "Parent" in self.parsedOptions:
+            parent = self.parsedOptions["Parent"].split(",")[0]
+            if parent not in count:
+                count[parent] = {}
+            if self.type not in count[parent]:
+                count[parent][self.type] = 0
+            count[parent][self.type] += 1
+            self.id = "%s-%s-%d" % (parent, self.type, count[parent][self.type])
+        else:
+            self.id = "smart%d" % (self.cpt)
+        self.parsedOptions["ID"] = self.id
+
+    def getParents(self):
+        """Set self.parents to the list of 'Parent'/'Derives_from' IDs, or None."""
+        for key in self.parsedOptions:
+            if key.lower() in ("parent", "derives_from"):
+                self.parents = self.parsedOptions[key].split(",")
+                return
+        self.parents = None
+
+    def removeParent(self):
+        """Drop any parent attribute (used for lines promoted to top level)."""
+        for key in self.parsedOptions.keys():
+            if key.lower() in ("parent", "derives_from"):
+                del self.parsedOptions[key]
+
+    def export(self):
+        """Re-assemble the line, rewriting the attribute field in GFF3 'key=value' syntax."""
+        self.splittedLine[8] = ";".join(["%s=%s" % (key, value) for key, value in self.parsedOptions.iteritems()])
+        return "%s\n" % ("\t".join(self.splittedLine))
+
+
+class CleanGff(object):
+    """Read a GFF file, keep only the accepted feature types, and write a GFF3
+    file in which every parent line is directly followed by its children."""
+
+    def __init__(self, verbosity = 1):
+        self.verbosity = verbosity
+        self.lines         = {}  # id -> ParsedLine for every kept line
+        self.acceptedTypes = []  # feature types (3rd column) to keep
+        self.parents       = []  # top-level lines (no kept parent)
+        self.children      = {}  # parent id -> list of child ParsedLines
+
+    def setInputFileName(self, name):
+        self.inputFile = open(name)
+        
+    def setOutputFileName(self, name):
+        self.outputFile = open(name, "w")
+
+    def setAcceptedTypes(self, types):
+        # types: list of feature-type strings to keep
+        self.acceptedTypes = types
+
+    def parse(self):
+        """Read the input, keeping lines whose type is accepted; skip comment
+        lines ('#') and stop at an embedded FASTA section ('>')."""
+        progress = UnlimitedProgress(100000, "Reading input file", self.verbosity)
+        for cpt, line in enumerate(self.inputFile):
+            if not line or line[0] == "#": continue
+            if line[0] == ">": break
+            parsedLine = ParsedLine(line, cpt)
+            if parsedLine.type in self.acceptedTypes:
+                self.lines[parsedLine.id] = parsedLine
+            progress.inc()
+        progress.done()
+
+    def sort(self):
+        """Group each line under its parent when the parent was kept; otherwise
+        promote it to top level (removing the now-dangling Parent tag)."""
+        progress = Progress(len(self.lines.keys()), "Sorting file", self.verbosity)
+        for line in self.lines.values():
+            parentFound = False
+            if line.parents:
+                for parent in line.parents:
+                    if parent in self.lines:
+                        parentFound = True
+                        if parent in self.children:
+                            self.children[parent].append(line)
+                        else:
+                            self.children[parent] = [line]
+            if not parentFound:
+                line.removeParent()
+                self.parents.append(line)
+            progress.inc()
+        progress.done()
+
+    def write(self):
+        """Write every top-level line followed (recursively) by its children."""
+        progress = Progress(len(self.parents), "Writing output file", self.verbosity)
+        for line in self.parents:
+            self.writeLine(line)
+            progress.inc()
+        self.outputFile.close()
+        progress.done()
+
+    def writeLine(self, line):
+        # depth-first: a parent is immediately followed by all its descendants
+        self.outputFile.write(line.export())
+        if line.id in self.children:
+            for child in self.children[line.id]:
+                self.writeLine(child)
+
+    def run(self):
+        """Full pipeline: parse, group by parent, write."""
+        self.parse()
+        self.sort()
+        self.write()
+
+
+if __name__ == "__main__":
+    
+    # parse command line
+    description = "Clean GFF v1.0.3: Clean a GFF file (as given by NCBI) and outputs a GFF3 file. [Category: Other]"
+
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                      type="string", help="input file name [compulsory] [format: file in GFF format]")
+    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                      type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+    parser.add_option("-t", "--types",     dest="types",          action="store", default="mRNA,exon", type="string", help="list of comma-separated types that you want to keep [format: string] [default: mRNA,exon]")
+    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,           type="int",    help="trace level [format: int]")
+    (options, args) = parser.parse_args()
+
+    # wire the options into a CleanGff instance and run the whole pipeline
+    cleanGff = CleanGff(options.verbosity)
+    cleanGff.setInputFileName(options.inputFileName)
+    cleanGff.setOutputFileName(options.outputFileName)
+    cleanGff.setAcceptedTypes(options.types.split(","))
+    cleanGff.run()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleaning/CleanerChooser.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,80 @@
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
+from SMART.Java.Python.cleaning.GffCleaner import GffCleaner
+from SMART.Java.Python.cleaning.GtfCleaner import GtfCleaner
+from SMART.Java.Python.cleaning.DefaultCleaner import DefaultCleaner
+
+#Attention!! Do not delete the imports!! They are used to know the type of file format!!!
+
+class CleanerChooser(object):
+	"""
+	A class that finds the correct cleaner for a given file format
+	@ivar format: the format
+	@type format: string
+	@ivar cleaner: the cleaner
+	@type cleaner: object
+	@ivar cleanerClass: the class of the cleaner
+	@type cleanerClass: class
+	@ivar verbosity: verbosity
+	@type verbosity: int		
+	"""
+
+	def __init__(self, verbosity = 0):
+		"""
+		Constructor
+		@param verbosity: verbosity
+		@type verbosity: int
+		"""
+		self.verbosity = verbosity
+	
+
+	def findFormat(self, format):
+		"""
+		Find the correct cleaner class for the given format
+		@ivar format: the format
+		@type format: string
+		@return: nothing (stores the class in self.cleanerClass)
+		"""
+		# ask every registered TranscriptListCleaner subclass whether it handles
+		# this format; fall back to the pass-through DefaultCleaner
+		for cleanerClass in TranscriptListCleaner.__subclasses__():
+			if cleanerClass != None:
+				if cleanerClass.getFileFormats() != None and format in cleanerClass.getFileFormats():
+					self.cleanerClass = cleanerClass
+					return
+		self.cleanerClass = DefaultCleaner
+
+
+	def getCleaner(self):
+		"""
+		Get the cleaner previously found
+		@return: a new instance of the chosen cleaner class
+		"""
+		return self.cleanerClass(self.verbosity)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleaning/DefaultCleaner.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,45 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Default cleaner. Does nothing but copying.
+"""
+from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+
+class DefaultCleaner(TranscriptListCleaner):
+	"""Fallback cleaner used when no format-specific cleaner matches:
+	copies the input file to the output unchanged."""
+
+	def __init__(self, verbosity = 1):
+		super(DefaultCleaner, self).__init__(verbosity)
+
+	def _clean(self):
+		# plain copy; reads the whole file into memory at once
+		self.outputHandle.write(self.inputHandle.read())
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleaning/GffCleaner.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,168 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Clean a GFF file (as given by NCBI or TAIR) and outputs a GFF3 file.
+"""
+
+from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+count = {}
+
+class ParsedLine(object):
+	"""One GFF line: the raw text plus its parsed type, attributes, ID and parent IDs."""
+
+	def __init__(self, line, cpt):
+		# line: raw GFF line; cpt: 0-based line counter, used to forge an ID
+		# when the line provides none
+		self.line = line
+		self.cpt  = cpt
+		self.parse()
+
+	def parse(self):
+		"""Split the line into the 9 GFF fields and extract type, attributes, ID, parents."""
+		self.line = self.line.strip()
+		self.splittedLine = self.line.split(None, 8)
+		if len(self.splittedLine) < 9:
+			raise Exception("Line '%s' has less than 9 fields.  Exiting..." % (self.line))
+		self.type = self.splittedLine[2]
+		self.parseOptions()
+		self.getId()
+		self.getParents()
+
+	def parseOptions(self):
+		"""Parse the attribute (9th) field into self.parsedOptions, accepting both
+		GFF3 'key=value' and GFF2 'key value' syntaxes; a bare token becomes the ID."""
+		self.parsedOptions = {}
+		for option in self.splittedLine[8].split(";"):
+			option = option.strip()
+			if option == "": continue
+			posSpace = option.find(" ")
+			posEqual = option.find("=")
+			# '=' before any space -> GFF3 style; otherwise space-separated GFF2 style
+			if posEqual != -1 and (posEqual < posSpace or posSpace == -1):
+				key, value = option.split("=", 1)
+			elif posSpace != -1:
+				key, value = option.split(None, 1)
+			else:
+				key   = "ID"
+				value = option
+			self.parsedOptions[key.strip()] = value.strip(" \"")
+
+	def getId(self):
+		"""Set self.id: the explicit ID if present, else '<parent>-<type>-<n>' where n
+		counts same-type siblings (via the module-level 'count'), else 'smart<line#>'."""
+		for key in self.parsedOptions:
+			if key.lower() == "id":
+				self.id = self.parsedOptions[key]
+				return
+		if "Parent" in self.parsedOptions:
+			parent = self.parsedOptions["Parent"].split(",")[0]
+			if parent not in count:
+				count[parent] = {}
+			if self.type not in count[parent]:
+				count[parent][self.type] = 0
+			count[parent][self.type] += 1
+			self.id = "%s-%s-%d" % (parent, self.type, count[parent][self.type])
+		else:
+			self.id = "smart%d" % (self.cpt)
+		self.parsedOptions["ID"] = self.id
+
+	def getParents(self):
+		"""Set self.parents to the list of 'Parent'/'Derives_from' IDs, or None."""
+		for key in self.parsedOptions:
+			if key.lower() in ("parent", "derives_from"):
+				self.parents = self.parsedOptions[key].split(",")
+				return
+		self.parents = None
+
+	def removeParent(self):
+		"""Drop any parent attribute (used for lines promoted to top level)."""
+		for key in self.parsedOptions.keys():
+			if key.lower() in ("parent", "derives_from"):
+				del self.parsedOptions[key]
+
+	def export(self):
+		"""Re-assemble the line, rewriting the attribute field in GFF3 'key=value' syntax."""
+		self.splittedLine[8] = ";".join(["%s=%s" % (key, value) for key, value in self.parsedOptions.iteritems()])
+		return "%s\n" % ("\t".join(self.splittedLine))
+
+
+class GffCleaner(TranscriptListCleaner):
+	"""Cleaner for GFF files: keeps only accepted feature types and writes a
+	GFF3 file in which every parent line is directly followed by its children."""
+
+	def __init__(self, verbosity = 1):
+		super(GffCleaner, self).__init__(verbosity)
+		self.lines		 = {}  # id -> ParsedLine for every kept line
+		self.acceptedTypes = ["mRNA", "transcript", "exon"]  # default kept types
+		self.parents	   = []  # top-level lines (no kept parent)
+		self.children	  = {}  # parent id -> list of child ParsedLines
+
+	def getFileFormats():
+		# formats handled by this cleaner (queried by CleanerChooser)
+		return ["gff", "gff2", "gff3"]
+	getFileFormats = staticmethod(getFileFormats)
+
+	def setAcceptedTypes(self, types):
+		# types: list of feature types (3rd column) to keep; None keeps all
+		self.acceptedTypes = types
+
+	def parse(self):
+		"""Read the input, keeping accepted lines; skip comment lines ('#') and
+		stop at an embedded FASTA section ('>')."""
+		progress = UnlimitedProgress(100000, "Reading input file", self.verbosity)
+		for cpt, line in enumerate(self.inputHandle):
+			if not line or line[0] == "#": continue
+			if line[0] == ">": break
+			parsedLine = ParsedLine(line, cpt)
+			if self.acceptedTypes == None or parsedLine.type in self.acceptedTypes:
+				self.lines[parsedLine.id] = parsedLine
+			progress.inc()
+		progress.done()
+
+	def sort(self):
+		"""Group each line under its parent when the parent was kept; otherwise
+		promote it to top level (removing the now-dangling Parent tag)."""
+		progress = Progress(len(self.lines.keys()), "Sorting file", self.verbosity)
+		for line in self.lines.values():
+			parentFound = False
+			if line.parents:
+				for parent in line.parents:
+					if parent in self.lines:
+						parentFound = True
+						if parent in self.children:
+							self.children[parent].append(line)
+						else:
+							self.children[parent] = [line]
+			if not parentFound:
+				line.removeParent()
+				self.parents.append(line)
+			progress.inc()
+		progress.done()
+
+	def write(self):
+		"""Write every top-level line followed (recursively) by its children."""
+		progress = Progress(len(self.parents), "Writing output file", self.verbosity)
+		for line in self.parents:
+			self.writeLine(line)
+			progress.inc()
+		progress.done()
+
+	def writeLine(self, line):
+		# depth-first: a parent is immediately followed by all its descendants
+		self.outputHandle.write(line.export())
+		if line.id in self.children:
+			for child in self.children[line.id]:
+				self.writeLine(child)
+
+	def _clean(self):
+		# entry point invoked by TranscriptListCleaner.clean()
+		self.parse()
+		self.sort()
+		self.write()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleaning/GtfCleaner.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,121 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Clean a GTF file
+"""
+
+import shlex
+from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+count = {}
+
+class ParsedLine(object):
+	def __init__(self, line, cpt):
+		self.line = line
+		self.cpt  = cpt
+		self.parse()
+
+	def parse(self):
+		self.line = self.line.strip()
+		self.splittedLine = self.line.split(None, 8)
+		if len(self.splittedLine) < 9:
+			raise Exception("Line '%s' has less than 9 fields.  Exiting..." % (self.line))
+		self.type = self.splittedLine[2]
+		self.parseOptions()
+
+	def parseOptions(self):
+		self.parsedOptions = {}
+		key   = None
+		value = ""
+		for option in shlex.split(self.splittedLine[8]):
+			option = option.strip()
+			if option == "": continue
+			if key == None:
+				key = option
+			else:
+				endValue = False
+				if option[-1] == ";":
+					endValue = True
+					option.rstrip(";")
+				value = "%s \"%s\"" % (value, option)
+				if endValue:
+					self.parsedOptions[key] = value
+					if key == "transcript_id":
+						self.transcriptId = value
+					key   = None
+					value = ""
+
+	def export(self):
+		return "%s\n" % (self.line)
+
+
+class GtfCleaner(TranscriptListCleaner):
+	"""Cleaner for GTF files: keeps only accepted feature types and writes the
+	kept lines grouped by transcript id."""
+
+	def __init__(self, verbosity = 1):
+		super(GtfCleaner, self).__init__(verbosity)
+		self.acceptedTypes = ["exon"]  # feature types kept by default
+		self.parents	   = {}  # transcript id -> list of ParsedLines
+
+	def getFileFormats():
+		# formats handled by this cleaner (queried by CleanerChooser)
+		return ["gtf"]
+	getFileFormats = staticmethod(getFileFormats)
+
+	def setAcceptedTypes(self, types):
+		# types: list of feature types (3rd column) to keep; None keeps all
+		self.acceptedTypes = types
+
+	def parse(self):
+		"""Read the input and bucket every kept line under its transcript id.
+		NOTE(review): assumes every kept line carries a 'transcript_id'
+		attribute; a line without one would raise AttributeError — confirm
+		against expected inputs."""
+		progress = UnlimitedProgress(100000, "Reading input file", self.verbosity)
+		for cpt, line in enumerate(self.inputHandle):
+			if not line or line[0] == "#": continue
+			parsedLine = ParsedLine(line, cpt)
+			if self.acceptedTypes == None or parsedLine.type in self.acceptedTypes:
+				transcriptId = parsedLine.transcriptId
+				if transcriptId not in self.parents:
+					self.parents[parsedLine.transcriptId] = [parsedLine]
+				else:
+					self.parents[parsedLine.transcriptId].append(parsedLine)
+			progress.inc()
+		progress.done()
+
+	def write(self):
+		"""Write the kept lines, grouped by (sorted) transcript id."""
+		progress = Progress(len(self.parents.keys()), "Writing output file", self.verbosity)
+		for parent in sorted(self.parents.keys()):
+			for line in self.parents[parent]:
+				self.outputHandle.write(line.export())
+			progress.inc()
+		progress.done()
+
+	def _clean(self):
+		# entry point invoked by TranscriptListCleaner.clean()
+		self.parse()
+		self.write()
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/cleaning/TranscriptListCleaner.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,63 @@
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import sys
+from SMART.Java.Python.structure.TranscriptList import TranscriptList
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
+class TranscriptListCleaner(object):
+	"""A (quite generic) class that cleans a file containing transcripts"""
+
+	def __init__(self, verbosity = 0):
+		self.verbosity = verbosity
+
+	def setInputFileName(self, fileName):
+		try:
+			self.inputHandle = open(fileName)
+		except IOError:
+			raise Exception("Error! Transcript file '%s' does not exist! Exiting..." % (self.fileName))
+
+	def setOutputFileName(self, fileName):
+		try:
+			self.outputHandle = open(fileName, "w")
+		except IOError:
+			raise Exception("Error! Transcript file '%s' does not exist! Exiting..." % (self.fileName))
+
+	def getFileFormats():
+		pass
+	getFileFormats = staticmethod(getFileFormats)
+
+	def close(self):
+		self.inputHandle.close()
+		self.outputHandle.close()
+
+	def clean(self):
+		self._clean()
+		self.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/clusterize.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,185 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from commons.core.writer.WriterChooser import WriterChooser
+"""Clusterize a set of transcripts"""
+
+import os, os.path, random
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.Gff3Writer import Gff3Writer
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
+from SMART.Java.Python.ncList.FileSorter import FileSorter
+from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+
class Clusterize(object):
	"""Merge (clusterize) overlapping transcripts read from a file.

	Unless declared presorted, the input is first sorted into one temporary
	file per chromosome; a single sweep per chromosome then merges every
	transcript lying within self.distance of an open cluster and writes each
	cluster as soon as it can no longer grow.
	"""

	def __init__(self, verbosity):
		# options, all settable through the setters below
		self.normalize		 = False
		self.presorted		 = False
		self.distance		  = 1
		self.colinear		  = False
		# counters reported by run()
		self.nbWritten		 = 0
		self.nbMerges		  = 0
		self.verbosity		 = verbosity
		# chromosome name -> name of the temporary sorted file
		self.splittedFileNames = {}

	def __del__(self):
		# Remove the per-chromosome temporary files created by _sortFile().
		for fileName in self.splittedFileNames.values():
			os.remove(fileName)

	def setInputFile(self, fileName, format):
		"""Select the input file and its format; also choose a randomized
		temporary sorted-file name (placed under $SMARTTMPPATH when set)."""
		parserChooser = ParserChooser(self.verbosity)
		parserChooser.findFormat(format)
		self.parser = parserChooser.getParser(fileName)
		self.sortedFileName = "%s_sorted_%d.pkl" % (os.path.splitext(fileName)[0], random.randint(1, 100000))
		if "SMARTTMPPATH" in os.environ:
			self.sortedFileName = os.path.join(os.environ["SMARTTMPPATH"], os.path.basename(self.sortedFileName))

	def setOutputFileName(self, fileName, format="gff3", title="S-MART", feature="transcript", featurePart="exon"):
		"""Select the output file, its format and the feature names to write."""
		writerChooser = WriterChooser()
		writerChooser.findFormat(format)
		self.writer = writerChooser.getWriter(fileName)
		self.writer.setTitle(title)
		self.writer.setFeature(feature)
		self.writer.setFeaturePart(featurePart)

	def setDistance(self, distance):
		# max. distance between two transcripts merged into one cluster
		self.distance = distance

	def setColinear(self, colinear):
		# when True, only transcripts on the same strand are merged
		self.colinear = colinear

	def setNormalize(self, normalize):
		# NOTE(review): stored but never read in this class — presumably
		# consumed elsewhere; confirm before removing.
		self.normalize = normalize
		
	def setPresorted(self, presorted):
		# when True, skip the sorting step and read the input directly
		self.presorted = presorted

	def _sortFile(self):
		"""Sort the input into one temporary pickle file per chromosome."""
		if self.presorted:
			return
		fs = FileSorter(self.parser, self.verbosity-4)
		fs.perChromosome(True)
		fs.setPresorted(self.presorted)
		fs.setOutputFileName(self.sortedFileName)
		fs.sort()
		self.splittedFileNames       = fs.getOutputFileNames()
		self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()
		self.nbElements              = fs.getNbElements()
		
	def _iterate(self, chromosome):
		"""Sweep one chromosome (or the whole presorted input when chromosome
		is None) and merge transcripts on the fly.

		'transcripts' holds the clusters still open: each new transcript is
		merged into every cluster it overlaps, clusters definitely left
		behind are flushed, and the remaining ones stay open.
		"""
		if chromosome == None:
			progress = UnlimitedProgress(10000, "Reading input file", self.verbosity)
			parser   = self.parser
		else:
			progress = Progress(self.nbElementsPerChromosome[chromosome], "Checking chromosome %s" % (chromosome), self.verbosity)
			parser   = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)
		transcripts     = []
		self.nbElements = 0
		for newTranscript in parser.getIterator():
			newTranscripts = []
			# mapped reads are reduced to their underlying transcript
			if newTranscript.__class__.__name__ == "Mapping":
				newTranscript = newTranscript.getTranscript()
			for oldTranscript in transcripts:
				if self._checkOverlap(newTranscript, oldTranscript):
					self._merge(newTranscript, oldTranscript)
				elif self._checkPassed(newTranscript, oldTranscript):
					# input is position-sorted, so this cluster can no longer
					# be extended: flush it now
					self._write(oldTranscript)
				else:
					newTranscripts.append(oldTranscript)
			newTranscripts.append(newTranscript)
			transcripts = newTranscripts
			self.nbElements += 1
			progress.inc()
		# flush the clusters still open at the end of the sweep
		for transcript in transcripts:
			self._write(transcript)
		progress.done()

	def _merge(self, transcript1, transcript2):
		# transcript2 is absorbed into transcript1 (its strand is forced to
		# match first, since clusters may mix strands when colinear is False)
		self.nbMerges += 1
		transcript2.setDirection(transcript1.getDirection())
		transcript1.merge(transcript2)

	def _write(self, transcript):
		# write one finished cluster
		self.nbWritten += 1
		self.writer.addTranscript(transcript)

	def _checkOverlap(self, transcript1, transcript2):
		"""True when the two transcripts belong in the same cluster."""
		if transcript1.getChromosome() != transcript2.getChromosome():
			return False
		if self.colinear and transcript1.getDirection() != transcript2.getDirection():
			return False
		if transcript1.getDistance(transcript2) > self.distance:
			return False
		return True

	def _checkPassed(self, transcript1, transcript2):
		# True when transcript1 lies beyond transcript2's reach: the cluster
		# around transcript2 is complete
		return ((transcript1.getChromosome() != transcript2.getChromosome()) or (transcript1.getDistance(transcript2) > self.distance))

	def run(self):
		"""Sort, clusterize every chromosome, then report the counters."""
		self._sortFile()
		if self.presorted:
			self._iterate(None)
		else:
			for chromosome in sorted(self.splittedFileNames.keys()):
				self._iterate(chromosome)
		self.writer.close()
		if self.verbosity > 0:
			# Python 2 print statements (this code base targets Python 2)
			print "# input:   %d" % (self.nbElements)
			print "# written: %d (%d%% overlaps)" % (self.nbWritten, 0 if (self.nbElements == 0) else ((float(self.nbWritten) / self.nbElements) * 100))
			print "# merges:  %d" % (self.nbMerges)
+		
+
if __name__ == "__main__":
	# Command-line entry point: parse the options and run one Clusterize pass.
	description = "Clusterize v1.0.3: clusterize the data which overlap. [Category: Merge]"

	parser = OptionParser(description = description)
	parser.add_option("-i", "--input",     dest="inputFileName",  action="store",				     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
	parser.add_option("-f", "--format",    dest="format",		 action="store",				     type="string", help="format of file [format: transcript file format]")
	parser.add_option("-o", "--output",    dest="outputFileName", action="store",				     type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")
	parser.add_option("-u", "--outputFormat", dest="outputFormat", action="store",     default="gff",		     type="string", help="output file format [format: transcript file format]")
	parser.add_option("-c", "--colinear",  dest="colinear",       action="store_true", default=False,				help="merge colinear transcripts only [format: bool] [default: false]")
	parser.add_option("-d", "--distance",  dest="distance",       action="store",      default=0,     type="int",    help="max. distance between two transcripts to be merged [format: int] [default: 0]")
	parser.add_option("-n", "--normalize", dest="normalize",      action="store_true", default=False,				help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
	parser.add_option("-s", "--sorted",    dest="sorted",		 action="store_true", default=False,				help="input is already sorted [format: bool] [default: false]")
	parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")
	(options, args) = parser.parse_args()

	# configure and run the clusterizer from the parsed options
	c = Clusterize(options.verbosity)
	c.setInputFile(options.inputFileName, options.format)
	c.setOutputFileName(options.outputFileName, options.outputFormat)
	c.setColinear(options.colinear)
	c.setDistance(options.distance)
	c.setNormalize(options.normalize)
	c.setPresorted(options.sorted)
	c.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/clusterizeBySlidingWindows.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,344 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+import re
+from commons.core.writer.WriterChooser import WriterChooser
+"""
+Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.
+"""
+
+import os, os.path
+from optparse import OptionParser
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from SMART.Java.Python.misc.Progress import Progress
+from commons.core.writer.Gff3Writer import Gff3Writer
+
class ClusterizeBySlidingWindows(object):
    """Cluster transcripts into fixed-size sliding windows.

    The genome is split into windows of self.size nucleotides overlapping by
    self.overlap; the number of elements (or an aggregated tag value) is
    computed per window, and one transcript per window is written out.
    """

    def __init__(self, verbosity = 0):
        self.verbosity = verbosity
        # (0, ) = strands pooled together; (-1, 1) after setBothStrands(True)
        self.strands   = (0, )
        self.normalize = False
        self.plot      = None    # optional PNG plot file name
        self.excel     = None    # optional CSV ("Excel") file name
        self.outputFileName = ''
        self.defaultValue = None # fallback when the chosen tag is missing

    def __del__(self):
        pass

    def setInputFile(self, fileName, format):
        """Open the input transcript file."""
        self.parser = TranscriptContainer(fileName, format, self.verbosity)

    def setOutputFileName(self, fileName, format="gff", title="S-MART", feature="transcript", featurePart="exon"):
        """Select the output file, its format and the feature names to write."""
        writerChooser = WriterChooser(self.verbosity)
        writerChooser.findFormat(format)
        self.writer = writerChooser.getWriter(fileName)
        self.writer.setTitle(title)
        self.writer.setFeature(feature)
        self.writer.setFeaturePart(featurePart)
#        self.outputFileName = fileName
#        self.outputFormat = format

    def setWindowSize(self, size):
        # window size, in nucleotides
        self.size = size

    def setWindowOverlap(self, overlap):
        # overlap between two consecutive windows, in nucleotides
        self.overlap = overlap

    def setTag(self, tag):
        # tag whose value is aggregated per window (None: count elements)
        self.tag = tag

    def setOperation(self, operation):
        # aggregation operation: sum, avg, med, min or max (see checkOptions)
        self.operation = operation

    def setBothStrands(self, bothStrands):
        # when True, each strand is accumulated separately
        if bothStrands:
            self.strands = (-1, 1)

    def setNormalize(self, normalize):
        # NOTE(review): stored but never read in this class — confirm use.
        self.normalize = normalize

    def setPlot(self, plot):
        self.plot = plot

    def setExcel(self, excel):
        self.excel = excel

    def setOutputTag(self, tag):
        # name of the output tag holding the element count
        self.outputTagName = tag
        
    def setDefaultValue(self, defaultValue):
        self.defaultValue = defaultValue

    def checkOptions(self):
        """Validate the aggregation operation.

        NOTE(review): aggregateData() also dispatches on "GCpercent", which
        this check rejects — that path therefore looks unreachable; confirm.
        """
#        if self.operation != None:
#            raise Exception("Trying to combine the values without specifying tag! Aborting...")
        if self.operation != None and self.operation not in ("sum", "avg", "med", "min", "max"):
            raise Exception("Do not understand tag '%s'! Aborting..." % (self.operation))

    def getChromosomeSizes(self):
        """Compute, per chromosome, the largest position seen in the input.

        NOTE(review): uses getStart(), so a transcript's extent past its start
        is ignored; presumably good enough for sizing the bins — confirm.
        """
        self.sizes = {}
        progress = Progress(self.parser.getNbTranscripts(), "Getting sizes in genome", self.verbosity)
        for transcript in self.parser.getIterator():
            self.sizes[transcript.getChromosome()] = max(transcript.getStart(), self.sizes.get(transcript.getChromosome(), 0))
            progress.inc()
        progress.done()

    def getBinsFromPos(self, pos):
        """Return the window indices containing position pos (1-based).

        A position inside the overlap region belongs to two consecutive
        windows.  Relies on Python 2 integer division ('/').
        """
        bin = (pos - 1) / (self.size - self.overlap)
        if bin >= 1 and pos <= bin * (self.size - self.overlap) + self.overlap:
            return (bin - 1, bin)
        return (bin, )

    def getPosFromBin(self, bin):
        """Return the (start, end) positions of window 'bin', 1-based inclusive."""
        return (bin * (self.size - self.overlap) + 1, bin * (self.size - self.overlap) + self.size)

    def initializeBins(self):
        """Create the per-strand, per-chromosome accumulators, one slot per window."""
        self.binsPerStrand        = {}  # element counts
        self.sumsPerStrand        = {}  # running sums of the tag values
        self.valuesPerStrand      = {}  # raw tag values (for med/min/max)
        self.toBePlottedPerStrand = {}  # final aggregated value per window
        for strand in self.strands:
            self.binsPerStrand[strand]        = {}
            self.sumsPerStrand[strand]        = {}
            self.valuesPerStrand[strand]      = {}
            self.toBePlottedPerStrand[strand] = {}
            for chromosome in self.sizes:
                binRange = range(self.getBinsFromPos(self.sizes[chromosome])[-1] + 1)
                self.binsPerStrand[strand][chromosome]        = dict([[i, 0]   for i in binRange])
                self.sumsPerStrand[strand][chromosome]        = dict([[i, 0.0] for i in binRange])
                self.valuesPerStrand[strand][chromosome]      = dict([[i, []]  for i in binRange])
                self.toBePlottedPerStrand[strand][chromosome] = dict([[i, 0] for i in binRange])

    def getNbElements(self, transcript):
        """Return the weight of one transcript: nbElements / nbOccurrences
        (both default to 1 when the tags are absent)."""
        nbOccurrences = 1 if "nbOccurrences" not in transcript.getTagNames() else transcript.getTagValue("nbOccurrences")
        nbElements    = 1 if "nbElements"    not in transcript.getTagNames() else transcript.getTagValue("nbElements")
        nbOccurrences = float(nbOccurrences)
        nbElements = float(nbElements)
        nbElements /= float(nbOccurrences)
        return nbElements

    def setBins(self):
        """Accumulate each transcript (weight and optional tag value) into the
        window(s) containing its start position.

        Raises Exception when the requested tag is missing and no default
        value was given.
        """
        progress = Progress(self.parser.getNbTranscripts(), "Setting bins", self.verbosity)
        for transcript in self.parser.getIterator():
            nbElements = self.getNbElements(transcript)
            strand     = transcript.getDirection() if len(self.strands) == 2 else 0
            for bin in self.getBinsFromPos(transcript.getStart()):
                self.binsPerStrand[strand][transcript.getChromosome()][bin] += nbElements
                if self.tag != None:
                    if self.tag not in transcript.getTagNames():
                        if self.defaultValue is None:
                            raise Exception("Tag %s undefined in transcript %s" % (self.tag, transcript))
                        value = self.defaultValue
                    else:
                        value = float(transcript.getTagValue(self.tag))
                    self.sumsPerStrand[strand][transcript.getChromosome()][bin] += value
                    self.valuesPerStrand[strand][transcript.getChromosome()][bin].append(value)
            progress.inc()
        progress.done()

    def aggregateData(self):
        """Fill toBePlottedPerStrand according to self.operation (defaults to
        the raw element counts when no operation is given)."""
        if self.operation == "sum":
            self.computeSumData()
        elif self.operation == "avg":
            self.computeAvgData()
        elif self.operation == "med":
            self.computeMedData()
        elif self.operation == "min":
            self.computeMinData()
        elif self.operation == "max":
            self.computeMaxData()
        elif self.operation == "GCpercent":
            # NOTE(review): checkOptions() rejects "GCpercent", so this branch
            # appears unreachable through run() — confirm before relying on it.
            self.computeGCPercent()
        else:
            self.toBePlottedPerStrand = self.binsPerStrand

    def computeSumData(self):
        # the running sums are already the wanted values
        self.toBePlottedPerStrand = self.sumsPerStrand

    def computeAvgData(self):
        """Average tag value per window (windows with no element keep 0)."""
        for strand in self.strands:
            for chromosome in self.binsPerStrand[strand]:
                for bin in self.binsPerStrand[strand][chromosome]:
                    if self.binsPerStrand[strand][chromosome][bin] != 0:
                        self.toBePlottedPerStrand[strand][chromosome][bin] = float(self.sumsPerStrand[strand][chromosome][bin]) / self.binsPerStrand[strand][chromosome][bin]

    def computeMedData(self):
        """Median tag value per window (sorts each window's values in place)."""
        for strand in self.strands:
            for chromosome in self.binsPerStrand[strand]:
                for bin in self.binsPerStrand[strand][chromosome]:
                    if self.valuesPerStrand[strand][chromosome][bin]:
                        self.valuesPerStrand[strand][chromosome][bin].sort()
                        size = len(self.valuesPerStrand[strand][chromosome][bin])
                        if size % 2 == 1:
                            self.toBePlottedPerStrand[strand][chromosome][bin] = self.valuesPerStrand[strand][chromosome][bin][(size - 1) / 2]
                        else:
                            self.toBePlottedPerStrand[strand][chromosome][bin] = (self.valuesPerStrand[strand][chromosome][bin][size / 2 - 1] + self.valuesPerStrand[strand][chromosome][bin][size / 2]) / 2.0

    def computeMinData(self):
        """Minimum tag value per window."""
        for strand in self.strands:
            for chromosome in self.binsPerStrand[strand]:
                for bin in self.binsPerStrand[strand][chromosome]:
                    if self.valuesPerStrand[strand][chromosome][bin]:
                        self.toBePlottedPerStrand[strand][chromosome][bin] = min(self.valuesPerStrand[strand][chromosome][bin])

    def computeMaxData(self):
        """Maximum tag value per window."""
        for strand in self.strands:
            for chromosome in self.binsPerStrand[strand]:
                for bin in self.binsPerStrand[strand][chromosome]:
                    if self.valuesPerStrand[strand][chromosome][bin]:
                        self.toBePlottedPerStrand[strand][chromosome][bin] = max(self.valuesPerStrand[strand][chromosome][bin])
                        
    def computeGCPercent(self):
        """GC percentage per window ("NA" when at least half the bases are N).

        NOTE(review): setBins() stores lists of floats in valuesPerStrand, but
        this method calls countNt()/getSize() on them as if they were sequence
        objects — this looks broken/dead (see aggregateData); confirm.
        """
        for strand in self.strands:
            for chromosome in self.binsPerStrand[strand]:
                for bin in self.binsPerStrand[strand][chromosome]:
                    if self.valuesPerStrand[strand][chromosome][bin]:
                        subSequence = self.valuesPerStrand[strand][chromosome][bin]
                        NPercent = 100 * (subSequence.countNt("N") / float(subSequence.getSize()))
                        if NPercent >= 50:
                            currentGCpercent = "NA"
                        else:
                            currentGCpercent = subSequence.getGCpercentageInSequenceWithoutCountNInLength()
                        
                        self.toBePlottedPerStrand[strand][chromosome][bin] = currentGCpercent
        #TODO: see if a map method could be used for the various "compute" methods 
        #return currentGCpercent, NPercent
        
    def plotData(self):
        """Plot the aggregated values, one line per chromosome, when a plot
        file was requested."""
        if self.plot != None:
            for strand in self.strands:
                adjunct = ""
                if strand != 0:
                    adjunct = "Strand%d" % (strand)
                for chromosome in self.toBePlottedPerStrand[strand]:
                    if len(self.toBePlottedPerStrand[strand][chromosome].keys()) > 0:
                        plotter = RPlotter(self.plot, self.verbosity)
                        plotter.setFill(0)
                        plotter.addLine(self.toBePlottedPerStrand[strand][chromosome], chromosome)
                        plotter.plot()

    def writeExcel(self):
        """Dump the aggregated values as a CSV table (one row per chromosome,
        one column per window) when an Excel file was requested."""
        if self.excel != None:
            excelFile = open(self.excel, "w")
            for strand in self.strands:
                # header row: the position range of each window
                maxBin = max([max(self.toBePlottedPerStrand[strand][chromosome].keys()) for chromosome in self.binsPerStrand[strand]])
                for bin in range(0, maxBin + 1):
                    excelFile.write(",%d-%d" % self.getPosFromBin(bin))
                excelFile.write("\n")
                for chromosome in self.toBePlottedPerStrand[strand]:
                    excelFile.write("%s" % (chromosome))
                    for bin in self.toBePlottedPerStrand[strand][chromosome]:
                        excelFile.write(",%f" % (self.toBePlottedPerStrand[strand][chromosome][bin]))
                    excelFile.write("\n")
            excelFile.close()

    def printRegions(self):
        """Write one transcript per window, carrying the element count and the
        aggregated value as tags."""
        cpt           = 1
        tagOp         = "nb"
        tagName       = "Elements"
        outputTagName = "nbElements"
        if self.operation != None:
            tagOp = self.operation.lower()
        if self.tag != None:
            tagName = self.tag.title()
        if self.outputTagName != None:
            outputTagName = self.outputTagName
            
     
        #writer = Gff3Writer(self.outputFileName, self.verbosity)
        
        for strand in self.strands:
            for chromosome in self.toBePlottedPerStrand[strand]:
                for bin in self.toBePlottedPerStrand[strand][chromosome]:
                    transcript = Transcript()
                    transcript.setName("region%d" % cpt)
                    transcript.setChromosome(chromosome)
                    transcript.setStart(self.getPosFromBin(bin)[0])
                    transcript.setEnd(self.getPosFromBin(bin)[1])
                    # pooled-strand windows are written on the forward strand
                    transcript.setDirection(1 if strand == 0 else strand)
                    transcript.setTagValue(outputTagName, self.binsPerStrand[strand][chromosome][bin])
                    transcript.setTagValue("%s%s" % (tagOp, tagName), str(self.toBePlottedPerStrand[strand][chromosome][bin]))
                    self.writer.addTranscript(transcript)
                    cpt += 1
        self.writer.close()

    def run(self):
        """Full pipeline: validate, size the bins, accumulate, aggregate, then
        write the optional CSV/plot and the output transcripts."""
        self.checkOptions()
        self.getChromosomeSizes()
        self.initializeBins()
        self.setBins()
        self.aggregateData()
        if self.excel:
            self.writeExcel()
        if self.plot:
            self.plotData()
        self.printRegions()
+
+
if __name__ == "__main__":
    
    # Command-line entry point: parse the options and run one
    # ClusterizeBySlidingWindows pass.
    description = "Clusterize by Sliding Windows v1.0.1: Produces a GFF3 file that clusters a list of transcripts using a sliding window. [Category: Sliding Windows]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")
    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")
    parser.add_option("-u", "--outputFormat", dest="outputFormat",  action="store",     default="gff",  type="string", help="format of the output file [format: transcript file format]")
    parser.add_option("-s", "--size",        dest="size",           action="store",                     type="int",    help="size of the regions [compulsory] [format: int]")
    parser.add_option("-e", "--overlap",     dest="overlap",        action="store",                     type="int",    help="overlap between two consecutive regions [compulsory] [format: int]")
    parser.add_option("-m", "--normalize",   dest="normalize",      action="store_true", default=False,                help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
    parser.add_option("-g", "--tag",         dest="tag",            action="store",      default=None,  type="string", help="use a given tag as input (instead of summing number of features) [format: string]")    
    parser.add_option("-r", "--operation",   dest="operation",      action="store",      default=None,  type="string", help="combine tag value with given operation [format: choice (sum, avg, med, min, max)]")
    parser.add_option("-d", "--defaultValue",dest="defaultValue",   action="store",                     type="float",    help="default value for input tag [format: float]")
    parser.add_option("-w", "--write",       dest="writeTag",       action="store",      default=None,  type="string", help="print the result in the given tag (default usually is 'nbElements') [format: string]")    
    parser.add_option("-2", "--strands",     dest="strands",        action="store_true", default=False,                help="consider the two strands separately [format: bool] [default: false]")
    parser.add_option("-p", "--plot",        dest="plot",           action="store",      default=None,  type="string", help="plot regions to the given file [format: output file in PNG format]")
    parser.add_option("-x", "--excel",       dest="excel",          action="store",      default=None,  type="string", help="write an Excel file to the given file [format: output file in Excel format]")
    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")
    (options, args) = parser.parse_args()

    # configure and run the clusterizer from the parsed options
    cbsw = ClusterizeBySlidingWindows(options.verbosity)
    cbsw.setInputFile(options.inputFileName, options.inputFormat)
    cbsw.setOutputFileName(options.outputFileName, options.outputFormat)
    cbsw.setWindowSize(options.size)
    cbsw.setWindowOverlap(options.overlap)
    cbsw.setTag(options.tag)
    cbsw.setDefaultValue(options.defaultValue)
    cbsw.setOperation(options.operation)
    cbsw.setOutputTag(options.writeTag)
    cbsw.setBothStrands(options.strands)
    cbsw.setPlot(options.plot)
    cbsw.setExcel(options.excel)
    cbsw.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/compareOverlapping.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,126 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Compare overlap of two transcript lists"""
+import sys
+import os
+from optparse import OptionParser
+from SMART.Java.Python.misc import Utils
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
+from SMART.Java.Python.misc.RPlotter import RPlotter
+from commons.core.writer.Gff3Writer import Gff3Writer
+
+class CompareOverlapping(object):
+
+    def __init__(self):
+        self._options = None
+
+
+    def setAttributesFromCmdLine(self):
+        description = "Compare Overlapping v1.0.3: Get the data which overlap with a reference set. [Category: Data Comparison]"
+
+        parser = OptionParser(description = description)
+        parser.add_option("-i", "--input1",           dest="inputFileName1", action="store",                     type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
+        parser.add_option("-f", "--format1",          dest="format1",        action="store",                     type="string", help="format of file 1 [compulsory] [format: transcript file format]")
+        parser.add_option("-j", "--input2",           dest="inputFileName2", action="store",                     type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
+        parser.add_option("-g", "--format2",          dest="format2",        action="store",                     type="string", help="format of file 2 [compulsory] [format: transcript file format]")
+        parser.add_option("-o", "--output",           dest="output",         action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")
+        parser.add_option("-S", "--start1",           dest="start1",         action="store",      default=None,  type="int",    help="only consider the n first nucleotides of the transcripts in file 1 (do not use it with -U) [format: int]")
+        parser.add_option("-s", "--start2",           dest="start2",         action="store",      default=None,  type="int",    help="only consider the n first nucleotides of the transcripts in file 2 (do not use it with -u) [format: int]")
+        parser.add_option("-U", "--end1",             dest="end1",           action="store",      default=None,  type="int",    help="only consider the n last nucleotides of the transcripts in file 1 (do not use it with -S) [format: int]")
+        parser.add_option("-u", "--end2",             dest="end2",           action="store",      default=None,  type="int",    help="only consider the n last nucleotides of the transcripts in file 2 (do not use it with -s) [format: int]")
+        parser.add_option("-t", "--intron",           dest="introns",        action="store_true", default=False,                help="also report introns [format: bool] [default: false]")
+        parser.add_option("-E", "--5primeExtension1", dest="fivePrime1",     action="store",      default=None,  type="int",    help="extension towards 5' in file 1 [format: int]")
+        parser.add_option("-e", "--5primeExtension2", dest="fivePrime2",     action="store",      default=None,  type="int",    help="extension towards 5' in file 2 [format: int]")
+        parser.add_option("-N", "--3primeExtension1", dest="threePrime1",    action="store",      default=None,  type="int",    help="extension towards 3' in file 1 [format: int]")
+        parser.add_option("-n", "--3primeExtension2", dest="threePrime2",    action="store",      default=None,  type="int",    help="extension towards 3' in file 2 [format: int]")
+        parser.add_option("-c", "--colinear",         dest="colinear",       action="store_true", default=False,                help="colinear only [format: bool] [default: false]")
+        parser.add_option("-a", "--antisense",        dest="antisense",      action="store_true", default=False,                help="antisense only [format: bool] [default: false]")
+        parser.add_option("-d", "--distance",         dest="distance",       action="store",      default=None,  type="int",    help="accept some distance between query and reference [format: int]")
+        parser.add_option("-k", "--included",         dest="included",       action="store_true", default=False,                help="keep only elements from file 1 which are included in an element of file 2 [format: bool] [default: false]")
+        parser.add_option("-K", "--including",        dest="including",      action="store_true", default=False,                help="keep only elements from file 2 which are included in an element of file 1 [format: bool] [default: false]")
+        parser.add_option("-m", "--minOverlap",       dest="minOverlap",     action="store",      default=1,     type="int",    help="minimum number of nucleotides overlapping to declare an overlap [format: int] [default: 1]")
+        parser.add_option("-p", "--pcOverlap",        dest="pcOverlap",      action="store",      default=None,  type="int",    help="minimum percentage of nucleotides to overlap to declare an overlap [format: int]")
+        parser.add_option("-O", "--notOverlapping",   dest="notOverlapping", action="store_true", default=False,                help="also output not overlapping data [format: bool] [default: false]")
+        parser.add_option("-x", "--exclude",          dest="exclude",        action="store_true", default=False,                help="invert the match [format: bool] [default: false]")
+        parser.add_option("-v", "--verbosity",        dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
+        parser.add_option("-l", "--log",              dest="log",            action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
+        (self._options, args) = parser.parse_args()
+
+
+    def run(self):             
+        logHandle = None
+        if self._options.log:
+            logHandle = open(self._options.output, "w")
+
+        transcriptContainer1 = TranscriptContainer(self._options.inputFileName1, self._options.format1, self._options.verbosity)
+        transcriptContainer2 = TranscriptContainer(self._options.inputFileName2, self._options.format2, self._options.verbosity)
+        writer               = TranscriptWriter(self._options.output, "gff3", self._options.verbosity)
+
+        transcriptListComparator = TranscriptListsComparator(logHandle, self._options.verbosity)
+        transcriptListComparator.restrictToStart(transcriptListComparator.QUERY, self._options.start1)
+        transcriptListComparator.restrictToStart(transcriptListComparator.REFERENCE, self._options.start2)
+        transcriptListComparator.restrictToEnd(transcriptListComparator.QUERY, self._options.end1)
+        transcriptListComparator.restrictToEnd(transcriptListComparator.REFERENCE, self._options.end2)
+        transcriptListComparator.extendFivePrime(transcriptListComparator.QUERY, self._options.fivePrime1)
+        transcriptListComparator.extendFivePrime(transcriptListComparator.REFERENCE, self._options.fivePrime2)
+        transcriptListComparator.extendThreePrime(transcriptListComparator.QUERY, self._options.threePrime1)
+        transcriptListComparator.extendThreePrime(transcriptListComparator.REFERENCE, self._options.threePrime2)
+        transcriptListComparator.acceptIntrons(transcriptListComparator.QUERY, self._options.introns)
+        transcriptListComparator.acceptIntrons(transcriptListComparator.REFERENCE, self._options.introns)
+        transcriptListComparator.getAntisenseOnly(self._options.antisense)
+        transcriptListComparator.getColinearOnly(self._options.colinear)
+        transcriptListComparator.getInvert(self._options.exclude)
+        transcriptListComparator.setMaxDistance(self._options.distance)
+        transcriptListComparator.setMinOverlap(self._options.minOverlap)
+        transcriptListComparator.setPcOverlap(self._options.pcOverlap)
+        transcriptListComparator.setIncludedOnly(self._options.included)
+        transcriptListComparator.setIncludingOnly(self._options.including)
+        transcriptListComparator.includeNotOverlapping(self._options.notOverlapping)
+        transcriptListComparator.computeOdds(True)
+        transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, transcriptContainer1)
+        transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.REFERENCE, transcriptContainer2)
+        transcriptListComparator.setOutputWriter(writer)
+        transcriptListComparator.compareTranscriptList()
+
+        if self._options.log:
+            logHandle.close()
+
+        if not self._options.exclude:
+            odds = transcriptListComparator.getOdds()
+            if self._options.verbosity > 0 and odds:
+                print "min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(odds)
+                
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the comparison.
    launcher = CompareOverlapping()
    launcher.setAttributesFromCmdLine()
    launcher.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/convertTranscriptFile.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,115 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Read a transcript file and convert it to another format
+"""
+
+import os, re
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.misc.Progress import Progress
+
+
class ConvertTranscriptFile(object):
    """Convert a transcript file from one format to another.

    Attributes mirror the command-line options; they can also be supplied
    directly to the constructor when the class is used as a library.
    """

    def __init__(self, inputFileName="", inputFormat="", outputFileName="", outputFormat="", name="", sequenceFileName=None, strands=False, galaxy=False, feature=None, featurePart=None, verbosity=1):
        self.inputFileName    = inputFileName
        self.inputFormat      = inputFormat
        self.outputFileName   = outputFileName
        self.outputFormat     = outputFormat
        self.name             = name
        self.sequenceFileName = sequenceFileName
        self.strands          = strands
        self.galaxy           = galaxy
        self.feature          = feature
        self.featurePart      = featurePart
        self.verbosity        = verbosity

    def setAttributesFromCmdLine(self):
        """Parse sys.argv and store the resulting options on this instance."""
        description = "Convert Transcript File v1.0.3: Convert a file from a format to another. [Category: Conversion]"
        parser = OptionParser(description = description)
        parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in format given by -f]")
        parser.add_option("-f", "--inputFormat", dest="inputFormat", action="store", type="string", help="format of the input file [compulsory] [format: transcript or mapping file format]")
        parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in format given by -g]")
        parser.add_option("-g", "--outputFormat", dest="outputFormat", action="store", type="string", help="format of the output file [compulsory] [format: transcript file format]")
        parser.add_option("-n", "--name", dest="name", action="store", default="SMART", type="string", help="name for the transcripts [format: string] [default: SMART]")
        parser.add_option("-s", "--sequences", dest="sequenceFileName", action="store", default=None, type="string", help="give the corresponding Multi-Fasta file (useful for EMBL format) [format: string]")
        parser.add_option("-t", "--strands", dest="strands", action="store_true", default=False, help="consider the 2 strands as different (only useful for writing WIG files) [format: bool] [default: False]")
        parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int] [default: 1]")
        parser.add_option("-G", "--galaxy", dest="galaxy", action="store_true", default=False, help="used for galaxy [format: bool] [default: False]")
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        # Copy every parsed option onto the instance, one attribute per option
        # (feature/featurePart have no command-line counterpart).
        for attributeName in ("inputFileName", "inputFormat", "outputFileName", "outputFormat", "name", "sequenceFileName", "strands", "galaxy", "verbosity"):
            setattr(self, attributeName, getattr(options, attributeName))

    def run(self):
        """Read the input file and write it back in the requested format."""
        reader = TranscriptContainer(self.inputFileName, self.inputFormat, self.verbosity)
        writer = TranscriptWriter(self.outputFileName, self.outputFormat, self.verbosity)
        # The writer pulls the transcripts straight from the reader.
        writer.setContainer(reader)

        if self.name is not None:
            writer.setTitle(self.name)
        if self.feature is not None:
            writer.setFeature(self.feature)
        if self.featurePart is not None:
            writer.setFeaturePart(self.featurePart)
        if self.sequenceFileName is not None:
            writer.addSequenceFile(self.sequenceFileName)

        if self.verbosity > 0:
            # print(...) with a single argument is the same under Python 2.
            print("%i items found" % (reader.getNbItems()))

        if self.strands:
            writer.setStrands(True)
        # Perform the actual conversion.
        writer.write()
        writer.close()
+
if __name__ == "__main__":
    # Script entry point: read the options, then convert the file.
    converter = ConvertTranscriptFile()
    converter.setAttributesFromCmdLine()
    converter.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/coordinatesToSequence.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,64 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Convert a list of coordinates to sequences"""
+
+from optparse import OptionParser
+from commons.core.parsing.FastaParser import FastaParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer.FastaWriter import FastaWriter
+from SMART.Java.Python.misc.Progress import Progress
+
+
if __name__ == "__main__":

    # Command-line interface.
    description = "Coordinates to Sequences v1.0.2: Extract the sequences from a list of coordinates. [Category: Conversion]"
    optionParser = OptionParser(description = description)
    optionParser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    optionParser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of file [compulsory] [format: transcript file format]")
    optionParser.add_option("-s", "--sequences", dest="sequences", action="store", type="string", help="file that contains the sequences [compulsory] [format: file in FASTA format]")
    optionParser.add_option("-o", "--output", dest="outputFileName", action="store", default=None, type="string", help="output file (FASTA format) [format: output file in FASTA format]")
    optionParser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    options = optionParser.parse_args()[0]

    # Readers for the coordinates and for the genome, writer for the output.
    transcriptParser = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
    sequenceParser   = FastaParser(options.sequences, options.verbosity)
    writer           = FastaWriter(options.outputFileName, options.verbosity)

    # Extract one sequence per transcript and write it out.
    progress = Progress(transcriptParser.getNbTranscripts(), "Reading %s" % (options.inputFileName), options.verbosity)
    for transcript in transcriptParser.getIterator():
        writer.addSequence(transcript.extractSequence(sequenceParser))
        progress.inc()
    progress.done()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/findTss.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,77 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Find TSS from short reads"""
+import os
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from commons.core.writer.Gff3Writer import Gff3Writer
+
if __name__ == "__main__":

    # Command-line interface.
    description = "Find TSS v1.0.1: Find the transcription start site of a list of transcripts. [Category: Merge]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",     dest="inputFileName", action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format",    dest="format",        action="store",                     type="string", help="format of file [compulsory] [format: transcript file format]")
    parser.add_option("-o", "--output",    dest="output",        action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")
    parser.add_option("-n", "--normalize", dest="normalize",     action="store_true", default=False,                help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
    parser.add_option("-d", "--distance",  dest="distance",      action="store",      default=10,    type="int",    help="distance between two reads to mark the same TSS [format: int] [default: 10]")
    parser.add_option("-e", "--colinear",  dest="colinear",      action="store_true", default=False,                help="group by strand [format: bool] [default: false]")
    parser.add_option("-c", "--csv",       dest="csv",           action="store",      default=None,  type="string", help="output a CSV file in the given path [format: output file in Excel format]")
    parser.add_option("-v", "--verbosity", dest="verbosity",     action="store",      default=1,     type="int",    help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # Collapse reads that start within --distance of each other into TSS
    # clusters; only the read starts (first nucleotide) are compared.
    transcriptContainer = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
    transcriptListComparator = TranscriptListsComparator(None, options.verbosity)
    transcriptListComparator.restrictToStart(transcriptListComparator.QUERY, 1)
    transcriptListComparator.setMaxDistance(options.distance)
    transcriptListComparator.aggregate(True)
    transcriptListComparator.computeOdds(True)
    transcriptListComparator.getColinearOnly(options.colinear)
    transcriptListComparator.setNormalization(options.normalize)
    transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, transcriptContainer)
    transcriptListComparator.setOutputWriter(Gff3Writer(options.output, options.verbosity))
    transcriptListComparator.compareTranscriptListSelfMerge()

    if options.csv != None:
        csvResults = transcriptListComparator.getOddsPerTranscript()
        # Invert the {name: count} mapping in one pass instead of rescanning
        # the whole dictionary for every distinct count (was O(n * m)).
        # Within a count, names keep the dictionary iteration order, so the
        # CSV output is byte-identical to the previous version.
        namesPerNumber = {}
        for name in csvResults:
            namesPerNumber.setdefault(csvResults[name], []).append(name)
        csvFile = open(options.csv, "w")
        csvFile.write("Number,Transcript\n")
        for number in sorted(namesPerNumber):
            csvFile.write("%d," % (number))
            for name in namesPerNumber[number]:
                csvFile.write("%s " % (name))
            csvFile.write("\n")
        csvFile.close()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/fold.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,95 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""
+Read a mapping file (many formats supported) and select some of them
+Mappings should be sorted by read names
+"""
+
+from optparse import OptionParser
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.toolLauncher.RnaFoldLauncher import RnaFoldLauncher
+from commons.core.writer.Gff3Writer import Gff3Writer
+
+
class Fold(object):
    """
    Fold a series of transcripts with RNAfold and report their energy.
    """

    def __init__(self, verbosity = 0):
        # verbosity: trace level, forwarded to the launcher and the writer.
        self.verbosity       = verbosity
        self.rnaFoldLauncher = RnaFoldLauncher(verbosity)
        self.gff3Writer      = None


    def setInputFileName(self, fileName, format):
        """Load the transcripts to fold from *fileName* in the given format."""
        # Bug fix: was 'options.verbosity', which referenced the module-level
        # 'options' created by the __main__ block and raised a NameError when
        # the class was used as a library.
        transcriptContainer = TranscriptContainer(fileName, format, self.verbosity)
        self.rnaFoldLauncher.setTranscriptList(transcriptContainer)


    def setOutputFileName(self, fileName):
        """Write the results to <fileName>.gff3."""
        self.gff3Writer = Gff3Writer("%s.gff3" % (fileName), self.verbosity)


    def setGenomeFileName(self, fileName):
        """Set the genome (FASTA file) the transcript coordinates refer to."""
        self.rnaFoldLauncher.setGenomeFile(fileName)


    def setExtensions(self, fivePrime, threePrime):
        """Extend each transcript towards the 5' and 3' ends before folding."""
        self.rnaFoldLauncher.setExtensions(fivePrime, threePrime)


    def start(self):
        """Run the folding and dump the resulting transcript list to GFF3."""
        self.gff3Writer.addTranscriptList(self.rnaFoldLauncher.getResults())
+
+
+
if __name__ == "__main__":

    # Command-line interface.  NOTE: the name 'options' must stay at module
    # level because Fold.setInputFileName reads it as a global.
    description = "Fold v1.0.1: Fold a list of transcript and give the energy. [Category: Personal]"
    parser = OptionParser(description = description)
    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of file [compulsory] [format: transcript file format]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-g", "--genome", dest="genomeFileName", action="store", type="string", help="genome file name [format: file in FASTA format]")
    parser.add_option("-5", "--fivePrime", dest="fivePrime", action="store", type="int", help="extend towards the 5' end [format: int]")
    parser.add_option("-3", "--threePrime", dest="threePrime", action="store", type="int", help="extend towards the 3' end [format: int]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # Configure the folder, then run it.
    iFold = Fold(options.verbosity)
    iFold.setInputFileName(options.inputFileName, options.format)
    iFold.setOutputFileName(options.outputFileName)
    iFold.setExtensions(options.fivePrime, options.threePrime)
    iFold.setGenomeFileName(options.genomeFileName)
    iFold.start()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/getDifference.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,155 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+"""Restrict a transcript list with some parameters (regions)"""
+
+from optparse import OptionParser
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
+from commons.core.writer.Gff3Writer import Gff3Writer
+from commons.core.parsing.FastaParser import FastaParser
+from SMART.Java.Python.misc.Progress import Progress
+
+class DifferenceGetter(object):
+
+    def __init__(self, verbosity):
+        self.verbosity        = verbosity
+        self.annotationParser = None
+        self.referenceParser  = None