Repository 's_mart'
hg clone https://toolshed.g2.bx.psu.edu/repos/yufei-luo/s_mart

Changeset 35:d94018ca4ada (2013-04-30)
Previous changeset 34:529e3e6a0954 (2013-04-30) Next changeset 36:44d5973c188c (2013-04-30)
Commit message:
Deleted selected files
removed:
SMART/DiffExpAnal/DESeqTools/HTseqClean.R
SMART/DiffExpAnal/DESeqTools/MAplotDE.R
SMART/DiffExpAnal/DESeqTools/RNAseqFunctions.R
SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R
SMART/DiffExpAnal/DESeqTools/barplotNul.R
SMART/DiffExpAnal/DESeqTools/barplotTC.R
SMART/DiffExpAnal/DESeqTools/boxplotCounts.R
SMART/DiffExpAnal/DESeqTools/clusterPlot.R
SMART/DiffExpAnal/DESeqTools/densityPlot.R
SMART/DiffExpAnal/DESeqTools/exportComplete.R
SMART/DiffExpAnal/DESeqTools/exportDiff.R
SMART/DiffExpAnal/DESeqTools/histoRawp.R
SMART/DiffExpAnal/DESeqTools/loadCountData.R
SMART/DiffExpAnal/DESeqTools/loadTargetFile.R
SMART/DiffExpAnal/DESeqTools/majSequence.R
SMART/DiffExpAnal/DESeqTools/pairwiseSERE.R
SMART/DiffExpAnal/DESeqTools/pairwiseScatterPlots.R
SMART/DiffExpAnal/DESeqTools/plotDispEstimates.R
SMART/DiffExpAnal/DESeqTools/raw/f1cond1.tsv
SMART/DiffExpAnal/DESeqTools/raw/f1cond2.tsv
SMART/DiffExpAnal/DESeqTools/raw/f2cond1.tsv
SMART/DiffExpAnal/DESeqTools/raw/f2cond2.tsv
SMART/DiffExpAnal/DESeqTools/raw2counts.R
SMART/DiffExpAnal/DESeqTools/removeNul.R
SMART/DiffExpAnal/__init__.py
SMART/DiffExpAnal/bam_to_sam_parallel.py
SMART/DiffExpAnal/bam_to_sam_parallel.xml
SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.py
SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.xml
SMART/DiffExpAnal/compareOverlapping_parallel.py
SMART/DiffExpAnal/compareOverlapping_parallel.xml
SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.py
SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.xml
SMART/DiffExpAnal/countNumber.pl
SMART/DiffExpAnal/countNumber.xml
SMART/DiffExpAnal/countNumber_parallel.py
SMART/DiffExpAnal/countNumber_parallel.xml
SMART/DiffExpAnal/countNumber_parallel_unSQL.py
SMART/DiffExpAnal/countNumber_parallel_unSQL.xml
SMART/DiffExpAnal/deseq.sh
SMART/DiffExpAnal/deseq.xml
SMART/DiffExpAnal/fastq_groomer_parallel.py
SMART/DiffExpAnal/fastq_groomer_parallel.xml
SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.py
SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.xml
SMART/DiffExpAnal/gsnap.xml
SMART/DiffExpAnal/gsnap_parallel_unSQL.py
SMART/DiffExpAnal/gsnap_parallel_unSQL.xml
SMART/DiffExpAnal/listInputs.pl
SMART/DiffExpAnal/listInputs.xml
SMART/DiffExpAnal/loadHTSeqResultFiles.py
SMART/DiffExpAnal/loadHTSeqResultFiles.xml
SMART/DiffExpAnal/loadMultiFastqFiles.py
SMART/DiffExpAnal/loadMultiFastqFiles.sh
SMART/DiffExpAnal/loadMultiFastqFiles.xml
SMART/DiffExpAnal/testR.R
SMART/DiffExpAnal/testR.sh
SMART/DiffExpAnal/tophat_parallel.py
SMART/DiffExpAnal/tophat_parallel.xml
SMART/DiffExpAnal/tophat_parallel_unSQL.py
SMART/DiffExpAnal/tophat_parallel_unSQL.xml
SMART/DiffExpAnal/wrappGSNAP.py
SMART/Java/File.java
SMART/Java/Files.java
SMART/Java/FormatType.java
SMART/Java/FormatsContainer.java
SMART/Java/FormatsReader.java
SMART/Java/Global.java
SMART/Java/Installer/Old/PasswordAsker.java
SMART/Java/Installer/Old/SmartInstaller.java
SMART/Java/Installer/Old/SmartInstallerTask.java
SMART/Java/Installer/PasswordAsker.java
SMART/Java/Installer/SmartInstaller.jar
SMART/Java/Installer/SmartInstaller.java
SMART/Java/Installer/SmartInstallerTask.java
SMART/Java/Installer/build.sh
SMART/Java/Installer/manifest.txt
SMART/Java/Installer/s-mart.zip
SMART/Java/LICENSE.txt
SMART/Java/Program.java
SMART/Java/ProgramFileReader.java
SMART/Java/ProgramLauncher.java
SMART/Java/ProgramOption.java
SMART/Java/Python/.RData
SMART/Java/Python/.gitignore
SMART/Java/Python/100%
SMART/Java/Python/CleanTranscriptFile.py
SMART/Java/Python/ClusterizeByTags.py
SMART/Java/Python/CollapseReads.py
SMART/Java/Python/CombineTags.py
SMART/Java/Python/CompareOverlapping.py
SMART/Java/Python/CompareOverlapping.pyc
SMART/Java/Python/CompareOverlappingSmallQuery.py
SMART/Java/Python/CompareOverlappingSmallRef.py
SMART/Java/Python/ComputeCoverage.py
SMART/Java/Python/CountLoci.py
SMART/Java/Python/CountReadGCPercent.py
SMART/Java/Python/FindOverlapsOptim.py
SMART/Java/Python/GetDifferentialExpression.py
SMART/Java/Python/GetDistribution.py
SMART/Java/Python/GetFlanking.py
SMART/Java/Python/GetFlanking.pyc
SMART/Java/Python/GetRandomSubset.py
SMART/Java/Python/GetReadDistribution.py
SMART/Java/Python/GetReadSizes.py
SMART/Java/Python/GetUpDownStream.py
SMART/Java/Python/GetUpDownStream.pyc
SMART/Java/Python/Helitrons.fasta
SMART/Java/Python/RestrictFromCoverage.py
SMART/Java/Python/Rplots.pdf
SMART/Java/Python/S1_S3_blast.blast
SMART/Java/Python/SelectByTag.py
SMART/Java/Python/TestFiles/SR1.fastq
SMART/Java/Python/TestFiles/Wig/chr1.wig
SMART/Java/Python/TestFiles/adress.txt
SMART/Java/Python/TestFiles/clusterize_default_expected.gff3
SMART/Java/Python/TestFiles/clusterize_default_expected.map
SMART/Java/Python/TestFiles/clusterize_normalize_expected.gff3
SMART/Java/Python/TestFiles/clusterize_output_tag_expected.gff3
SMART/Java/Python/TestFiles/clusterize_strands_expected.gff3
SMART/Java/Python/TestFiles/expOutputGff.gff3
SMART/Java/Python/TestFiles/expRef.fasta
SMART/Java/Python/TestFiles/inputCR.gff3
SMART/Java/Python/TestFiles/inputFileTest1.bed
SMART/Java/Python/TestFiles/inputFileTest2.bed
SMART/Java/Python/TestFiles/inputMSWC1.gff3
SMART/Java/Python/TestFiles/inputMSWC2.gff3
SMART/Java/Python/TestFiles/inputMTC.sam
SMART/Java/Python/TestFiles/inputMapping.map
SMART/Java/Python/TestFiles/mapperAnalyzerExpected.gff3
SMART/Java/Python/TestFiles/mapperAnalyzerMappings.axt
SMART/Java/Python/TestFiles/mapperAnalyzerOutput.gff3
SMART/Java/Python/TestFiles/mapperAnalyzerSequences.mfq
SMART/Java/Python/TestFiles/sorted_file_oneline.gff3
SMART/Java/Python/TestFiles/sorted_query.gff3
SMART/Java/Python/TestFiles/sorted_query_wig.wig
SMART/Java/Python/TestFiles/sorted_ref.gff3
SMART/Java/Python/TestFiles/testBedParser1.bed
SMART/Java/Python/TestFiles/testC2S.fa
SMART/Java/Python/TestFiles/testC2S.gff3
SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense1.gff3
SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense2.gff3
SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple1.gff3
SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple2.gff3
SMART/Java/Python/TestFiles/testDifferentialExpressionExpected.gff3
SMART/Java/Python/TestFiles/testDifferentialExpressionOutput.gff3
SMART/Java/Python/TestFiles/testDifferentialExpressionReference.gff3
SMART/Java/Python/TestFiles/testDifferentialExpressionSample1.gff3
SMART/Java/Python/TestFiles/testDifferentialExpressionSample2.gff3
SMART/Java/Python/TestFiles/testGffParser1.gff3
SMART/Java/Python/TestFiles/testPlot.gff3
SMART/Java/Python/TestFiles/testSW.gff3
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList1.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList2.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference1.gff3
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference2.gff3
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation1.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation2.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1_modif.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway2.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple1.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple2.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMerge1.gff3
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeDifferentClusters1.bed
SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeSense1.bed
SMART/Java/Python/TestFiles/testTranscriptNormalize.gff3
SMART/Java/Python/TestFiles/test_distance.bed
SMART/Java/Python/TestFiles/test_minoverlapp.bed
SMART/Java/Python/WrappGetDistribution.py
SMART/Java/Python/WrappGetReadDistribution.py
SMART/Java/Python/WrappPlotCoverage.py
SMART/Java/Python/WrappPlotRepartition.py
SMART/Java/Python/__init__.py
SMART/Java/Python/__init__.pyc
SMART/Java/Python/adaptorStripper.py
SMART/Java/Python/changeGffFeatures.sh
SMART/Java/Python/changeTagName.py
SMART/Java/Python/cleanGff.py
SMART/Java/Python/cleanGff.pyc
SMART/Java/Python/cleaning/CleanerChooser.py
SMART/Java/Python/cleaning/CleanerChooser.pyc
SMART/Java/Python/cleaning/DefaultCleaner.py
SMART/Java/Python/cleaning/DefaultCleaner.pyc
SMART/Java/Python/cleaning/GffCleaner.py
SMART/Java/Python/cleaning/GffCleaner.pyc
SMART/Java/Python/cleaning/GtfCleaner.py
SMART/Java/Python/cleaning/GtfCleaner.pyc
SMART/Java/Python/cleaning/TranscriptListCleaner.py
SMART/Java/Python/cleaning/TranscriptListCleaner.pyc
SMART/Java/Python/cleaning/__init__.py
SMART/Java/Python/cleaning/__init__.pyc
SMART/Java/Python/clusterize.py
SMART/Java/Python/clusterizeBySlidingWindows.py
SMART/Java/Python/compareOverlapping.py
SMART/Java/Python/compare_TAIR10_Reiterative4th.gff3
SMART/Java/Python/convertTranscriptFile.py
SMART/Java/Python/coordinatesToSequence.py
SMART/Java/Python/findTss.py
SMART/Java/Python/fo.py
SMART/Java/Python/fold.py
SMART/Java/Python/genes.gtf
SMART/Java/Python/genome.fasta
SMART/Java/Python/getDifference.py
SMART/Java/Python/getDistance.py
SMART/Java/Python/getDistribution.py
SMART/Java/Python/getElement.py
SMART/Java/Python/getExons.py
SMART/Java/Python/getInfoPerCoverage.py
SMART/Java/Python/getIntrons.py
SMART/Java/Python/getLetterDistribution.py
SMART/Java/Python/getNb.py
SMART/Java/Python/getRandomRegions.py
SMART/Java/Python/getReadDistribution.py
SMART/Java/Python/getSequence.py
SMART/Java/Python/getSizes.py
SMART/Java/Python/getWigData.py
SMART/Java/Python/getWigDistance.py
SMART/Java/Python/getWigProfile.py
SMART/Java/Python/gf.py
SMART/Java/Python/mapperAnalyzer.py
SMART/Java/Python/mappingToCoordinates.py
SMART/Java/Python/mergeSlidingWindowsClusters.py
SMART/Java/Python/mergeTranscriptLists.py
SMART/Java/Python/misc/MultipleRPlotter.py
SMART/Java/Python/misc/MultipleRPlotter.pyc
SMART/Java/Python/misc/Progress.py
SMART/Java/Python/misc/Progress.pyc
SMART/Java/Python/misc/RPlotter.py
SMART/Java/Python/misc/RPlotter.pyc
SMART/Java/Python/misc/UnlimitedProgress.py
SMART/Java/Python/misc/UnlimitedProgress.pyc
SMART/Java/Python/misc/Utils.py
SMART/Java/Python/misc/Utils.pyc
SMART/Java/Python/misc/__init__.py
SMART/Java/Python/misc/__init__.pyc
SMART/Java/Python/misc/test/Test_Utils.py
SMART/Java/Python/misc/test/__init__.py
SMART/Java/Python/modifyFasta.py
SMART/Java/Python/modifyGenomicCoordinates.py
SMART/Java/Python/modifySequenceList.py
SMART/Java/Python/mySql/MySqlConnection.py
SMART/Java/Python/mySql/MySqlConnection.pyc
SMART/Java/Python/mySql/MySqlExonTable.py
SMART/Java/Python/mySql/MySqlExonTable.pyc
SMART/Java/Python/mySql/MySqlQuery.py
SMART/Java/Python/mySql/MySqlQuery.pyc
SMART/Java/Python/mySql/MySqlTable.py
SMART/Java/Python/mySql/MySqlTable.pyc
SMART/Java/Python/mySql/MySqlTranscriptTable.py
SMART/Java/Python/mySql/MySqlTranscriptTable.pyc
SMART/Java/Python/mySql/__init__.py
SMART/Java/Python/mySql/__init__.pyc
SMART/Java/Python/mySql/test/Test_MySqlTranscriptTable.py
SMART/Java/Python/mySql/test/__init__.py
SMART/Java/Python/ncList/ConvertToNCList.py
SMART/Java/Python/ncList/ConvertToNCList.pyc
SMART/Java/Python/ncList/FileSorter.py
SMART/Java/Python/ncList/FileSorter.pyc
SMART/Java/Python/ncList/FindOverlapsWithOneInterval.py
SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervals.py
SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsBin.py
SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsIndex.py
SMART/Java/Python/ncList/FindOverlaps_naif.py
SMART/Java/Python/ncList/NCIndex.py
SMART/Java/Python/ncList/NCIndex.pyc
SMART/Java/Python/ncList/NCList.py
SMART/Java/Python/ncList/NCList.pyc
SMART/Java/Python/ncList/NCListCursor.py
SMART/Java/Python/ncList/NCListCursor.pyc
SMART/Java/Python/ncList/NCListFilePickle.py
SMART/Java/Python/ncList/NCListFilePickle.pyc
SMART/Java/Python/ncList/NCListHandler.py
SMART/Java/Python/ncList/NCListHandler.pyc
SMART/Java/Python/ncList/NCListMerger.py
SMART/Java/Python/ncList/NCListMerger.pyc
SMART/Java/Python/ncList/NCListParser.py
SMART/Java/Python/ncList/NCListParser.pyc
SMART/Java/Python/ncList/__init__.py
SMART/Java/Python/ncList/__init__.pyc
SMART/Java/Python/ncList/test/MockFindOverlapsWithSeveralIntervals.py
SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py
SMART/Java/Python/ncList/test/Test_F_FileSorter.py
SMART/Java/Python/ncList/test/Test_F_FindOverlapsWithOneInterval.py
SMART/Java/Python/ncList/test/Test_F_FindOverlapsWithSeveralIntervals.py
SMART/Java/Python/ncList/test/Test_F_FindOverlaps_naif.py
SMART/Java/Python/ncList/test/Test_F_FindOverlaps_randomExample.py
SMART/Java/Python/ncList/test/Test_F_NCList.py
SMART/Java/Python/ncList/test/Test_FindOverlapsWithOneInterval.py
SMART/Java/Python/ncList/test/Test_FindOverlapsWithSeveralIntervals.py
SMART/Java/Python/ncList/test/Test_FindOverlaps_randomExample.py
SMART/Java/Python/ncList/test/Test_randExample.py
SMART/Java/Python/ncList/test/__init__.py
SMART/Java/Python/plot.py
SMART/Java/Python/plotCoverage.py
SMART/Java/Python/plotGenomeCoverage.py
SMART/Java/Python/plotRepartition.py
SMART/Java/Python/plotTranscriptList.py
SMART/Java/Python/qualToFastq.py
SMART/Java/Python/re_filter_ratio_5_NbReads_100_samples_all_norm_Window100overlap50.gff3
SMART/Java/Python/removeAllTmpTables.py
SMART/Java/Python/removeEmptySequences.py
SMART/Java/Python/removeExonLines.sh
SMART/Java/Python/repetGffConverter.py
SMART/Java/Python/restrictFromNucleotides.py
SMART/Java/Python/restrictFromSize.py
SMART/Java/Python/restrictSequenceList.py
SMART/Java/Python/restrictTranscriptList.py
SMART/Java/Python/runRandomJobs.py
SMART/Java/Python/selectByNbOccurrences.py
SMART/Java/Python/sequenceListSplitter.py
SMART/Java/Python/splitByTag.py
SMART/Java/Python/splitMultiFasta.py
SMART/Java/Python/structure/Bins.py
SMART/Java/Python/structure/Bins.pyc
SMART/Java/Python/structure/Interval.py
SMART/Java/Python/structure/Interval.pyc
SMART/Java/Python/structure/Mapping.py
SMART/Java/Python/structure/Mapping.pyc
SMART/Java/Python/structure/Sequence.py
SMART/Java/Python/structure/Sequence.pyc
SMART/Java/Python/structure/SequenceList.py
SMART/Java/Python/structure/SequenceList.pyc
SMART/Java/Python/structure/SubMapping.py
SMART/Java/Python/structure/SubMapping.pyc
SMART/Java/Python/structure/Transcript.py
SMART/Java/Python/structure/Transcript.pyc
SMART/Java/Python/structure/TranscriptContainer.py
SMART/Java/Python/structure/TranscriptContainer.pyc
SMART/Java/Python/structure/TranscriptList.py
SMART/Java/Python/structure/TranscriptList.pyc
SMART/Java/Python/structure/TranscriptListIterator.py
SMART/Java/Python/structure/TranscriptListsComparator.py
SMART/Java/Python/structure/TranscriptListsComparator.pyc
SMART/Java/Python/structure/__init__.py
SMART/Java/Python/structure/__init__.pyc
SMART/Java/Python/structure/test/Test_Interval.py
SMART/Java/Python/structure/test/Test_Mapping.py
SMART/Java/Python/structure/test/Test_Sequence.py
SMART/Java/Python/structure/test/Test_SubMapping.py
SMART/Java/Python/structure/test/Test_Transcript.py
SMART/Java/Python/structure/test/Test_TranscriptListsComparator.py
SMART/Java/Python/structure/test/__init__.py
SMART/Java/Python/test.gff3
SMART/Java/Python/test.pdf
SMART/Java/Python/test.png
SMART/Java/Python/test/MockGetLetterDistribution.py
SMART/Java/Python/test/Test_F_Clusterize.py
SMART/Java/Python/test/Test_F_ClusterizeByTags.py
SMART/Java/Python/test/Test_F_CollapseReads.py
SMART/Java/Python/test/Test_F_CombineTags.py
SMART/Java/Python/test/Test_F_FindOverlapsOptim.py
SMART/Java/Python/test/Test_F_GetDifferentialExpression.py
SMART/Java/Python/test/Test_F_GetFlanking.py
SMART/Java/Python/test/Test_F_GetRandomSubset.py
SMART/Java/Python/test/Test_F_GetSizes.py
SMART/Java/Python/test/Test_F_RestrictFromCoverage.py
SMART/Java/Python/test/Test_F_clusterizeBySlidingWindows.py
SMART/Java/Python/test/Test_F_compareOverlapping.py
SMART/Java/Python/test/Test_F_convertTranscriptFile.py
SMART/Java/Python/test/Test_F_coordinatesToSequence.py
SMART/Java/Python/test/Test_F_findTss.py
SMART/Java/Python/test/Test_F_getExons.py
SMART/Java/Python/test/Test_F_getLetterDistribution.py
SMART/Java/Python/test/Test_F_getRandomRegions.py
SMART/Java/Python/test/Test_F_getReadDistribution.py
SMART/Java/Python/test/Test_F_getWigData.py
SMART/Java/Python/test/Test_F_getWigDistance.py
SMART/Java/Python/test/Test_F_getWigProfile.py
SMART/Java/Python/test/Test_F_mapperAnalyzer.py
SMART/Java/Python/test/Test_F_mappingToCoordinates.py
SMART/Java/Python/test/Test_F_mergeSlidingWindowsClusters.py
SMART/Java/Python/test/Test_F_mergeTranscriptLists.py
SMART/Java/Python/test/Test_F_plot.py
SMART/Java/Python/test/Test_F_plotCoverage.py
SMART/Java/Python/test/Test_F_qualToFastq.py
SMART/Java/Python/test/Test_F_restrictSequenceList.py
SMART/Java/Python/test/Test_F_selectByTag.py
SMART/Java/Python/test/Test_F_trimSequences.py
SMART/Java/Python/test/Test_FindOverlapsOptim.py
SMART/Java/Python/test/Test_FindOverlaps_optim.py
SMART/Java/Python/test/__init__.py
SMART/Java/Python/test/timeResults.R
SMART/Java/Python/test3.gff3
SMART/Java/Python/test3.png
SMART/Java/Python/test3.png_I.png
SMART/Java/Python/testInstall.py
SMART/Java/Python/testOut.gff3
SMART/Java/Python/toolLauncher/RnaFoldLauncher.py
SMART/Java/Python/toolLauncher/__init__.py
SMART/Java/Python/trimAdaptor.py
SMART/Java/Python/trimSequence.py
SMART/Java/Python/trimSequences.py
SMART/Java/Python/txtToFasta.py
SMART/Java/Python/updateQual.py
SMART/Java/Python/wigExploder.py
SMART/Java/Python/wrongFastqToQual.py
SMART/Java/PythonHelperReader.java
SMART/Java/PythonProgramFinder.java
SMART/Java/README.txt
SMART/Java/Sav/File.java
SMART/Java/Sav/Files.java
SMART/Java/Sav/FormatType.java
SMART/Java/Sav/FormatsContainer.java
SMART/Java/Sav/FormatsReader.java
SMART/Java/Sav/Global.java
SMART/Java/Sav/Program.java
SMART/Java/Sav/ProgramFileReader.java
SMART/Java/Sav/ProgramLauncher.java
SMART/Java/Sav/ProgramOption.java
SMART/Java/Sav/PythonHelperReader.java
SMART/Java/Sav/PythonProgramFinder.java
SMART/Java/Sav/Smart.java
SMART/Java/Smart.jar
SMART/Java/Smart.java
SMART/Java/SmartInstaller.jar
SMART/Java/WindowsRegistry.java
SMART/Java/__init__.py
SMART/Java/__init__.pyc
SMART/Java/doc.pdf
SMART/Java/formats.txt
SMART/Java/manifest.txt
SMART/__init__.py
SMART/__init__.pyc
SMART/bacteriaRegulatoryRegion_Detection/changeName.py
SMART/bacteriaRegulatoryRegion_Detection/changeName.xml
SMART/bacteriaRegulatoryRegion_Detection/colorGff.pl
SMART/bacteriaRegulatoryRegion_Detection/colorGff.xml
SMART/bacteriaRegulatoryRegion_Detection/coverageGff.pl
SMART/bacteriaRegulatoryRegion_Detection/coverageGff.xml
SMART/bacteriaRegulatoryRegion_Detection/interElementGff.pl
SMART/bacteriaRegulatoryRegion_Detection/interElementGff.xml
SMART/bacteriaRegulatoryRegion_Detection/listGff.sh
SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.sh
SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.xml
SMART/bacteriaRegulatoryRegion_Detection/seedGff.pl
SMART/bacteriaRegulatoryRegion_Detection/seedGff.xml
SMART/bacteriaRegulatoryRegion_Detection/sortGff.pl
SMART/bacteriaRegulatoryRegion_Detection/sortGff.xml
SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.pl
SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.xml
SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.pl
SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.xml
SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.py
SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.xml
SMART/data/REF.fasta
SMART/data/REF.fasta.fai
SMART/data/SR1.fasta
SMART/data/SR1.fastq
SMART/data/SR2.fastq
SMART/data/bamFile.bam
SMART/data/dummy.fasta
SMART/data/expRef.fasta
SMART/data/expRef_withoutSplit.fasta
SMART/data/output.png
SMART/data/part0.bam
SMART/data/part0.sam
SMART/data/part1.bam
SMART/data/part1.sam
SMART/data/part2.bam
SMART/data/part2.sam
SMART/data/part3.bam
SMART/data/part3.sam
SMART/data/part4.bam
SMART/data/part4.sam
SMART/data/samFile.sam
SMART/data/sortedBamFile.bam
SMART/data/test.gff.gff3
SMART/data/test_clusterize.gff3
SMART/data/test_clusterize2.gff3
SMART/galaxy/CleanTranscriptFile.xml
SMART/galaxy/Clusterize.xml
SMART/galaxy/CollapseReads.xml
SMART/galaxy/CompareOverlappingSmallQuery.xml
SMART/galaxy/CompareOverlappingSmallRef.xml
SMART/galaxy/ConvertTranscriptFile.xml
SMART/galaxy/ConvertTranscriptFile_BedToCsv.xml
SMART/galaxy/ConvertTranscriptFile_BedToGff2.xml
SMART/galaxy/ConvertTranscriptFile_BedToGff3.xml
SMART/galaxy/ConvertTranscriptFile_BedToSam.xml
SMART/galaxy/ConvertTranscriptFile_BlastToCsv.xml
SMART/galaxy/ConvertTranscriptFile_BlastToGff2.xml
SMART/galaxy/ConvertTranscriptFile_BlastToGff3.xml
SMART/galaxy/ConvertTranscriptFile_BlastToSam.xml
SMART/galaxy/ConvertTranscriptFile_FastqToFasta.xml
SMART/galaxy/ConvertTranscriptFile_Gff2ToCsv.xml
SMART/galaxy/ConvertTranscriptFile_Gff2ToGff3.xml
SMART/galaxy/ConvertTranscriptFile_Gff2ToSam.xml
SMART/galaxy/ConvertTranscriptFile_Gff3ToCsv.xml
SMART/galaxy/ConvertTranscriptFile_Gff3ToGff2.xml
SMART/galaxy/ConvertTranscriptFile_Gff3ToSam.xml
SMART/galaxy/ConvertTranscriptFile_Gff3ToWig.xml
SMART/galaxy/ConvertTranscriptFile_SamToCsv.xml
SMART/galaxy/ConvertTranscriptFile_SamToGff2.xml
SMART/galaxy/ConvertTranscriptFile_SamToGff3.xml
SMART/galaxy/CountReadGCPercent.xml
SMART/galaxy/DiffExpAnal.xml
SMART/galaxy/FindOverlaps_optim.xml
SMART/galaxy/GetDifferentialExpression.xml
SMART/galaxy/GetFlanking.xml
SMART/galaxy/SelectByTag.xml
SMART/galaxy/WrappGetLetterDistribution.py
SMART/galaxy/WrappGetLetterDistribution.xml
SMART/galaxy/__init__.py
SMART/galaxy/changeGffFeatures.xml
SMART/galaxy/changeTagName.xml
SMART/galaxy/cleanGff.xml
SMART/galaxy/clusterize.xml
SMART/galaxy/clusterizeBySlidingWindows.xml
SMART/galaxy/compareOverlapping.xml
SMART/galaxy/computeCoverage.xml
SMART/galaxy/coordinatesToSequence.xml
SMART/galaxy/findTss.xml
SMART/galaxy/getDifference.xml
SMART/galaxy/getDistance.xml
SMART/galaxy/getDistribution.xml
SMART/galaxy/getExons.xml
SMART/galaxy/getIntrons.xml
SMART/galaxy/getNb.xml
SMART/galaxy/getReadDistribution.xml
SMART/galaxy/getSequence.xml
SMART/galaxy/getSizes.xml
SMART/galaxy/getWigData.xml
SMART/galaxy/getWigDistance.xml
SMART/galaxy/getWigProfile.xml
SMART/galaxy/mapperAnalyzer.xml
SMART/galaxy/mappingToCoordinates.xml
SMART/galaxy/mergeSlidingWindowsClusters.xml
SMART/galaxy/mergeTranscriptLists.xml
SMART/galaxy/modifyFasta.xml
SMART/galaxy/modifyGenomicCoordinates.xml
SMART/galaxy/modifySequenceList.xml
SMART/galaxy/plot.xml
SMART/galaxy/plotCoverage.xml
SMART/galaxy/plotGenomeCoverage.xml
SMART/galaxy/plotRepartition.xml
SMART/galaxy/plotTranscriptList.xml
SMART/galaxy/qualToFastq.xml
SMART/galaxy/removeExonLines.sh
SMART/galaxy/removeExonLines.xml
SMART/galaxy/restrictFromSize.xml
SMART/galaxy/restrictSequenceList.xml
SMART/galaxy/restrictTranscriptList.xml
SMART/galaxy/test/CollapseReads.xml
SMART/galaxy/test/Test_F_WrappGetLetterDistribution.py
SMART/galaxy/test/__init__.py
SMART/galaxy/testArgum.xml
SMART/galaxy/testR.xml
SMART/galaxy/trimAdaptor.xml
SMART/galaxy/trimSequences.xml
commons/__init__.py
commons/__init__.pyc
commons/core/LoggerFactory.py
commons/core/__init__.py
commons/core/checker/AbstractChecker.py
commons/core/checker/CheckerException.py
commons/core/checker/CheckerUtils.py
commons/core/checker/ConfigChecker.py
commons/core/checker/ConfigException.py
commons/core/checker/ConfigValue.py
commons/core/checker/IChecker.py
commons/core/checker/OldConfigChecker.py
commons/core/checker/RepetException.py
commons/core/checker/RepetException.pyc
commons/core/checker/__init__.py
commons/core/checker/__init__.pyc
commons/core/checker/test/TestSuite_Checker.py
commons/core/checker/test/Test_CheckerUtils.py
commons/core/checker/test/Test_ConfigChecker.py
commons/core/checker/test/Test_ConfigValue.py
commons/core/checker/test/Test_F_ConfigChecker.py
commons/core/checker/test/Test_OldConfigChecker.py
commons/core/checker/test/__init__.py
commons/core/coord/Align.py
commons/core/coord/Align.pyc
commons/core/coord/AlignUtils.py
commons/core/coord/ConvCoord.py
commons/core/coord/Map.py
commons/core/coord/Map.pyc
commons/core/coord/MapUtils.py
commons/core/coord/Match.py
commons/core/coord/MatchUtils.py
commons/core/coord/MergedRange.py
commons/core/coord/Path.py
commons/core/coord/PathUtils.py
commons/core/coord/Range.py
commons/core/coord/Range.pyc
commons/core/coord/Set.py
commons/core/coord/SetUtils.py
commons/core/coord/SlidingWindow.py
commons/core/coord/__init__.py
commons/core/coord/__init__.pyc
commons/core/coord/align2set.py
commons/core/coord/test/TestSuite_coord.py
commons/core/coord/test/Test_Align.py
commons/core/coord/test/Test_AlignUtils.py
commons/core/coord/test/Test_ConvCoord.py
commons/core/coord/test/Test_F_ConvCoord.py
commons/core/coord/test/Test_Map.py
commons/core/coord/test/Test_MapUtils.py
commons/core/coord/test/Test_Match.py
commons/core/coord/test/Test_MatchUtils.py
commons/core/coord/test/Test_MergedRange.py
commons/core/coord/test/Test_Path.py
commons/core/coord/test/Test_PathUtils.py
commons/core/coord/test/Test_Range.py
commons/core/coord/test/Test_Set.py
commons/core/coord/test/Test_SetUtils.py
commons/core/coord/test/Test_SlidingWindow.py
commons/core/coord/test/__init__.py
commons/core/parsing/AxtParser.py
commons/core/parsing/AxtParser.pyc
commons/core/parsing/BamParser.py
commons/core/parsing/BamParser.pyc
commons/core/parsing/BedParser.py
commons/core/parsing/BedParser.pyc
commons/core/parsing/BlastParser.py
commons/core/parsing/BlastParser.pyc
commons/core/parsing/BlatFileParser.py
commons/core/parsing/BlatParser.py
commons/core/parsing/BlatToGff.py
commons/core/parsing/BlatToGffForBesPaired.py
commons/core/parsing/BowtieParser.py
commons/core/parsing/BowtieParser.pyc
commons/core/parsing/CoordsParser.py
commons/core/parsing/CoordsParser.pyc
commons/core/parsing/CrossSsrAndBesMappedByBlatToGff.py
commons/core/parsing/ElandParser.py
commons/core/parsing/ElandParser.pyc
commons/core/parsing/ExoParser.py
commons/core/parsing/ExoParser.pyc
commons/core/parsing/FastaParser.py
commons/core/parsing/FastaParser.pyc
commons/core/parsing/FastqParser.py
commons/core/parsing/FastqParser.pyc
commons/core/parsing/FindRep.py
commons/core/parsing/GbParser.py
commons/core/parsing/GffParser.py
commons/core/parsing/GffParser.pyc
commons/core/parsing/GtfParser.py
commons/core/parsing/GtfParser.pyc
commons/core/parsing/MapParser.py
commons/core/parsing/MapParser.pyc
commons/core/parsing/MapperParser.py
commons/core/parsing/MapperParser.pyc
commons/core/parsing/MaqParser.py
commons/core/parsing/MaqParser.pyc
commons/core/parsing/MrepsToSet.py
commons/core/parsing/Multifasta2SNPFile.py
commons/core/parsing/MummerParser.py
commons/core/parsing/NCListParser.py
commons/core/parsing/NCListParser.pyc
commons/core/parsing/NucmerParser.py
commons/core/parsing/PalsToAlign.py
commons/core/parsing/ParserChooser.py
commons/core/parsing/ParserChooser.pyc
commons/core/parsing/PathNum2Id.py
commons/core/parsing/PilerTAToGrouperMap.py
commons/core/parsing/PklParser.py
commons/core/parsing/PklParser.pyc
commons/core/parsing/PslParser.py
commons/core/parsing/PslParser.pyc
commons/core/parsing/README_MultiFasta2SNPFile
commons/core/parsing/RmapParser.py
commons/core/parsing/RmapParser.pyc
commons/core/parsing/SamParser.py
commons/core/parsing/SamParser.pyc
commons/core/parsing/SeqmapParser.py
commons/core/parsing/SeqmapParser.pyc
commons/core/parsing/SequenceListParser.py
commons/core/parsing/SequenceListParser.pyc
commons/core/parsing/ShrimpParser.py
commons/core/parsing/ShrimpParser.pyc
commons/core/parsing/Soap2Parser.py
commons/core/parsing/Soap2Parser.pyc
commons/core/parsing/SoapParser.py
commons/core/parsing/SoapParser.pyc
commons/core/parsing/SsrParser.py
commons/core/parsing/TranscriptListParser.py
commons/core/parsing/TranscriptListParser.pyc
commons/core/parsing/VarscanFile.py
commons/core/parsing/VarscanFileForGnpSNP.py
commons/core/parsing/VarscanHit.py
commons/core/parsing/VarscanHitForGnpSNP.py
commons/core/parsing/VarscanHit_WithTag.py
commons/core/parsing/VarscanHit_v2_2_8.py
commons/core/parsing/VarscanHit_v2_2_8_WithTag.py
commons/core/parsing/VarscanToVCF.py
commons/core/parsing/WigParser.py
commons/core/parsing/WigParser.pyc
commons/core/parsing/__init__.py
commons/core/parsing/__init__.pyc
commons/core/parsing/multifastaParserLauncher.py
commons/core/parsing/test/Test_BedParser.py
commons/core/parsing/test/Test_BlatFileParser.py
commons/core/parsing/test/Test_BlatParser.py
commons/core/parsing/test/Test_BlatToGff.py
commons/core/parsing/test/Test_BlatToGffForBesPaired.py
commons/core/parsing/test/Test_BowtieParser.py
commons/core/parsing/test/Test_CoordsParser.py
commons/core/parsing/test/Test_CrossSsrAndBesMappedByBlatToGff.py
commons/core/parsing/test/Test_F_BlatToGff.py
commons/core/parsing/test/Test_F_BlatToGffForBesPaired.py
commons/core/parsing/test/Test_F_CrossSsrAndBesMappedByBlatToGff.py
commons/core/parsing/test/Test_F_VarscanToVCF.py
commons/core/parsing/test/Test_FastaParser.py
commons/core/parsing/test/Test_FindRep.py
commons/core/parsing/test/Test_GffParser.py
commons/core/parsing/test/Test_MapParser.py
commons/core/parsing/test/Test_MrepsToSet.py
commons/core/parsing/test/Test_Multifasta2SNPFile.py
commons/core/parsing/test/Test_Multifasta2SNPFileWriter.py
commons/core/parsing/test/Test_PalsToAlign.py
commons/core/parsing/test/Test_PathNum2Id.py
commons/core/parsing/test/Test_PslParser.py
commons/core/parsing/test/Test_SsrParser.py
commons/core/parsing/test/Test_VarscanFile.py
commons/core/parsing/test/Test_VarscanFileForGnpSNP.py
commons/core/parsing/test/Test_VarscanHit.py
commons/core/parsing/test/Test_VarscanHitForGnpSNP.py
commons/core/parsing/test/Test_VarscanHit_WithTag.py
commons/core/parsing/test/Test_VarscanHit_v2_2_8.py
commons/core/parsing/test/Test_VarscanHit_v2_2_8_WithTag.py
commons/core/parsing/test/Test_VarscanToVCF.py
commons/core/parsing/test/Test_WigParser.py
commons/core/parsing/test/Test_pilerTAToGrouperMap.py
commons/core/parsing/test/__init__.py
commons/core/parsing/test/data/ExpPotDooblonsSubSNP.csv
commons/core/parsing/test/data/Wig/chr1.wig
commons/core/parsing/test/data/realExpBatchLine.csv
commons/core/parsing/test/data/realExpIndividual.csv
commons/core/parsing/test/data/realExpSequences.fsa
commons/core/parsing/test/data/realExpSubSNP.csv
commons/core/parsing/test/data/real_multifasta_input.fasta
commons/core/parsing/test/data/sampleForTestVarscanToVCF.varscan
commons/core/parsing/test/data/test.wig
commons/core/parsing/test/data/test1.wig
commons/core/parsing/test/data/test2.wig
commons/core/parsing/test/data/testBedParser1.bed
commons/core/parsing/test/data/testCoordsParser.coords
commons/core/parsing/test/data/testCoordsParser_showcoord.coords
commons/core/parsing/test/data/testCoordsParser_showcoord_promer.coords
commons/core/parsing/test/data/testGffParser1.gff3
commons/core/seq/AlignedBioseqDB.py
commons/core/seq/Bioseq.py
commons/core/seq/Bioseq.pyc
commons/core/seq/BioseqDB.py
commons/core/seq/BioseqUtils.py
commons/core/seq/ClusterConsensusCollection.py
commons/core/seq/FastaUtils.py
commons/core/seq/__init__.py
commons/core/seq/__init__.pyc
commons/core/seq/test/TestClusterConsensusCollection.py
commons/core/seq/test/TestSuite_seq.py
commons/core/seq/test/Test_AlignedBioseqDB.py
commons/core/seq/test/Test_Bioseq.py
commons/core/seq/test/Test_BioseqDB.py
commons/core/seq/test/Test_BioseqUtils.py
commons/core/seq/test/Test_FastaUtils.py
commons/core/seq/test/Utils_for_T_FastaUtils.py
commons/core/seq/test/__init__.py
commons/core/utils/FileUtils.py
commons/core/utils/PipelineStepFTests.py
commons/core/utils/RepetConfigParser.py
commons/core/utils/RepetOptionParser.py
commons/core/utils/RepetOptionParser.pyc
commons/core/utils/__init__.py
commons/core/utils/__init__.pyc
commons/core/utils/test/TestSuite_utils.py
commons/core/utils/test/Test_FileUtils.py
commons/core/utils/test/__init__.py
commons/core/writer/BedWriter.py
commons/core/writer/BedWriter.pyc
commons/core/writer/CsvWriter.py
commons/core/writer/CsvWriter.pyc
commons/core/writer/EmblWriter.py
commons/core/writer/EmblWriter.pyc
commons/core/writer/FastaWriter.py
commons/core/writer/FastaWriter.pyc
commons/core/writer/FastqWriter.py
commons/core/writer/FastqWriter.pyc
commons/core/writer/GbWriter.py
commons/core/writer/GbWriter.pyc
commons/core/writer/Gff2Writer.py
commons/core/writer/Gff2Writer.pyc
commons/core/writer/Gff3Writer.py
commons/core/writer/Gff3Writer.pyc
commons/core/writer/GtfWriter.py
commons/core/writer/GtfWriter.pyc
commons/core/writer/MapWriter.py
commons/core/writer/MapWriter.pyc
commons/core/writer/MySqlTranscriptWriter.py
commons/core/writer/MySqlTranscriptWriter.pyc
commons/core/writer/SamWriter.py
commons/core/writer/SamWriter.pyc
commons/core/writer/SequenceListWriter.py
commons/core/writer/SequenceListWriter.pyc
commons/core/writer/TranscriptListWriter.py
commons/core/writer/TranscriptListWriter.pyc
commons/core/writer/TranscriptWriter.py
commons/core/writer/TranscriptWriter.pyc
commons/core/writer/UcscWriter.py
commons/core/writer/UcscWriter.pyc
commons/core/writer/WigWriter.py
commons/core/writer/WigWriter.pyc
commons/core/writer/WriterChooser.py
commons/core/writer/WriterChooser.pyc
commons/core/writer/__init__.py
commons/core/writer/__init__.pyc
commons/core/writer/test/Test_Gff3Writer.py
commons/core/writer/test/Test_MapWriter.py
commons/core/writer/test/__init__.py
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/HTseqClean.R
--- a/SMART/DiffExpAnal/DESeqTools/HTseqClean.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-# HTseqClean
-# remove extra counts out of genes
-# for HTseq output
-
-# input : rawCounts
-# output : cleaned rawCounts
-
-# created Feb 6th, 2012
-# Modified Feb 16th, 2012
-# Marie-Agnes Dillies
-
-
-HTseqClean <- function( rawCounts ){
-
-  row2remove <- c("alignment_not_unique", "ambiguous", "no_feature", "not_aligned", "too_low_aQual")
-  rawCounts <- rawCounts[!rawCounts$Id %in% row2remove,]
-  rawCounts[is.na(rawCounts)] <- 0
-  return(rawCounts)
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/MAplotDE.R
--- a/SMART/DiffExpAnal/DESeqTools/MAplotDE.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,16 +0,0 @@
-# MAplotDE
-# MAplot of DE genes
-
-# input : res, alpha,OUT_MAplotDEName 
-# output : MAplot (png)
-
-MAplotDE <- function( res, alpha, OUT_MAplotDEName, out = TRUE ){
-
-  if (out) png( file=OUT_MAplotDEName )
-  
-  plot( res$baseMean, res$log2FoldChange, pch=".", xlab="Mean expression", ylab="log2FC", main="",
-   log="x", col=ifelse(res$padj < alpha, "red", "black") )
- abline(h=0, col="red")
-
-   if (out) dev.off()
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/RNAseqFunctions.R
--- a/SMART/DiffExpAnal/DESeqTools/RNAseqFunctions.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,38 +0,0 @@
-# RNAseqFunctions
-# when sourced, sources all R functions associated with RNAseq data analysis
-
-RNAseqFunctions <- function( RfuncDir ){
-  
-  source(paste(RfuncDir, "loadTargetFile.R", sep=""))
-  source(paste(RfuncDir, "loadCountData.R", sep=""))
-# source(paste(RfuncDir, "loadStrandData.R", sep=""))
-  source(paste(RfuncDir, "HTseqClean.R", sep=""))
-  source(paste(RfuncDir, "raw2counts.R", sep=""))
-  source(paste(RfuncDir, "barplotTC.R", sep=""))
-  source(paste(RfuncDir, "barplotNul.R", sep=""))
-  source(paste(RfuncDir, "removeNul.R", sep=""))
-  source(paste(RfuncDir, "densityPlot.R", sep=""))
-  source(paste(RfuncDir, "boxplotCounts.R", sep=""))
-  source(paste(RfuncDir, "majSequence.R", sep=""))
-  source(paste(RfuncDir, "clusterPlot.R", sep=""))
-  source(paste(RfuncDir, "pairwiseSERE.R", sep=""))
-  source(paste(RfuncDir, "pairwiseScatterPlots.R", sep=""))
-#  source(paste(RfuncDir, "pairwiseScatterPlotsAll.R", sep=""))
-  source(paste(RfuncDir, "plotDispEstimates.R", sep=""))
-#  source(paste(RfuncDir, "deseqByCond.R", sep="")) 
-#  source(paste(RfuncDir, "edgeRByCond.R", sep=""))  
-#  source(paste(RfuncDir, "fisher.R", sep=""))
-  source(paste(RfuncDir, "histoRawp.R", sep=""))
-#  source(paste(RfuncDir, "histoRawpMconds.R", sep=""))
-  source(paste(RfuncDir, "MAplotDE.R", sep=""))
-#  source(paste(RfuncDir, "MAplotDEMconds.R", sep=""))
-  source(paste(RfuncDir, "exportComplete.R", sep=""))
-#  source(paste(RfuncDir, "exportCompleteEdgeR.R", sep=""))
-#  source(paste(RfuncDir, "exportCompleteFisher.R", sep=""))
-#  source(paste(RfuncDir, "exportCompleteMconds.R", sep=""))
-#  source(paste(RfuncDir, "exportCompleteByCond.R", sep=""))
-#  source(paste(RfuncDir, "exportCompletePaired.R", sep=""))
-  source(paste(RfuncDir, "exportDiff.R", sep=""))
-#  source(paste(RfuncDir, "synthese.R", sep=""))
-#  source(paste(RfuncDir, "exportDiffByCond.R", sep=""))
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R
--- a/SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,191 +0,0 @@
-# Analyse differentielle de donnees d expression par gene
-# avec DESeq
-# 2 conditions
-
-args <- commandArgs()
-#print(args[1])
-#print(args[2])
-#print(args[3])
-#print(args[4])
-#print(args[5])
-#print(args[6])
-#output file names
-#print(args[7]) # HTML file name
-#print(args[8]) # HTML file all images directory 
-#print(args[9]) # complete xls file name
-#print(args[10])# UP xls file name
-#print(args[11]) #Down xls file name
-#print(args[12]) #the executable scipt (for getting the path)
-
-library(R2HTML)
-library(R.utils)
-
-#run example: 
-projectName <- "DESeqAnalysis"
-analysisVersion <- "V1"    # fitType=local, sharingMode=fit-only, method=blind 
-rawDir <- "raw"
-targetFile <- args[4]
-header <- as.integer(args[5]) #si on a header ou pas, si on a, header=1, sinon header=0
-withOutReplicates <- as.integer(args[6])
-
-#get the directory to write the results
-tab <- splitByPattern(args[7], pattern="/")
-res_dir <- ""
-for (e in tab[1:length(tab)-1]) { res_dir <- paste(res_dir, e, sep="")}
-#get the html output file name
-OUT_HTMLname <- args[7]
-#get the images directory to write to
-OUT_imgDir <- args[8]
-#if the directory dosen't existe, we should create it first
-
-alpha <- 0.05
-adjMethod <- "BH"
-outfile <- T
-runningScriptTab <-  splitByPattern(args[12], pattern="/")
-RfuncDir <- ""
-for (r in runningScriptTab[1:length(runningScriptTab)-1]) { RfuncDir <- paste(RfuncDir, r, sep="")} #find the path of executable script  
-RfuncDir <- paste(RfuncDir, "DESeqTools/", sep="") #define the function files path
-# Dossier contenant les fonctions
-print(RfuncDir)
-source( paste(RfuncDir, "RNAseqFunctions.R", sep="/") )
-
-# Chargement des packages et des fonctions
-library(DESeq)
-RNAseqFunctions(RfuncDir)
-# Chargement du target file
-target <- loadTargetFile( targetFile, header )
-# Chargement des donnees, construction d'une table de comptages par gene
-#have changed
-rawCounts <- loadCountData( target, header )
-conds <- unique(target$group)
-cond1 <- as.character(conds[1])
-cond2 <- as.character(conds[!conds == conds[1]])
-rawCounts <- HTseqClean( rawCounts )
-
-# Transformation en matrice de comptages
-counts <- raw2counts( rawCounts )[[1]]
-
-# Nombre de reads par echantillon
-OUT_barplotTCName <- paste(OUT_imgDir, "barplotTC.png", sep="/")
-barplotTC( counts, target$group, OUT_barplotTCName, out=outfile )
-
-# Proportion comptages nuls
-OUT_barplotNulName <- paste(OUT_imgDir, "barplotNul.png", sep="/")
-barplotNul( counts, target$group, OUT_barplotNulName, out=outfile )
-
-# Suppression comptages nuls
-counts <- removeNul( counts )[[1]]
-
-# Density plot
-OUT_densityPlotName <- paste(OUT_imgDir, "densityPlot.png", sep="/")
-densityPlot( counts, target$group, OUT_densityPlotName, out=outfile )
-
-# Boxplot
-OUT_boxplotCountsName <- paste(OUT_imgDir, "boxplotCounts.png", sep="/")
-boxplotCounts( counts, target$group, type = c("raw", "norm"), OUT_boxplotCountsName, out=outfile )
-# Sequence majoritaire
-OUT_majSequenceName <- paste(OUT_imgDir, "majSequence.png", sep="/")
-majSequence( counts, target$group, OUT_majSequenceName, out=outfile )
-
-# ScatterPlot between two samples
-OUT_scatterPlot <- paste(OUT_imgDir, "scatterPlot.png", sep="/")
-pairwiseScatterPlots(counts, target, OUT_scatterPlot, out=outfile, pdffile=FALSE)
-
-# SERE coefficient calculation (Poisson hypothesis for replicates techiques), to know if the variability between the réplicates or the conditons is hight or not. 
-coef <- pairwiseSERE(counts)
-print(coef)
-coef
-# Creation structure de donnees cds, !! we use newCountDataset because that we have first column not numeric, and DESeq dosen't take non numeric values.
-cds <- newCountDataSet( counts, target$group )
-
-# Diagnostic for clustering of non-normalized samples
-OUT_clusterPlot_before <- paste(OUT_imgDir, "clusteringOfSamplesBefore.png", sep="/")
-clusterPlot(cds, OUT_clusterPlot_before, out=outfile)
-
-
-# Normalisation (calcul des lib size factors )
-cds <- estimateSizeFactors( cds )
-
-# Estimation de la dispersion
-# parametres: 
- # method: how samples are pooled to estimate dispersion. If no replicates use "blind"
- # sharingMode: how variance estimate is computed with respect to the fitted line. 
- #  "Maximum" is the most conservative (max between fit and estimation), "fit-only" keeps the estimated value
- # fitType: refers to the model. "Local" is the published model, "parametric" is glm-based (may not converge), now we use "parametric" as default value.
-#in this case, without replicates
-if(withOutReplicates!=0){
- cds <- estimateDispersions( cds, sharingMode="fit-only", method="blind")
-} else if(withOutReplicates==0){
- #cds <- estimateDispersions( cds, sharingMode="fit-only", fitType="local")}
- cds <- estimateDispersions( cds)}
-# Analyse differentielle, ajustement BH par defaut
-res <- nbinomTest( cds, cond1, cond2)  
-
-# Diagnostic for clustering of normalized samples
-OUT_clusterPlot <- paste(OUT_imgDir, "clusteringOfSamples.png", sep="/")
-clusterPlot(cds, OUT_clusterPlot, out=outfile)
-
-# Control plot of dispersion estimates
-OUT_plotDispEstimatesName <- paste(OUT_imgDir, "disperssionEstimates.png", sep="/")
-plotDispEstimates( cds, OUT_plotDispEstimatesName, out=outfile )
-
-# Distribution of raw p-values
-OUT_histoRawpName <- paste(OUT_imgDir, "histoRawPvalue.png", sep="/")
-histoRawp( res, OUT_histoRawpName, out=outfile )
-
-# MAplot showing DE genes
-OUT_MAplotDEName <- paste(OUT_imgDir, "MAplotDE.png", sep="/")
-MAplotDE( res, alpha, OUT_MAplotDEName, out=outfile )
-
-# export complete data
-OUT_completeName <- args[9]
-complete <- exportComplete( counts, res, target, adjMethod, cond1, cond2, OUT_completeName, out=outfile )
-
-# export significant genes
-OUT_upName <- args[10]
-OUT_downName <- args[11]
-diff <- exportDiff( complete, alpha, adjMethod, OUT_upName, OUT_downName, out=outfile )
-
-# write all images results into an HTML file
-prefixHTMLname <- tab[length(tab)]
-#HTMLCSS(file.path(res_dir), filename=prefixHTMLname, CSSfile="R2HTML")
-HTMLInitFile(file.path(res_dir), filename=prefixHTMLname, BackGroundColor="white")
-HTML.title("<center>Differential Expression DESeq analysis.", HR=1) 
-HTML.title("<center>BarplotTC: number of RNA-seq reads per sample.", HR=2)
-     HTMLInsertGraph("barplotTC.png")
-
-HTML.title("<center>BarplotNul: number of RNA-seq reads that the count is 0 (nul).", HR=2)
- HTMLInsertGraph("barplotNul.png")
-
-HTML.title("<center>DensityPlot: density of each sample.", HR=2)
- HTMLInsertGraph("densityPlot.png")
-
-HTML.title("<center>Boxplot: number of RNA-seq reads distribution per sample.", HR=2)
- HTMLInsertGraph("boxplotCounts.png")
-
-HTML.title("<center>MajorSequence: the proportion of reads associated with the most expressed sequence.", HR=2)
- HTMLInsertGraph("majSequence.png")
-
-HTML.title("<center>ScatterPlot: Scatter plot of samples.", HR=2)
- HTMLInsertGraph("scatterPlot.png")
-
-HTML.title("<center>Clustering Of No-Normalized Samples: Representing the no-normalized samples in Diagnostic.", HR=2)
- HTMLInsertGraph("clusteringOfSamplesBefore.png")
-
-HTML.title("<center>Clustering Of Normalized Samples: Representing the normalized samples in Diagnostic.", HR=2)
- HTMLInsertGraph("clusteringOfSamples.png")
-
-HTML.title("<center>DispersionEstimates: representing dispersion estimates vs mean expression.", HR=2)
- HTMLInsertGraph("disperssionEstimates.png")
-
-HTML.title("<center>HistoRawPValue: histogram of raw p-value.", HR=2)
- HTMLInsertGraph("histoRawPvalue.png")
-
-HTML.title("<center>MAplotDE: the differentially expressed genes (red point).", HR=2)
- HTMLInsertGraph("MAplotDE.png")
-HTMLEndFile()
-absoluPrefixHTMLname <- paste(res_dir, prefixHTMLname, sep="")
-outName <- paste(absoluPrefixHTMLname, ".html", sep="")
-# change name is to be adapted into Galaxy
-file.rename(outName, OUT_HTMLname)
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/barplotNul.R
--- a/SMART/DiffExpAnal/DESeqTools/barplotNul.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,19 +0,0 @@
-# barplotNul
-# barplot representing null counts per sample
-
-# input : counts, target, projectName
-# output : barplotNul (png)
-
-# created Feb 7th, 2012
-# modified April 30th, 2012 (target$group instead of target)
-
-barplotNul <- function( counts, group,  OUT_barplotNulName, out = TRUE ){
-
-  if (out) png( file=OUT_barplotNulName )
-
-  N <- apply(counts, 2, function(x){sum(x == 0)})/nrow(counts)
-  barplot(N, col=as.integer(group)+1, main = "Proportion of null counts per Sample", ylim = c(0,1))
-  legend("topright", as.character(unique(group)), lty=1, col=as.integer(unique(group))+1)
-
-  if (out) dev.off()
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/barplotTC.R
--- a/SMART/DiffExpAnal/DESeqTools/barplotTC.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,20 +0,0 @@
-# barplotTC
-# barplot representing total count per sample
-
-# input : counts, target, projectName
-# output : barplotTC (png)
-
-# created Feb 7th, 2012
-# modified April 30th, 2012 (group instead of target$group)
-
-barplotTC <- function( counts, group, OUT_barplotTCName, out = TRUE ){
-
-  if (out) png( file=OUT_barplotTCName )
-
-  ylim <- c(0, max(colSums(counts))*1.2)
-  barplot( colSums(counts), col=as.integer(group)+1, main = "Total Read Count per Sample",  ylim=ylim )
-  legend( "topright", as.character(unique(group)), lty=1,
-         col=as.integer(unique(group))+1 )
-
-  if (out) dev.off()
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/boxplotCounts.R
--- a/SMART/DiffExpAnal/DESeqTools/boxplotCounts.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,18 +0,0 @@
-# boxplotCounts
-# boxplots representing counts distribution per sample
-
-# input : counts, target, projectName, type of data (raw or norm)
-# output : boxplot (png)
-
-# created Feb 7th, 2012
-# modified April 30th, 2012
-
-boxplotCounts <- function( counts, group, type = c("raw", "norm"), OUT_boxplotCountsName, out = TRUE ){
-
-  if (out) png( file=OUT_boxplotCountsName )
-
-  boxplot( log2(counts+1), col=as.integer(group)+1, main = paste(type[1], " counts distribution", sep="" ) )
-  legend( "topright", as.character(unique(group)), lty=1, col=as.integer(unique(group))+1 )
-
-  if (out) dev.off()
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/clusterPlot.R
--- a/SMART/DiffExpAnal/DESeqTools/clusterPlot.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,27 +0,0 @@
-# clusterPlot
-# dendrogram of sample clustering
-
-# input : counts, outputName, type of data (raw or norm)
-# output : dendrogram (jpeg)
-
-# created Sept 13th, 2012
-# modified Oct 30th, 2012
-# Marie-Agnes Dillies
-
-
-clusterPlot <- function( cds, OUT_clusterPlot, type = "raw", out = TRUE ){
-
-  if (out) png( file=OUT_clusterPlot )
-
-  if (type == "norm"){
-    cdsblind <- estimateDispersions( cds, method="blind" )
-    vsd <- getVarianceStabilizedData( cdsblind )
-  }
-  else {
-    vsd <- counts(cds)
-  }
-  hc <- hclust( dist(t(vsd)), method="ward" )
-  plot( hc, xlab = "Euclidean distance, Ward criterion", main=paste("Cluster Dendrogram, ", type, " data", sep="") )
-
-  if (out) dev.off()
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/densityPlot.R
--- a/SMART/DiffExpAnal/DESeqTools/densityPlot.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,23 +0,0 @@
-# densityPlot
-# density plot of all samples
-
-# input : counts, target, projectName
-# output : densplot (png)
-
-# created Feb 7th, 2012
-# modified April 30th, 2012
-
-
-densityPlot <- function( counts, group, OUT_densityPlotName, out = TRUE ){
-
-  if (out) png( file=OUT_densityPlotName )
-
-  couleurs <- as.integer( group ) + 1
-  ylim <- c(0, max(density(log2(counts)+1)$y)*1.5)
-  plot( density(log2(counts[,1])+1), main="Density of counts distribution", col=couleurs[1], ylim = ylim )
-  for (i in 2:ncol(counts))
-   lines( density(log2(counts[,i])+1), col=couleurs[i] )
-  legend( "topright", as.character(unique(group)), lty=1, col=as.integer(unique(group))+1 )
-
-  if (out) dev.off()
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/exportComplete.R
--- a/SMART/DiffExpAnal/DESeqTools/exportComplete.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,20 +0,0 @@
-# exportComplete
-# export complete data and results
-
-# input : counts res, target
-# output : complete data and xls file (in text format)
-
-# created Feb 14th, 2012
-# modified March 9th, 2012 (order of cond1 and cond2)
-
-
-exportComplete <- function( counts, res, target, adjMethod, cond1, cond2, OUT_completeName, out = T ){
-
- complete <- data.frame( res$id, counts, res[,3:ncol(res)] )
- colnames(complete) <- c( "id", as.character(target$label), cond2, cond1, "FC", "log2FC", "rawp", 
- paste("adjp",adjMethod,sep="") )
-
-  if (out)
-   write.table( complete, file=OUT_completeName, sep="\t", row.names=F )
-  return( complete )
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/exportDiff.R
--- a/SMART/DiffExpAnal/DESeqTools/exportDiff.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,42 +0,0 @@
-# exportDiff
-# export differentially expressed genes
-
-# input : complete, alpha, adjMethod, projectName
-# output : diff genes, up and down in xls files
-
-# created Feb 14th, 2012
-
-
-exportDiff <- function( complete, alpha, adjMethod, OUT_upName, OUT_downName, out = T ){
-
- diff <- complete[which(complete[,grep("adjp",colnames(complete))] < alpha),]
-
- gup <- up( diff )
- gdown <- down( diff )
-
-  if (out){
-    gup[,(ncol(gup)-4):ncol(gup)] <- format( gup[,(ncol(gup)-4):ncol(gup)], digits=3, dec=",")
-    gdown[,(ncol(gdown)-4):ncol(gdown)] <- format( gdown[,(ncol(gdown)-4):ncol(gdown)], digits=3, dec=",")
- write.table(gup, file=OUT_upName, row.names=F, sep="\t")
- write.table(gdown, file=OUT_downName, row.names=F, sep="\t")
-  }
-  return( diff )
-}
-
-
-up <- function( diff ){
-
- up <- diff[diff$log2FC > 0,]
- up <- up[order(up[,grep("adjp",colnames(up))]),]
-
- return( up )
-}
-
-
-down <- function( diff ){
-
- down <- diff[diff$log2FC < 0,]
- down <- down[order(down[,grep("adjp",colnames(down))]),]
-
- return( down )
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/histoRawp.R
--- a/SMART/DiffExpAnal/DESeqTools/histoRawp.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,18 +0,0 @@
-# histoRawp
-# histogram of raw p-values
-
-# input : res, OUT_histoRawpName
-# output : histogram (png)
-
-
-histoRawp <- function( res, OUT_histoRawpName, out = TRUE ){
-
-  if (out) png( file=OUT_histoRawpName )
-  
-  ind <- grep("val", colnames(res))
-  hist( res[,ind], nclass=50, xlab="Raw p-values", main="", col="skyblue" )
-
-  if (out) dev.off()
-}
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/loadCountData.R
--- a/SMART/DiffExpAnal/DESeqTools/loadCountData.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,36 +0,0 @@
-# loadCountData
-# loads counts, one file per lane
-# file names from target file
-
-# input : target
-# output : raw count table
-
-# created Feb 6th, 2012
-# modified May 2nd, 2012 (colnames -> target$label)
-# Marie-Agnes Dillies
-
-
-loadCountData <- function(target, header){
-
-  require(DESeq)
-  fileNames <- target$files
-
-if(header!=0){
- #rawCounts <- read.table(as.character(paste(rawDir,target$files[1],sep="/")), sep="\t", header=TRUE)
- rawCounts <- read.table(as.character(target$files[1],sep="/"), sep="\t", header=TRUE)
-} else if(header==0){
- rawCounts <- read.table(as.character(target$files[1],sep="/"), sep="\t")}
-  
-  colnames(rawCounts) <- c("Id", as.character(target$label[1]))
-
-  for (i in 2:length(fileNames)){
- if(header!=0){
-   tmp <- read.table(as.character(target$files[i],sep="/"), sep="\t", header=TRUE)
- } else if(header==0){
- tmp <- read.table(as.character(target$files[i],sep="/"), sep="\t")}
-   colnames(tmp) <- c("Id", as.character(target$label[i]))
-   rawCounts <- merge(rawCounts, tmp, by="Id", all=T)
-  }
-  rawCounts[is.na(rawCounts)] <- 0
-  return(rawCounts)
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/loadTargetFile.R
--- a/SMART/DiffExpAnal/DESeqTools/loadTargetFile.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,17 +0,0 @@
-# loadTargetFile
-# loads file containing sample info
-
-# input : targetFile Name
-# output : target
-
-# created Feb 6th, 2012
-# Marie-Agnes Dillies
-
-
-loadTargetFile <- function(targetFile, header){
-if(header!=0){
-  return(read.table(targetFile, header=T, sep="\t"))
- }else if(header==0){
-  return(read.table(targetFile, sep="\t"))
- }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/majSequence.R
--- a/SMART/DiffExpAnal/DESeqTools/majSequence.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,26 +0,0 @@
-# majSequence
-# compute proportion of reads associated with most expressed sequence
-
-# input : counts, target, projectName
-# output : barplot, % associated with majority gene
-
-# created Feb 7th, 2012
-# modified Feb 20th, 2012
-# modified April 30th, 2012
-# Marie-Agnes Dillies
-
-
-majSequence <- function( counts, group, OUT_majSequenceName, out = T, position = "topright" ){
-
-  if (out) png( file=OUT_majSequenceName )
-
-  maj <- apply(counts, 2, function(x){x <- x[order(x, decreasing=T)]; x[1]*100/sum(x)})
-  seqname <- apply(counts, 2, function(x){x <- x[order(x, decreasing=T)]; names(x)[1]})
-
-  x <- barplot( maj, col=as.integer(group)+1, main = "Proportion of reads from most expressed gene", 
- ylim = c(0, max(maj)*1.2), cex.main=0.8 )
-  for (i in 1:length(seqname)) text( x[i], maj[i]/2, seqname[i], cex=0.8, srt=90, adj=0)
-  legend( position, as.character(unique(group)), lty=1, col=as.integer(unique(group))+1 )
-
-  if (out) dev.off()
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/pairwiseSERE.R
--- a/SMART/DiffExpAnal/DESeqTools/pairwiseSERE.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,41 +0,0 @@
-# pairwiseSERE
-# compute pairwise SERE statistics
-
-# input : counts
-# output : matrix of SERE values
-
-# created october 19th, 2012
-# Marie-Agnes Dillies
-
-
-pairwiseSERE <- function( counts ){
-  
-  sere <- matrix( NA, ncol=ncol(counts), nrow=ncol(counts) )
-  for (i in 1:ncol(counts)){
-    for (j in 1:ncol(counts)){
-      sere[i,j] <- sigfun_Pearson( counts[,c(i,j)] )
-    }
-  }
-  colnames(sere) <- rownames(sere) <- colnames(counts)
-  return( formatC(sere, format="f", digits=2) )
-}
-
-sigfun_Pearson <- function(observed) {
-  #calculate lambda and expected values
-  laneTotals<- colSums(observed);
-  total <- sum(laneTotals)
-  fullObserved <- observed[rowSums(observed)>0,];
-  fullLambda <- rowSums(fullObserved)/total;
-  fullLhat <- fullLambda > 0;
-  fullExpected<- outer(fullLambda, laneTotals);
-
-  #keep values
-  fullKeep <- which(fullExpected > 0);
-  
-  #calculate degrees of freedom (nrow*(ncol -1) >> number of parameters - calculated (just lamda is calculated >> thats why minus 1)
-  #calculate pearson and deviance for all values
-  oeFull <- (fullObserved[fullKeep] - fullExpected[fullKeep])^2/ fullExpected[fullKeep] # pearson chisq test
-  dfFull <- length(fullKeep) - sum(fullLhat!=0);
-  
-  return(c(sqrt(sum(oeFull)/dfFull)));
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/pairwiseScatterPlots.R
--- a/SMART/DiffExpAnal/DESeqTools/pairwiseScatterPlots.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,31 +0,0 @@
-# pairwiseScatterPlots
-# scatter plots for pairwise comparaisons of log counts
-
-# input : counts, target, outputName
-# output : scatter plots (pdf: allows multiple figures in one file)
-
-# created Feb 21th, 2012
-# modified Sept 27th, 2012 (pdf output file)
-# modified Oct 30th, 2012 (png)
-# Marie-Agnes Dillies
-
-
-pairwiseScatterPlots <- function( counts, target, OUT_scatterPlot, out = TRUE, pdffile = FALSE ){
-
-  if (out & !pdffile) png( OUT_scatterPlot )
-  if (pdffile) pdf( OUT_scatterPlot )
-  
-  conds <- unique(target$group)
-  # colnames(counts) <- target$label
-  
-  for (i in 1:(length(conds)-1)){
-   for (j in (i+1):length(conds)){
-   cond1 <- conds[i]; cond2 <- conds[j]
- pairs( log2(counts[, which(target$group %in% c(as.character(cond1), as.character(cond2)))]+1), 
- pch=".", cex=0.5, main = paste(cond1, cond2, sep=" vs ") )
-   }
-  }
-
-  if (pdffile) dev.off()
-  if (out) dev.off()
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/plotDispEstimates.R
--- a/SMART/DiffExpAnal/DESeqTools/plotDispEstimates.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,21 +0,0 @@
-# plotDispEstimates
-# scatter plots representing dispersion estimates vs mean expression
-
-# input : cds, OUT_plotDispEstimatesName
-# output : scatterplot (png)
-
-plotDispEstimates <- function( cds, OUT_plotDispEstimatesName, out = TRUE ){
-
-  if (out) png( file=OUT_plotDispEstimatesName )
-  
-  plot(
-   rowMeans( counts(cds, normalized=T) ),
-   fitInfo(cds)$perGeneDispEsts,
-   pch=".", log="xy",
-   xlab = "Mean expression strength", ylab = "Dispersion estimate" )
-  
-  xg <- 10^seq(-.5, 5, length.out=300)
-  lines( xg, fitInfo(cds)$dispFun(xg), col="red" )
-
-  if (out) dev.off()
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/raw/f1cond1.tsv
--- a/SMART/DiffExpAnal/DESeqTools/raw/f1cond1.tsv Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b"@@ -1,18761 +0,0 @@\n-GliNS1\tG144\n-13CDNA73\t4\n-15E1.2\t75\n-182-FIP\t118\n-2'-PDE\t39\n-3'HEXO\t18\n-3.8-1\t0\n-384D8-2\t3\n-76P\t61\n-7h3\t4\n-8D6A\t1\n-A1BG\t1\n-A2BP1\t19\n-A2M\t2724\n-A4GALT\t0\n-A4GNT\t0\n-AAA1\t2\n-AAAS\t57\n-AACS\t1904\n-AADACL1\t3\n-AADAT\t18\n-AAK1\t2\n-AAMP\t215\n-AANAT\t0\n-AARS\t157\n-AARSD1\t27\n-AARSL\t21\n-AASDH\t15\n-AASDHPPT\t162\n-AASS\t159\n-AATF\t68\n-AATK\t3\n-ABAT\t493\n-ABC1\t7\n-ABCA1\t23\n-ABCA10\t1\n-ABCA11\t10\n-ABCA12\t3\n-ABCA13\t0\n-ABCA2\t38\n-ABCA3\t95\n-ABCA4\t0\n-ABCA5\t1\n-ABCA6\t0\n-ABCA7\t23\n-ABCA8\t98\n-ABCA9\t155\n-ABCB1\t0\n-ABCB10\t64\n-ABCB11\t0\n-ABCB4\t51\n-ABCB5\t2\n-ABCB6\t26\n-ABCB7\t34\n-ABCB8\t84\n-ABCB9\t12\n-ABCC1\t24\n-ABCC10\t17\n-ABCC11\t4\n-ABCC12\t0\n-ABCC13\t4\n-ABCC2\t1\n-ABCC3\t4\n-ABCC4\t17\n-ABCC5\t41\n-ABCC6\t27\n-ABCC8\t29\n-ABCC9\t4\n-ABCD1\t9\n-ABCD2\t2\n-ABCD3\t147\n-ABCD4\t44\n-ABCE1\t490\n-ABCF1\t68\n-ABCF2\t42\n-ABCF3\t35\n-ABCG1\t1\n-ABCG2\t48\n-ABCG4\t0\n-ABHD1\t0\n-ABHD10\t6\n-ABHD11\t1804\n-ABHD14A\t44\n-ABHD14B\t18\n-ABHD2\t85\n-ABHD3\t133\n-ABHD4\t114\n-ABHD5\t34\n-ABHD6\t62\n-ABHD7\t0\n-ABHD8\t49\n-ABI1\t275\n-ABI2\t195\n-ABI3\t0\n-ABI3BP\t3\n-ABL1\t112\n-ABL2\t25\n-ABLIM1\t70\n-ABLIM2\t0\n-ABLIM3\t1\n-ABO\t0\n-ABR\t199\n-ABRA\t0\n-ABT1\t51\n-ABTB1\t11\n-ABTB2\t19\n-ACAA1\t34\n-ACAA2\t144\n-ACACA\t47\n-ACACB\t2\n-ACAD10\t36\n-ACAD11\t109\n-ACAD8\t22\n-ACAD9\t116\n-ACADL\t1\n-ACADM\t69\n-ACADS\t37\n-ACADSB\t36\n-ACADVL\t200\n-ACAS2\t9\n-ACAS2L\t3\n-ACAT1\t111\n-ACAT2\t165\n-ACATE2\t12\n-ACBD3\t213\n-ACBD4\t4\n-ACBD5\t14\n-ACBD6\t130\n-ACBD7\t6\n-ACCN2\t20\n-ACCN3\t11\n-ACCN4\t1\n-ACD\t9\n-ACDC\t2\n-ACE\t0\n-ACF\t25\n-ACHE\t199\n-ACIN1\t44\n-ACLY\t313\n-ACMSD\t6\n-ACN9\t133\n-ACO1\t70\n-ACO2\t348\n-ACOT2\t19\n-ACOT4\t1\n-ACOT7\t192\n-ACOT8\t2\n-ACOT9\t15\n-ACOX1\t97\n-ACOX2\t2\n-ACOX3\t4\n-ACOXL\t0\n-ACP1\t239\n-ACP2\t42\n-ACP5\t0\n-ACP6\t241\n-ACPL2\t55\n-ACPP\t7\n-ACR\t0\n-ACRBP\t0\n-ACRC\t3\n-ACRV1\t0\n-ACSBG1\t7\n-ACSL1\t5\n-ACSL3\t440\n-ACSL4\t63\n-ACSL5\t2\n-ACSL6
\t19\n-ACSM2\t0\n-ACSM3\t1\n-ACSS1\t91\n-ACSS2\t55\n-ACTA1\t74\n-ACTA2\t0\n-ACTB\t26071\n-ACTC\t1\n-ACTG1\t2667\n-ACTG2\t26\n-ACTL6A\t58\n-ACTL6B\t0\n-ACTL8\t0\n-ACTN1\t463\n-ACTN2\t12\n-ACTN4\t3028\n-ACTR10\t77\n-ACTR1A\t234\n-ACTR1B\t18\n-ACTR2\t3044\n-ACTR3\t319\n-ACTR3B\t2\n-ACTR5\t13\n-ACTR6\t162\n-ACTR8\t64\n-ACTRT1\t0\n-ACVR1\t7\n-ACVR1B\t17\n-ACVR1C\t0\n-ACVR2\t14\n-ACVR2A\t3\n-ACVR2B\t2\n-ACVRL1\t0\n-ACY1\t10\n-ACY1L2\t46\n-ACY3\t0\n-ACYP1\t59\n-ACYP2\t58\n-AD-003\t1\n-AD-020\t9\n-AD023\t0\n-AD031\t51\n-AD7C-NTP\t3\n-ADA\t1889\n-ADAL\t0\n-ADAM10\t251\n-ADAM11\t9\n-ADAM12\t932\n-ADAM15\t74\n-ADAM17\t101\n-ADAM18\t0\n-ADAM19\t118\n-ADAM20\t0\n-ADAM21\t6\n-ADAM22\t264\n-ADAM23\t66\n-ADAM28\t0\n-ADAM32\t0\n-ADAM33\t35\n-ADAM8\t0\n-ADAM9\t581\n-ADAMDEC1\t0\n-ADAMTS1\t61\n-ADAMTS10\t58\n-ADAMTS12\t22\n-ADAMTS13\t3\n-ADAMTS15\t19\n-ADAMTS16\t16\n-ADAMTS17\t1\n-ADAMTS18\t0\n-ADAMTS19\t0\n-ADAMTS2\t2\n-ADAMTS20\t0\n-ADAMTS3\t147\n-ADAMTS4\t23\n-ADAMTS5\t43\n-ADAMTS6\t31\n-ADAMTS7\t2\n-ADAMTS8\t0\n-ADAMTS9\t321\n-ADAMTSL1\t6\n-ADAMTSL2\t0\n-ADAMTSL3\t0\n-ADAMTSL4\t1\n-ADAR\t53\n-ADARB1\t7\n-ADARB2\t0\n-ADAT1\t38\n-ADC\t0\n-ADCK1\t28\n-ADCK2\t11\n-ADCK4\t106\n-ADCK5\t4\n-ADCY1\t186\n-ADCY2\t0\n-ADCY3\t14\n-ADCY5\t9\n-ADCY6\t182\n-ADCY7\t19\n-ADCY8\t0\n-ADCY9\t21\n-ADCYAP1\t0\n-ADCYAP1R1\t3\n-ADD1\t322\n-ADD2\t48\n-ADD3\t448\n-ADFP\t31\n-ADH1B\t0\n-ADH1C\t0\n-ADH4\t3\n-ADH5\t490\n-ADHFE1\t5\n-ADI1\t181\n-ADIPOR1\t102\n-ADIPOR2\t26\n-ADK\t135\n-ADM\t78\n-ADM2\t0\n-ADMP\t0\n-ADMR\t2386\n-ADNP\t253\n-ADORA1\t3\n-ADORA2A\t1\n-ADORA2B\t7\n-ADPGK\t2019\n-ADPN\t7\n-ADPRH\t2\n-ADPRHL1\t0\n-ADPRHL2\t31\n-ADRA1A\t0\n-ADRA1B\t0\n-ADRA1D\t1\n-ADRA2A\t39\n-ADRA2B\t0\n-ADRB1\t11\n-ADRB2\t0\n-ADRB3\t17\n-ADRBK1\t51\n-ADRBK2\t4\n-ADRM1\t189\n-ADSL\t96\n-ADSS\t165\n-ADSSL1\t0\n-AE2\t6\n-AEBP1\t1856\n-AEBP2\t144\n-AEGP\t3\n-AER61\t3\n-AES\t1381\n-AF15Q14\t1\n-AF1Q\t24\n-AF5Q31\t244\n-AFAP\t139\n-AFAR3\t6\n-AFF1\t19\n-AFF2\t0\n-AFF3\t7\n-AFF4\t1\n-AFG3L1\t5\n-AFG3L2\t107\n-AFMID\t92\n-AF
P\t0\n-AFTIPHILIN\t81\n-AG1\t6\n-AGA\t19\n-AGBL2\t1\n-AGBL3\t2\n-AGC1\t8\n-AGER\t4\n-AGGF1\t52\n-AGL\t71\n-AGMAT\t5\n-AGPAT1\t83\n-AGPAT2\t2\n-AGPAT3\t20\n-AGPAT4\t51\n-AGPAT5\t260\n-AGPAT6\t51\n-AGPAT7\t25\n-AGPS\t96\n-AGR2\t21\n-AGRN\t345\n-AGRP\t0\n-AGT\t948\n-AGTPBP1\t28\n-AGTR1\t0\n-AGTR2\t0\n-AGTRAP\t204\n-AGXT2L1\t0\n-AHCTF1\t69\n-AHCY\t594\n-AHCYL1\t709\n-AHDC1\t8\n-AHI1\t55\n-AHNAK\t0\n-AHR\t458\n-AHSA1\t136\n-AHSA2\t38\n-AHSG\t0\n-AICDA\t1\n-AIFL\t0\n-AIG1\t63\n-AIM1\t2\n-AIM1L\t0\n-AIP\t87\n-AIP1\t92\n-AIPL1\t8\n-AK1\t37\n-AK2\t156\n-AK3\t197\n-AK3L1\t14\n-AK5\t9\n-AK7\t1\n-AKAP1\t42\n-AKAP10\t86\n-AKAP11\t78\n-AKAP12\t72\n-AKAP13\t26\n-AKAP14\t9\n-AKAP3\t3\n-AKAP6\t29\n-AKAP7\t57\n-AKAP8\t51\n-AKAP8L\t155\n-AKAP9\t56\n-AKIP\t1\n-AKNA\t8\n-AKR1A1\t126\n-AKR1B1\t305\n-AKR1B10"..b'59\n-ZNF278\t80\n-ZNF28\t31\n-ZNF281\t139\n-ZNF282\t64\n-ZNF283\t18\n-ZNF284\t64\n-ZNF285\t9\n-ZNF286\t488\n-ZNF287\t48\n-ZNF289\t51\n-ZNF291\t49\n-ZNF292\t112\n-ZNF294\t45\n-ZNF295\t52\n-ZNF297\t45\n-ZNF297B\t84\n-ZNF3\t39\n-ZNF30\t97\n-ZNF300\t6\n-ZNF302\t170\n-ZNF304\t45\n-ZNF305\t18\n-ZNF306\t2\n-ZNF307\t24\n-ZNF31\t10\n-ZNF311\t15\n-ZNF312\t0\n-ZNF313\t199\n-ZNF317\t51\n-ZNF318\t50\n-ZNF319\t22\n-ZNF32\t66\n-ZNF322A\t6\n-ZNF323\t6\n-ZNF324\t105\n-ZNF326\t43\n-ZNF329\t32\n-ZNF330\t33\n-ZNF331\t28\n-ZNF333\t41\n-ZNF334\t11\n-ZNF335\t6\n-ZNF336\t9\n-ZNF337\t33\n-ZNF33A\t11\n-ZNF34\t13\n-ZNF341\t1\n-ZNF342\t0\n-ZNF343\t16\n-ZNF345\t9\n-ZNF346\t47\n-ZNF347\t35\n-ZNF35\t12\n-ZNF350\t14\n-ZNF354A\t62\n-ZNF354B\t22\n-ZNF354C\t1336\n-ZNF358\t70\n-ZNF364\t40\n-ZNF365\t7\n-ZNF366\t0\n-ZNF367\t66\n-ZNF37A\t74\n-ZNF37B\t102\n-ZNF38\t53\n-ZNF382\t6\n-ZNF383\t7\n-ZNF384\t90\n-ZNF385\t1\n-ZNF390\t5\n-ZNF394\t643\n-ZNF395\t80\n-ZNF396\t2\n-ZNF397\t43\n-ZNF398\t29\n-ZNF403\t106\n-ZNF404\t17\n-ZNF406\t1\n-ZNF407\t9\n-ZNF408\t18\n-ZNF41\t57\n-ZNF410\t66\n-ZNF414\t3\n-ZNF415\t132\n-ZNF416\t49\n-ZNF417\t0\n-ZNF418\t10\n-ZNF419\t30\n-ZNF42\t84\n-ZNF420\t1\n-ZNF423\t38\n-ZNF425\t9\n-ZNF426\t12\n-ZNF
429\t42\n-ZNF43\t55\n-ZNF430\t59\n-ZNF431\t11\n-ZNF432\t26\n-ZNF433\t22\n-ZNF434\t48\n-ZNF435\t1\n-ZNF436\t412\n-ZNF438\t67\n-ZNF439\t35\n-ZNF44\t71\n-ZNF440\t23\n-ZNF440L\t7\n-ZNF441\t15\n-ZNF442\t3\n-ZNF443\t52\n-ZNF444\t299\n-ZNF445\t15\n-ZNF446\t34\n-ZNF447\t56\n-ZNF449\t24\n-ZNF45\t187\n-ZNF451\t43\n-ZNF452\t0\n-ZNF454\t39\n-ZNF46\t7\n-ZNF462\t70\n-ZNF467\t10\n-ZNF468\t198\n-ZNF469\t3\n-ZNF471\t41\n-ZNF473\t18\n-ZNF479\t12\n-ZNF480\t117\n-ZNF482\t37\n-ZNF483\t3\n-ZNF484\t12\n-ZNF485\t4\n-ZNF486\t630\n-ZNF488\t10\n-ZNF490\t9\n-ZNF491\t3\n-ZNF492\t0\n-ZNF493\t27\n-ZNF496\t91\n-ZNF497\t4\n-ZNF498\t38\n-ZNF499\t15\n-ZNF500\t7\n-ZNF501\t11\n-ZNF502\t11\n-ZNF503\t111\n-ZNF505\t3\n-ZNF506\t19\n-ZNF507\t101\n-ZNF509\t6\n-ZNF510\t63\n-ZNF511\t336\n-ZNF512\t67\n-ZNF513\t41\n-ZNF514\t9\n-ZNF516\t104\n-ZNF517\t14\n-ZNF518\t50\n-ZNF519\t27\n-ZNF521\t49\n-ZNF524\t79\n-ZNF525\t12\n-ZNF526\t39\n-ZNF527\t8\n-ZNF528\t51\n-ZNF529\t53\n-ZNF530\t14\n-ZNF532\t459\n-ZNF533\t0\n-ZNF536\t25\n-ZNF537\t299\n-ZNF539\t118\n-ZNF540\t13\n-ZNF542\t40\n-ZNF543\t16\n-ZNF544\t33\n-ZNF545\t0\n-ZNF546\t6\n-ZNF547\t138\n-ZNF548\t44\n-ZNF549\t29\n-ZNF550\t26\n-ZNF551\t65\n-ZNF552\t14\n-ZNF553\t89\n-ZNF554\t8\n-ZNF555\t27\n-ZNF557\t13\n-ZNF558\t60\n-ZNF559\t87\n-ZNF560\t0\n-ZNF561\t131\n-ZNF562\t10\n-ZNF563\t19\n-ZNF564\t68\n-ZNF565\t14\n-ZNF566\t14\n-ZNF567\t17\n-ZNF568\t27\n-ZNF569\t59\n-ZNF570\t6\n-ZNF571\t7\n-ZNF572\t0\n-ZNF573\t4\n-ZNF574\t32\n-ZNF575\t6\n-ZNF576\t46\n-ZNF577\t50\n-ZNF578\t8\n-ZNF579\t47\n-ZNF580\t270\n-ZNF581\t158\n-ZNF582\t7\n-ZNF583\t65\n-ZNF584\t20\n-ZNF585A\t133\n-ZNF585B\t99\n-ZNF586\t12\n-ZNF587\t70\n-ZNF588\t40\n-ZNF589\t24\n-ZNF592\t14\n-ZNF593\t21\n-ZNF594\t6\n-ZNF595\t12\n-ZNF596\t227\n-ZNF597\t0\n-ZNF598\t26\n-ZNF599\t30\n-ZNF6\t43\n-ZNF600\t17\n-ZNF605\t14\n-ZNF606\t37\n-ZNF607\t72\n-ZNF608\t84\n-ZNF609\t22\n-ZNF610\t5\n-ZNF611\t186\n-ZNF613\t17\n-ZNF614\t25\n-ZNF615\t52\n-ZNF616\t35\n-ZNF618\t1\n-ZNF619\t1\n-ZNF620\t19\n-ZNF621\t26\n-ZNF622\t36\n-ZNF623\t45\n-ZNF62
4\t20\n-ZNF625\t0\n-ZNF626\t37\n-ZNF627\t31\n-ZNF629\t72\n-ZNF630\t0\n-ZNF638\t261\n-ZNF639\t50\n-ZNF641\t14\n-ZNF642\t0\n-ZNF643\t0\n-ZNF644\t65\n-ZNF646\t19\n-ZNF649\t119\n-ZNF650\t33\n-ZNF651\t174\n-ZNF652\t27\n-ZNF653\t6\n-ZNF654\t20\n-ZNF655\t103\n-ZNF658\t20\n-ZNF659\t5\n-ZNF66\t0\n-ZNF660\t8\n-ZNF663\t0\n-ZNF664\t120\n-ZNF665\t15\n-ZNF667\t191\n-ZNF668\t9\n-ZNF669\t51\n-ZNF670\t3\n-ZNF671\t51\n-ZNF672\t70\n-ZNF673\t7\n-ZNF677\t0\n-ZNF678\t7\n-ZNF680\t13\n-ZNF681\t1\n-ZNF682\t11\n-ZNF684\t6\n-ZNF687\t32\n-ZNF688\t9\n-ZNF689\t55\n-ZNF69\t7\n-ZNF690\t11\n-ZNF691\t28\n-ZNF692\t12\n-ZNF694\t11\n-ZNF695\t0\n-ZNF697\t26\n-ZNF7\t49\n-ZNF70\t1\n-ZNF700\t53\n-ZNF701\t6\n-ZNF702\t0\n-ZNF703\t3\n-ZNF704\t0\n-ZNF706\t353\n-ZNF707\t13\n-ZNF708\t35\n-ZNF71\t33\n-ZNF710\t36\n-ZNF713\t2\n-ZNF714\t61\n-ZNF718\t0\n-ZNF720\t66\n-ZNF721\t15\n-ZNF722\t0\n-ZNF74\t41\n-ZNF740\t19\n-ZNF75\t36\n-ZNF75A\t86\n-ZNF76\t125\n-ZNF77\t3\n-ZNF79\t1\n-ZNF8\t1\n-ZNF80\t1\n-ZNF81\t3\n-ZNF83\t572\n-ZNF84\t23\n-ZNF85\t66\n-ZNF9\t112\n-ZNF91\t163\n-ZNF92\t68\n-ZNF93\t72\n-ZNF96\t1\n-ZNFN1A2\t0\n-ZNFN1A3\t0\n-ZNFN1A4\t21\n-ZNFN1A5\t59\n-ZNFX1\t15\n-ZNHIT1\t18\n-ZNHIT2\t9\n-ZNHIT3\t58\n-ZNHIT4\t14\n-ZNRD1\t42\n-ZNRF1\t112\n-ZNRF2\t15\n-ZNRF3\t54\n-ZP3\t21\n-ZPBP\t0\n-ZPLD1\t0\n-ZRANB1\t53\n-ZRANB3\t7\n-ZRF1\t134\n-ZSCAN1\t73\n-ZSCAN2\t8\n-ZSCAN5\t17\n-ZSWIM1\t37\n-ZSWIM3\t3\n-ZSWIM4\t28\n-ZSWIM5\t7\n-ZSWIM6\t381\n-ZW10\t19\n-ZWILCH\t4\n-ZWINT\t157\n-ZXDA\t0\n-ZXDB\t16\n-ZXDC\t42\n-ZYG11B\t1581\n-ZYG11BL\t28\n-ZYX\t1233\n-ZZANK1\t48\n-ZZEF1\t26\n-ZZZ3\t77\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/raw/f1cond2.tsv
--- a/SMART/DiffExpAnal/DESeqTools/raw/f1cond2.tsv Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b"@@ -1,18761 +0,0 @@\n-GliNS1\tCB541\n-13CDNA73\t1\n-15E1.2\t458\n-182-FIP\t231\n-2'-PDE\t127\n-3'HEXO\t111\n-3.8-1\t0\n-384D8-2\t4\n-76P\t108\n-7h3\t0\n-8D6A\t0\n-A1BG\t0\n-A2BP1\t7\n-A2M\t49\n-A4GALT\t0\n-A4GNT\t0\n-AAA1\t0\n-AAAS\t49\n-AACS\t5365\n-AADACL1\t683\n-AADAT\t24\n-AAK1\t2\n-AAMP\t383\n-AANAT\t0\n-AARS\t183\n-AARSD1\t122\n-AARSL\t11\n-AASDH\t47\n-AASDHPPT\t543\n-AASS\t145\n-AATF\t36\n-AATK\t0\n-ABAT\t281\n-ABC1\t24\n-ABCA1\t27\n-ABCA10\t6\n-ABCA11\t73\n-ABCA12\t0\n-ABCA13\t30\n-ABCA2\t1\n-ABCA3\t166\n-ABCA4\t0\n-ABCA5\t51\n-ABCA6\t7\n-ABCA7\t6\n-ABCA8\t1\n-ABCA9\t317\n-ABCB1\t2\n-ABCB10\t169\n-ABCB11\t0\n-ABCB4\t0\n-ABCB5\t1\n-ABCB6\t69\n-ABCB7\t51\n-ABCB8\t28\n-ABCB9\t11\n-ABCC1\t127\n-ABCC10\t7\n-ABCC11\t27\n-ABCC12\t0\n-ABCC13\t3\n-ABCC2\t0\n-ABCC3\t301\n-ABCC4\t57\n-ABCC5\t38\n-ABCC6\t44\n-ABCC8\t0\n-ABCC9\t6\n-ABCD1\t13\n-ABCD2\t0\n-ABCD3\t86\n-ABCD4\t598\n-ABCE1\t790\n-ABCF1\t32\n-ABCF2\t36\n-ABCF3\t32\n-ABCG1\t0\n-ABCG2\t19\n-ABCG4\t0\n-ABHD1\t1\n-ABHD10\t16\n-ABHD11\t676\n-ABHD14A\t8\n-ABHD14B\t7\n-ABHD2\t35\n-ABHD3\t343\n-ABHD4\t210\n-ABHD5\t68\n-ABHD6\t394\n-ABHD7\t0\n-ABHD8\t0\n-ABI1\t257\n-ABI2\t247\n-ABI3\t0\n-ABI3BP\t145\n-ABL1\t94\n-ABL2\t45\n-ABLIM1\t13\n-ABLIM2\t8\n-ABLIM3\t15\n-ABO\t1\n-ABR\t1137\n-ABRA\t0\n-ABT1\t25\n-ABTB1\t10\n-ABTB2\t12\n-ACAA1\t104\n-ACAA2\t156\n-ACACA\t173\n-ACACB\t1\n-ACAD10\t1\n-ACAD11\t17\n-ACAD8\t56\n-ACAD9\t256\n-ACADL\t2\n-ACADM\t2\n-ACADS\t50\n-ACADSB\t82\n-ACADVL\t35\n-ACAS2\t44\n-ACAS2L\t8\n-ACAT1\t195\n-ACAT2\t25\n-ACATE2\t41\n-ACBD3\t692\n-ACBD4\t2\n-ACBD5\t5\n-ACBD6\t123\n-ACBD7\t9\n-ACCN2\t40\n-ACCN3\t28\n-ACCN4\t0\n-ACD\t117\n-ACDC\t17\n-ACE\t1\n-ACF\t45\n-ACHE\t37\n-ACIN1\t0\n-ACLY\t234\n-ACMSD\t0\n-ACN9\t399\n-ACO1\t124\n-ACO2\t1552\n-ACOT2\t28\n-ACOT4\t0\n-ACOT7\t208\n-ACOT8\t0\n-ACOT9\t16\n-ACOX1\t123\n-ACOX2\t66\n-ACOX3\t5\n-ACOXL\t1\n-ACP1\t942\n-ACP2\t25\n-ACP5\t0\n-ACP6\t361\n-ACPL2\t136\n-ACPP\t6\n-ACR\t0\n-ACRBP\t2\n-ACRC\t13\n-ACRV1\t0\n-ACSBG1\t2\n-ACSL1\t77\n-ACSL3\t1546\n-ACSL4\t126\n
-ACSL5\t52\n-ACSL6\t1\n-ACSM2\t0\n-ACSM3\t0\n-ACSS1\t48\n-ACSS2\t18\n-ACTA1\t10\n-ACTA2\t182\n-ACTB\t11099\n-ACTC\t0\n-ACTG1\t1712\n-ACTG2\t29\n-ACTL6A\t78\n-ACTL6B\t0\n-ACTL8\t1\n-ACTN1\t7604\n-ACTN2\t0\n-ACTN4\t8247\n-ACTR10\t30\n-ACTR1A\t18\n-ACTR1B\t1\n-ACTR2\t3964\n-ACTR3\t830\n-ACTR3B\t1\n-ACTR5\t20\n-ACTR6\t21\n-ACTR8\t289\n-ACTRT1\t1\n-ACVR1\t31\n-ACVR1B\t6\n-ACVR1C\t0\n-ACVR2\t41\n-ACVR2A\t21\n-ACVR2B\t3\n-ACVRL1\t0\n-ACY1\t3\n-ACY1L2\t253\n-ACY3\t3\n-ACYP1\t155\n-ACYP2\t240\n-AD-003\t0\n-AD-020\t24\n-AD023\t30\n-AD031\t48\n-AD7C-NTP\t3\n-ADA\t5311\n-ADAL\t0\n-ADAM10\t403\n-ADAM11\t1\n-ADAM12\t144\n-ADAM15\t76\n-ADAM17\t560\n-ADAM18\t0\n-ADAM19\t142\n-ADAM20\t5\n-ADAM21\t0\n-ADAM22\t116\n-ADAM23\t61\n-ADAM28\t0\n-ADAM32\t0\n-ADAM33\t12\n-ADAM8\t7\n-ADAM9\t2589\n-ADAMDEC1\t0\n-ADAMTS1\t94\n-ADAMTS10\t2\n-ADAMTS12\t4\n-ADAMTS13\t32\n-ADAMTS15\t44\n-ADAMTS16\t10\n-ADAMTS17\t1\n-ADAMTS18\t3\n-ADAMTS19\t11\n-ADAMTS2\t0\n-ADAMTS20\t0\n-ADAMTS3\t0\n-ADAMTS4\t78\n-ADAMTS5\t9\n-ADAMTS6\t86\n-ADAMTS7\t0\n-ADAMTS8\t0\n-ADAMTS9\t223\n-ADAMTSL1\t37\n-ADAMTSL2\t0\n-ADAMTSL3\t0\n-ADAMTSL4\t19\n-ADAR\t96\n-ADARB1\t28\n-ADARB2\t1\n-ADAT1\t6\n-ADC\t12\n-ADCK1\t5\n-ADCK2\t23\n-ADCK4\t21\n-ADCK5\t2\n-ADCY1\t59\n-ADCY2\t0\n-ADCY3\t166\n-ADCY5\t0\n-ADCY6\t519\n-ADCY7\t98\n-ADCY8\t7\n-ADCY9\t43\n-ADCYAP1\t0\n-ADCYAP1R1\t1\n-ADD1\t808\n-ADD2\t339\n-ADD3\t400\n-ADFP\t77\n-ADH1B\t0\n-ADH1C\t0\n-ADH4\t0\n-ADH5\t612\n-ADHFE1\t26\n-ADI1\t204\n-ADIPOR1\t123\n-ADIPOR2\t40\n-ADK\t287\n-ADM\t215\n-ADM2\t0\n-ADMP\t10\n-ADMR\t8582\n-ADNP\t615\n-ADORA1\t132\n-ADORA2A\t30\n-ADORA2B\t42\n-ADPGK\t4998\n-ADPN\t4\n-ADPRH\t0\n-ADPRHL1\t0\n-ADPRHL2\t77\n-ADRA1A\t0\n-ADRA1B\t1\n-ADRA1D\t2\n-ADRA2A\t0\n-ADRA2B\t0\n-ADRB1\t12\n-ADRB2\t4\n-ADRB3\t10\n-ADRBK1\t11\n-ADRBK2\t18\n-ADRM1\t204\n-ADSL\t112\n-ADSS\t386\n-ADSSL1\t9\n-AE2\t30\n-AEBP1\t4799\n-AEBP2\t320\n-AEGP\t9\n-AER61\t11\n-AES\t292\n-AF15Q14\t0\n-AF1Q\t45\n-AF5Q31\t395\n-AFAP\t480\n-AFAR3\t11\n-AFF1\t132\n-AFF2\t0\n-AFF3\t70\n-AFF4\t6\n-AFG3L1
\t19\n-AFG3L2\t88\n-AFMID\t232\n-AFP\t4\n-AFTIPHILIN\t309\n-AG1\t1\n-AGA\t159\n-AGBL2\t6\n-AGBL3\t0\n-AGC1\t0\n-AGER\t0\n-AGGF1\t130\n-AGL\t209\n-AGMAT\t1\n-AGPAT1\t12\n-AGPAT2\t0\n-AGPAT3\t54\n-AGPAT4\t45\n-AGPAT5\t1757\n-AGPAT6\t33\n-AGPAT7\t7\n-AGPS\t350\n-AGR2\t12\n-AGRN\t98\n-AGRP\t0\n-AGT\t79\n-AGTPBP1\t9\n-AGTR1\t0\n-AGTR2\t0\n-AGTRAP\t286\n-AGXT2L1\t0\n-AHCTF1\t33\n-AHCY\t463\n-AHCYL1\t745\n-AHDC1\t5\n-AHI1\t40\n-AHNAK\t7\n-AHR\t1326\n-AHSA1\t159\n-AHSA2\t185\n-AHSG\t2\n-AICDA\t0\n-AIFL\t0\n-AIG1\t289\n-AIM1\t58\n-AIM1L\t0\n-AIP\t453\n-AIP1\t142\n-AIPL1\t85\n-AK1\t138\n-AK2\t415\n-AK3\t106\n-AK3L1\t21\n-AK5\t36\n-AK7\t13\n-AKAP1\t18\n-AKAP10\t557\n-AKAP11\t3\n-AKAP12\t196\n-AKAP13\t40\n-AKAP14\t0\n-AKAP3\t9\n-AKAP6\t206\n-AKAP7\t225\n-AKAP8\t80\n-AKAP8L\t19\n-AKAP9\t36"..b'F281\t151\n-ZNF282\t100\n-ZNF283\t19\n-ZNF284\t62\n-ZNF285\t0\n-ZNF286\t562\n-ZNF287\t64\n-ZNF289\t16\n-ZNF291\t57\n-ZNF292\t245\n-ZNF294\t69\n-ZNF295\t105\n-ZNF297\t116\n-ZNF297B\t337\n-ZNF3\t27\n-ZNF30\t29\n-ZNF300\t0\n-ZNF302\t262\n-ZNF304\t29\n-ZNF305\t60\n-ZNF306\t0\n-ZNF307\t20\n-ZNF31\t13\n-ZNF311\t0\n-ZNF312\t0\n-ZNF313\t462\n-ZNF317\t176\n-ZNF318\t45\n-ZNF319\t95\n-ZNF32\t53\n-ZNF322A\t4\n-ZNF323\t20\n-ZNF324\t162\n-ZNF326\t143\n-ZNF329\t39\n-ZNF330\t105\n-ZNF331\t72\n-ZNF333\t49\n-ZNF334\t121\n-ZNF335\t9\n-ZNF336\t17\n-ZNF337\t63\n-ZNF33A\t1\n-ZNF34\t0\n-ZNF341\t0\n-ZNF342\t0\n-ZNF343\t44\n-ZNF345\t0\n-ZNF346\t39\n-ZNF347\t49\n-ZNF35\t8\n-ZNF350\t12\n-ZNF354A\t165\n-ZNF354B\t43\n-ZNF354C\t1139\n-ZNF358\t10\n-ZNF364\t45\n-ZNF365\t47\n-ZNF366\t0\n-ZNF367\t441\n-ZNF37A\t79\n-ZNF37B\t57\n-ZNF38\t78\n-ZNF382\t11\n-ZNF383\t0\n-ZNF384\t43\n-ZNF385\t1\n-ZNF390\t4\n-ZNF394\t598\n-ZNF395\t56\n-ZNF396\t11\n-ZNF397\t32\n-ZNF398\t19\n-ZNF403\t89\n-ZNF404\t0\n-ZNF406\t0\n-ZNF407\t10\n-ZNF408\t8\n-ZNF41\t192\n-ZNF410\t117\n-ZNF414\t4\n-ZNF415\t35\n-ZNF416\t2\n-ZNF417\t2\n-ZNF418\t20\n-ZNF419\t7\n-ZNF42\t27\n-ZNF420\t0\n-ZNF423\t82\n-ZNF425\t23\n-ZNF426\t27\n-ZNF429\t13\n-ZNF43\t59\n-ZNF430\t66\
n-ZNF431\t4\n-ZNF432\t15\n-ZNF433\t19\n-ZNF434\t34\n-ZNF435\t0\n-ZNF436\t193\n-ZNF438\t95\n-ZNF439\t15\n-ZNF44\t96\n-ZNF440\t21\n-ZNF440L\t0\n-ZNF441\t45\n-ZNF442\t0\n-ZNF443\t63\n-ZNF444\t230\n-ZNF445\t9\n-ZNF446\t25\n-ZNF447\t775\n-ZNF449\t18\n-ZNF45\t99\n-ZNF451\t197\n-ZNF452\t0\n-ZNF454\t97\n-ZNF46\t24\n-ZNF462\t61\n-ZNF467\t0\n-ZNF468\t513\n-ZNF469\t0\n-ZNF471\t40\n-ZNF473\t8\n-ZNF479\t9\n-ZNF480\t108\n-ZNF482\t157\n-ZNF483\t0\n-ZNF484\t11\n-ZNF485\t13\n-ZNF486\t338\n-ZNF488\t0\n-ZNF490\t0\n-ZNF491\t0\n-ZNF492\t1\n-ZNF493\t21\n-ZNF496\t132\n-ZNF497\t0\n-ZNF498\t119\n-ZNF499\t2\n-ZNF500\t2\n-ZNF501\t21\n-ZNF502\t22\n-ZNF503\t0\n-ZNF505\t0\n-ZNF506\t23\n-ZNF507\t94\n-ZNF509\t76\n-ZNF510\t223\n-ZNF511\t275\n-ZNF512\t18\n-ZNF513\t110\n-ZNF514\t11\n-ZNF516\t38\n-ZNF517\t57\n-ZNF518\t98\n-ZNF519\t27\n-ZNF521\t8\n-ZNF524\t8\n-ZNF525\t37\n-ZNF526\t33\n-ZNF527\t0\n-ZNF528\t45\n-ZNF529\t4\n-ZNF530\t5\n-ZNF532\t1033\n-ZNF533\t2\n-ZNF536\t37\n-ZNF537\t238\n-ZNF539\t30\n-ZNF540\t15\n-ZNF542\t45\n-ZNF543\t5\n-ZNF544\t50\n-ZNF545\t148\n-ZNF546\t4\n-ZNF547\t225\n-ZNF548\t23\n-ZNF549\t4\n-ZNF550\t0\n-ZNF551\t50\n-ZNF552\t41\n-ZNF553\t186\n-ZNF554\t7\n-ZNF555\t49\n-ZNF557\t15\n-ZNF558\t5\n-ZNF559\t116\n-ZNF560\t13\n-ZNF561\t135\n-ZNF562\t0\n-ZNF563\t12\n-ZNF564\t79\n-ZNF565\t8\n-ZNF566\t0\n-ZNF567\t19\n-ZNF568\t39\n-ZNF569\t55\n-ZNF570\t6\n-ZNF571\t36\n-ZNF572\t1\n-ZNF573\t0\n-ZNF574\t2\n-ZNF575\t14\n-ZNF576\t36\n-ZNF577\t20\n-ZNF578\t18\n-ZNF579\t1\n-ZNF580\t131\n-ZNF581\t13\n-ZNF582\t0\n-ZNF583\t43\n-ZNF584\t42\n-ZNF585A\t147\n-ZNF585B\t23\n-ZNF586\t13\n-ZNF587\t15\n-ZNF588\t92\n-ZNF589\t9\n-ZNF592\t88\n-ZNF593\t165\n-ZNF594\t17\n-ZNF595\t9\n-ZNF596\t423\n-ZNF597\t4\n-ZNF598\t12\n-ZNF599\t17\n-ZNF6\t44\n-ZNF600\t14\n-ZNF605\t41\n-ZNF606\t26\n-ZNF607\t23\n-ZNF608\t130\n-ZNF609\t13\n-ZNF610\t16\n-ZNF611\t84\n-ZNF613\t13\n-ZNF614\t31\n-ZNF615\t20\n-ZNF616\t12\n-ZNF618\t0\n-ZNF619\t0\n-ZNF620\t33\n-ZNF621\t55\n-ZNF622\t100\n-ZNF623\t245\n-ZNF624\t19\n-ZNF625\t9\n-ZNF626\t66\n-ZNF6
27\t35\n-ZNF629\t117\n-ZNF630\t8\n-ZNF638\t1139\n-ZNF639\t454\n-ZNF641\t18\n-ZNF642\t41\n-ZNF643\t0\n-ZNF644\t210\n-ZNF646\t4\n-ZNF649\t60\n-ZNF650\t131\n-ZNF651\t145\n-ZNF652\t104\n-ZNF653\t10\n-ZNF654\t10\n-ZNF655\t483\n-ZNF658\t52\n-ZNF659\t11\n-ZNF66\t0\n-ZNF660\t51\n-ZNF663\t0\n-ZNF664\t352\n-ZNF665\t7\n-ZNF667\t37\n-ZNF668\t3\n-ZNF669\t12\n-ZNF670\t46\n-ZNF671\t97\n-ZNF672\t135\n-ZNF673\t50\n-ZNF677\t0\n-ZNF678\t19\n-ZNF680\t18\n-ZNF681\t0\n-ZNF682\t8\n-ZNF684\t52\n-ZNF687\t28\n-ZNF688\t36\n-ZNF689\t6\n-ZNF69\t6\n-ZNF690\t9\n-ZNF691\t54\n-ZNF692\t0\n-ZNF694\t10\n-ZNF695\t39\n-ZNF697\t11\n-ZNF7\t163\n-ZNF70\t0\n-ZNF700\t126\n-ZNF701\t7\n-ZNF702\t14\n-ZNF703\t0\n-ZNF704\t0\n-ZNF706\t1130\n-ZNF707\t2\n-ZNF708\t70\n-ZNF71\t1\n-ZNF710\t20\n-ZNF713\t0\n-ZNF714\t0\n-ZNF718\t24\n-ZNF720\t92\n-ZNF721\t88\n-ZNF722\t5\n-ZNF74\t23\n-ZNF740\t28\n-ZNF75\t191\n-ZNF75A\t381\n-ZNF76\t661\n-ZNF77\t14\n-ZNF79\t0\n-ZNF8\t0\n-ZNF80\t0\n-ZNF81\t16\n-ZNF83\t553\n-ZNF84\t72\n-ZNF85\t147\n-ZNF9\t326\n-ZNF91\t79\n-ZNF92\t618\n-ZNF93\t90\n-ZNF96\t0\n-ZNFN1A2\t0\n-ZNFN1A3\t0\n-ZNFN1A4\t32\n-ZNFN1A5\t91\n-ZNFX1\t64\n-ZNHIT1\t0\n-ZNHIT2\t18\n-ZNHIT3\t276\n-ZNHIT4\t31\n-ZNRD1\t58\n-ZNRF1\t278\n-ZNRF2\t121\n-ZNRF3\t117\n-ZP3\t144\n-ZPBP\t0\n-ZPLD1\t0\n-ZRANB1\t31\n-ZRANB3\t34\n-ZRF1\t313\n-ZSCAN1\t118\n-ZSCAN2\t5\n-ZSCAN5\t0\n-ZSWIM1\t62\n-ZSWIM3\t0\n-ZSWIM4\t26\n-ZSWIM5\t9\n-ZSWIM6\t752\n-ZW10\t14\n-ZWILCH\t22\n-ZWINT\t222\n-ZXDA\t0\n-ZXDB\t20\n-ZXDC\t44\n-ZYG11B\t4403\n-ZYG11BL\t21\n-ZYX\t1378\n-ZZANK1\t42\n-ZZEF1\t25\n-ZZZ3\t430\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/raw/f2cond1.tsv
--- a/SMART/DiffExpAnal/DESeqTools/raw/f2cond1.tsv Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b"@@ -1,18761 +0,0 @@\n-GliNS1\tG166\n-13CDNA73\t0\n-15E1.2\t74\n-182-FIP\t127\n-2'-PDE\t38\n-3'HEXO\t20\n-3.8-1\t0\n-384D8-2\t3\n-76P\t51\n-7h3\t0\n-8D6A\t1\n-A1BG\t0\n-A2BP1\t18\n-A2M\t2209\n-A4GALT\t0\n-A4GNT\t0\n-AAA1\t0\n-AAAS\t29\n-AACS\t1294\n-AADACL1\t13\n-AADAT\t5\n-AAK1\t0\n-AAMP\t170\n-AANAT\t0\n-AARS\t105\n-AARSD1\t48\n-AARSL\t10\n-AASDH\t5\n-AASDHPPT\t128\n-AASS\t101\n-AATF\t40\n-AATK\t0\n-ABAT\t838\n-ABC1\t13\n-ABCA1\t13\n-ABCA10\t0\n-ABCA11\t1\n-ABCA12\t0\n-ABCA13\t0\n-ABCA2\t5\n-ABCA3\t91\n-ABCA4\t0\n-ABCA5\t2\n-ABCA6\t1\n-ABCA7\t7\n-ABCA8\t76\n-ABCA9\t133\n-ABCB1\t0\n-ABCB10\t41\n-ABCB11\t0\n-ABCB4\t28\n-ABCB5\t0\n-ABCB6\t17\n-ABCB7\t47\n-ABCB8\t53\n-ABCB9\t6\n-ABCC1\t31\n-ABCC10\t16\n-ABCC11\t0\n-ABCC12\t0\n-ABCC13\t5\n-ABCC2\t0\n-ABCC3\t3\n-ABCC4\t24\n-ABCC5\t15\n-ABCC6\t8\n-ABCC8\t13\n-ABCC9\t2\n-ABCD1\t19\n-ABCD2\t0\n-ABCD3\t147\n-ABCD4\t71\n-ABCE1\t378\n-ABCF1\t39\n-ABCF2\t42\n-ABCF3\t29\n-ABCG1\t5\n-ABCG2\t29\n-ABCG4\t0\n-ABHD1\t1\n-ABHD10\t14\n-ABHD11\t1182\n-ABHD14A\t37\n-ABHD14B\t10\n-ABHD2\t79\n-ABHD3\t95\n-ABHD4\t82\n-ABHD5\t31\n-ABHD6\t204\n-ABHD7\t0\n-ABHD8\t6\n-ABI1\t199\n-ABI2\t200\n-ABI3\t0\n-ABI3BP\t4\n-ABL1\t67\n-ABL2\t14\n-ABLIM1\t50\n-ABLIM2\t1\n-ABLIM3\t0\n-ABO\t1\n-ABR\t158\n-ABRA\t0\n-ABT1\t31\n-ABTB1\t12\n-ABTB2\t9\n-ACAA1\t52\n-ACAA2\t109\n-ACACA\t67\n-ACACB\t2\n-ACAD10\t19\n-ACAD11\t70\n-ACAD8\t43\n-ACAD9\t53\n-ACADL\t0\n-ACADM\t67\n-ACADS\t21\n-ACADSB\t51\n-ACADVL\t86\n-ACAS2\t12\n-ACAS2L\t2\n-ACAT1\t141\n-ACAT2\t153\n-ACATE2\t19\n-ACBD3\t169\n-ACBD4\t0\n-ACBD5\t21\n-ACBD6\t167\n-ACBD7\t1\n-ACCN2\t14\n-ACCN3\t6\n-ACCN4\t0\n-ACD\t3\n-ACDC\t0\n-ACE\t0\n-ACF\t12\n-ACHE\t75\n-ACIN1\t0\n-ACLY\t431\n-ACMSD\t9\n-ACN9\t128\n-ACO1\t42\n-ACO2\t309\n-ACOT2\t9\n-ACOT4\t0\n-ACOT7\t128\n-ACOT8\t0\n-ACOT9\t11\n-ACOX1\t71\n-ACOX2\t4\n-ACOX3\t4\n-ACOXL\t0\n-ACP1\t303\n-ACP2\t26\n-ACP5\t1\n-ACP6\t125\n-ACPL2\t71\n-ACPP\t5\n-ACR\t0\n-ACRBP\t0\n-ACRC\t10\n-ACRV1\t2\n-ACSBG1\t4\n-ACSL1\t3\n-ACSL3\t521\n-ACSL4\t68\n-ACSL5\t2\n-ACSL6\t46\n-ACSM2
\t4\n-ACSM3\t0\n-ACSS1\t26\n-ACSS2\t30\n-ACTA1\t28\n-ACTA2\t0\n-ACTB\t16535\n-ACTC\t0\n-ACTG1\t1349\n-ACTG2\t23\n-ACTL6A\t38\n-ACTL6B\t2\n-ACTL8\t0\n-ACTN1\t669\n-ACTN2\t30\n-ACTN4\t4178\n-ACTR10\t61\n-ACTR1A\t55\n-ACTR1B\t3\n-ACTR2\t4056\n-ACTR3\t264\n-ACTR3B\t1\n-ACTR5\t16\n-ACTR6\t218\n-ACTR8\t78\n-ACTRT1\t0\n-ACVR1\t3\n-ACVR1B\t11\n-ACVR1C\t0\n-ACVR2\t21\n-ACVR2A\t4\n-ACVR2B\t0\n-ACVRL1\t2\n-ACY1\t21\n-ACY1L2\t58\n-ACY3\t0\n-ACYP1\t61\n-ACYP2\t81\n-AD-003\t2\n-AD-020\t4\n-AD023\t0\n-AD031\t36\n-AD7C-NTP\t0\n-ADA\t1274\n-ADAL\t1\n-ADAM10\t188\n-ADAM11\t7\n-ADAM12\t333\n-ADAM15\t40\n-ADAM17\t69\n-ADAM18\t0\n-ADAM19\t100\n-ADAM20\t0\n-ADAM21\t4\n-ADAM22\t238\n-ADAM23\t48\n-ADAM28\t0\n-ADAM32\t0\n-ADAM33\t11\n-ADAM8\t1\n-ADAM9\t411\n-ADAMDEC1\t0\n-ADAMTS1\t192\n-ADAMTS10\t28\n-ADAMTS12\t9\n-ADAMTS13\t2\n-ADAMTS15\t4\n-ADAMTS16\t10\n-ADAMTS17\t0\n-ADAMTS18\t0\n-ADAMTS19\t2\n-ADAMTS2\t10\n-ADAMTS20\t0\n-ADAMTS3\t122\n-ADAMTS4\t21\n-ADAMTS5\t25\n-ADAMTS6\t36\n-ADAMTS7\t0\n-ADAMTS8\t0\n-ADAMTS9\t206\n-ADAMTSL1\t2\n-ADAMTSL2\t0\n-ADAMTSL3\t0\n-ADAMTSL4\t2\n-ADAR\t42\n-ADARB1\t5\n-ADARB2\t0\n-ADAT1\t15\n-ADC\t0\n-ADCK1\t10\n-ADCK2\t2\n-ADCK4\t87\n-ADCK5\t5\n-ADCY1\t158\n-ADCY2\t0\n-ADCY3\t15\n-ADCY5\t7\n-ADCY6\t78\n-ADCY7\t9\n-ADCY8\t0\n-ADCY9\t11\n-ADCYAP1\t0\n-ADCYAP1R1\t4\n-ADD1\t196\n-ADD2\t52\n-ADD3\t581\n-ADFP\t24\n-ADH1B\t0\n-ADH1C\t0\n-ADH4\t0\n-ADH5\t389\n-ADHFE1\t5\n-ADI1\t170\n-ADIPOR1\t71\n-ADIPOR2\t12\n-ADK\t153\n-ADM\t40\n-ADM2\t0\n-ADMP\t0\n-ADMR\t2037\n-ADNP\t246\n-ADORA1\t11\n-ADORA2A\t0\n-ADORA2B\t1\n-ADPGK\t1928\n-ADPN\t2\n-ADPRH\t3\n-ADPRHL1\t4\n-ADPRHL2\t16\n-ADRA1A\t0\n-ADRA1B\t0\n-ADRA1D\t0\n-ADRA2A\t66\n-ADRA2B\t0\n-ADRB1\t6\n-ADRB2\t2\n-ADRB3\t17\n-ADRBK1\t9\n-ADRBK2\t5\n-ADRM1\t150\n-ADSL\t99\n-ADSS\t166\n-ADSSL1\t2\n-AE2\t15\n-AEBP1\t1063\n-AEBP2\t109\n-AEGP\t0\n-AER61\t14\n-AES\t864\n-AF15Q14\t0\n-AF1Q\t26\n-AF5Q31\t106\n-AFAP\t104\n-AFAR3\t6\n-AFF1\t15\n-AFF2\t1\n-AFF3\t5\n-AFF4\t1\n-AFG3L1\t7\n-AFG3L2\t74\n-AFMID\t112\n-AFP\t0\n-AFTIPHILIN\t6
6\n-AG1\t2\n-AGA\t17\n-AGBL2\t0\n-AGBL3\t0\n-AGC1\t2\n-AGER\t3\n-AGGF1\t35\n-AGL\t13\n-AGMAT\t5\n-AGPAT1\t31\n-AGPAT2\t1\n-AGPAT3\t14\n-AGPAT4\t41\n-AGPAT5\t280\n-AGPAT6\t24\n-AGPAT7\t11\n-AGPS\t55\n-AGR2\t7\n-AGRN\t298\n-AGRP\t0\n-AGT\t860\n-AGTPBP1\t29\n-AGTR1\t0\n-AGTR2\t0\n-AGTRAP\t152\n-AGXT2L1\t0\n-AHCTF1\t53\n-AHCY\t395\n-AHCYL1\t653\n-AHDC1\t2\n-AHI1\t69\n-AHNAK\t0\n-AHR\t252\n-AHSA1\t107\n-AHSA2\t33\n-AHSG\t0\n-AICDA\t0\n-AIFL\t0\n-AIG1\t65\n-AIM1\t0\n-AIM1L\t0\n-AIP\t115\n-AIP1\t74\n-AIPL1\t15\n-AK1\t45\n-AK2\t110\n-AK3\t101\n-AK3L1\t22\n-AK5\t4\n-AK7\t1\n-AKAP1\t39\n-AKAP10\t81\n-AKAP11\t90\n-AKAP12\t103\n-AKAP13\t45\n-AKAP14\t1\n-AKAP3\t4\n-AKAP6\t34\n-AKAP7\t59\n-AKAP8\t54\n-AKAP8L\t20\n-AKAP9\t39\n-AKIP\t0\n-AKNA\t2\n-AKR1A1\t183\n-AKR1B1\t370\n-AKR1B10\t0\n-AKR1C1\t1\n-AKR1C"..b'NF274\t20\n-ZNF275\t30\n-ZNF276\t16\n-ZNF277\t442\n-ZNF278\t43\n-ZNF28\t26\n-ZNF281\t94\n-ZNF282\t51\n-ZNF283\t23\n-ZNF284\t103\n-ZNF285\t14\n-ZNF286\t306\n-ZNF287\t35\n-ZNF289\t21\n-ZNF291\t22\n-ZNF292\t84\n-ZNF294\t37\n-ZNF295\t76\n-ZNF297\t38\n-ZNF297B\t90\n-ZNF3\t21\n-ZNF30\t128\n-ZNF300\t7\n-ZNF302\t194\n-ZNF304\t33\n-ZNF305\t27\n-ZNF306\t1\n-ZNF307\t4\n-ZNF31\t7\n-ZNF311\t24\n-ZNF312\t0\n-ZNF313\t142\n-ZNF317\t47\n-ZNF318\t59\n-ZNF319\t29\n-ZNF32\t41\n-ZNF322A\t0\n-ZNF323\t1\n-ZNF324\t112\n-ZNF326\t59\n-ZNF329\t17\n-ZNF330\t41\n-ZNF331\t27\n-ZNF333\t32\n-ZNF334\t10\n-ZNF335\t4\n-ZNF336\t3\n-ZNF337\t27\n-ZNF33A\t6\n-ZNF34\t0\n-ZNF341\t0\n-ZNF342\t1\n-ZNF343\t17\n-ZNF345\t10\n-ZNF346\t34\n-ZNF347\t14\n-ZNF35\t4\n-ZNF350\t39\n-ZNF354A\t31\n-ZNF354B\t13\n-ZNF354C\t964\n-ZNF358\t15\n-ZNF364\t29\n-ZNF365\t5\n-ZNF366\t0\n-ZNF367\t66\n-ZNF37A\t59\n-ZNF37B\t52\n-ZNF38\t37\n-ZNF382\t10\n-ZNF383\t8\n-ZNF384\t66\n-ZNF385\t2\n-ZNF390\t2\n-ZNF394\t349\n-ZNF395\t24\n-ZNF396\t0\n-ZNF397\t36\n-ZNF398\t26\n-ZNF403\t128\n-ZNF404\t5\n-ZNF406\t3\n-ZNF407\t12\n-ZNF408\t9\n-ZNF41\t72\n-ZNF410\t54\n-ZNF414\t0\n-ZNF415\t111\n-ZNF416\t37\n-ZNF417\t1\n-ZNF418\t10\n-ZNF419\t29\n-ZNF42\t73\n-ZNF420\
t4\n-ZNF423\t16\n-ZNF425\t4\n-ZNF426\t17\n-ZNF429\t30\n-ZNF43\t66\n-ZNF430\t86\n-ZNF431\t1\n-ZNF432\t37\n-ZNF433\t19\n-ZNF434\t43\n-ZNF435\t2\n-ZNF436\t279\n-ZNF438\t102\n-ZNF439\t10\n-ZNF44\t103\n-ZNF440\t16\n-ZNF440L\t6\n-ZNF441\t3\n-ZNF442\t2\n-ZNF443\t50\n-ZNF444\t392\n-ZNF445\t11\n-ZNF446\t5\n-ZNF447\t52\n-ZNF449\t36\n-ZNF45\t171\n-ZNF451\t53\n-ZNF452\t0\n-ZNF454\t53\n-ZNF46\t3\n-ZNF462\t114\n-ZNF467\t6\n-ZNF468\t171\n-ZNF469\t0\n-ZNF471\t32\n-ZNF473\t22\n-ZNF479\t8\n-ZNF480\t121\n-ZNF482\t27\n-ZNF483\t2\n-ZNF484\t7\n-ZNF485\t3\n-ZNF486\t410\n-ZNF488\t5\n-ZNF490\t2\n-ZNF491\t0\n-ZNF492\t2\n-ZNF493\t29\n-ZNF496\t51\n-ZNF497\t0\n-ZNF498\t35\n-ZNF499\t9\n-ZNF500\t3\n-ZNF501\t7\n-ZNF502\t3\n-ZNF503\t32\n-ZNF505\t7\n-ZNF506\t21\n-ZNF507\t70\n-ZNF509\t5\n-ZNF510\t54\n-ZNF511\t403\n-ZNF512\t58\n-ZNF513\t77\n-ZNF514\t7\n-ZNF516\t76\n-ZNF517\t21\n-ZNF518\t72\n-ZNF519\t17\n-ZNF521\t69\n-ZNF524\t8\n-ZNF525\t9\n-ZNF526\t14\n-ZNF527\t3\n-ZNF528\t28\n-ZNF529\t51\n-ZNF530\t7\n-ZNF532\t340\n-ZNF533\t1\n-ZNF536\t48\n-ZNF537\t179\n-ZNF539\t80\n-ZNF540\t2\n-ZNF542\t31\n-ZNF543\t19\n-ZNF544\t10\n-ZNF545\t0\n-ZNF546\t6\n-ZNF547\t212\n-ZNF548\t19\n-ZNF549\t11\n-ZNF550\t20\n-ZNF551\t18\n-ZNF552\t16\n-ZNF553\t77\n-ZNF554\t11\n-ZNF555\t23\n-ZNF557\t1\n-ZNF558\t25\n-ZNF559\t59\n-ZNF560\t0\n-ZNF561\t102\n-ZNF562\t11\n-ZNF563\t23\n-ZNF564\t76\n-ZNF565\t2\n-ZNF566\t10\n-ZNF567\t7\n-ZNF568\t14\n-ZNF569\t61\n-ZNF570\t2\n-ZNF571\t7\n-ZNF572\t0\n-ZNF573\t7\n-ZNF574\t25\n-ZNF575\t1\n-ZNF576\t41\n-ZNF577\t36\n-ZNF578\t9\n-ZNF579\t36\n-ZNF580\t343\n-ZNF581\t76\n-ZNF582\t6\n-ZNF583\t41\n-ZNF584\t22\n-ZNF585A\t93\n-ZNF585B\t64\n-ZNF586\t12\n-ZNF587\t26\n-ZNF588\t26\n-ZNF589\t8\n-ZNF592\t19\n-ZNF593\t33\n-ZNF594\t3\n-ZNF595\t7\n-ZNF596\t329\n-ZNF597\t0\n-ZNF598\t43\n-ZNF599\t8\n-ZNF6\t40\n-ZNF600\t15\n-ZNF605\t12\n-ZNF606\t63\n-ZNF607\t83\n-ZNF608\t59\n-ZNF609\t12\n-ZNF610\t8\n-ZNF611\t122\n-ZNF613\t25\n-ZNF614\t20\n-ZNF615\t51\n-ZNF616\t23\n-ZNF618\t3\n-ZNF619\t0\n-ZNF620\t12\n-ZNF621\t11\n-ZNF622\
t24\n-ZNF623\t43\n-ZNF624\t34\n-ZNF625\t0\n-ZNF626\t35\n-ZNF627\t29\n-ZNF629\t65\n-ZNF630\t0\n-ZNF638\t382\n-ZNF639\t74\n-ZNF641\t5\n-ZNF642\t2\n-ZNF643\t0\n-ZNF644\t44\n-ZNF646\t1\n-ZNF649\t67\n-ZNF650\t38\n-ZNF651\t115\n-ZNF652\t17\n-ZNF653\t18\n-ZNF654\t10\n-ZNF655\t99\n-ZNF658\t10\n-ZNF659\t5\n-ZNF66\t0\n-ZNF660\t12\n-ZNF663\t2\n-ZNF664\t82\n-ZNF665\t4\n-ZNF667\t231\n-ZNF668\t1\n-ZNF669\t22\n-ZNF670\t2\n-ZNF671\t57\n-ZNF672\t62\n-ZNF673\t1\n-ZNF677\t2\n-ZNF678\t7\n-ZNF680\t8\n-ZNF681\t0\n-ZNF682\t5\n-ZNF684\t12\n-ZNF687\t10\n-ZNF688\t2\n-ZNF689\t53\n-ZNF69\t6\n-ZNF690\t11\n-ZNF691\t25\n-ZNF692\t5\n-ZNF694\t15\n-ZNF695\t0\n-ZNF697\t25\n-ZNF7\t27\n-ZNF70\t0\n-ZNF700\t31\n-ZNF701\t12\n-ZNF702\t0\n-ZNF703\t0\n-ZNF704\t2\n-ZNF706\t359\n-ZNF707\t17\n-ZNF708\t26\n-ZNF71\t29\n-ZNF710\t37\n-ZNF713\t0\n-ZNF714\t29\n-ZNF718\t0\n-ZNF720\t54\n-ZNF721\t15\n-ZNF722\t0\n-ZNF74\t28\n-ZNF740\t5\n-ZNF75\t68\n-ZNF75A\t84\n-ZNF76\t110\n-ZNF77\t3\n-ZNF79\t1\n-ZNF8\t2\n-ZNF80\t0\n-ZNF81\t2\n-ZNF83\t698\n-ZNF84\t34\n-ZNF85\t80\n-ZNF9\t85\n-ZNF91\t128\n-ZNF92\t13\n-ZNF93\t51\n-ZNF96\t0\n-ZNFN1A2\t0\n-ZNFN1A3\t0\n-ZNFN1A4\t5\n-ZNFN1A5\t43\n-ZNFX1\t9\n-ZNHIT1\t5\n-ZNHIT2\t4\n-ZNHIT3\t95\n-ZNHIT4\t4\n-ZNRD1\t17\n-ZNRF1\t77\n-ZNRF2\t18\n-ZNRF3\t72\n-ZP3\t39\n-ZPBP\t0\n-ZPLD1\t0\n-ZRANB1\t54\n-ZRANB3\t9\n-ZRF1\t99\n-ZSCAN1\t99\n-ZSCAN2\t12\n-ZSCAN5\t10\n-ZSWIM1\t37\n-ZSWIM3\t0\n-ZSWIM4\t14\n-ZSWIM5\t13\n-ZSWIM6\t329\n-ZW10\t6\n-ZWILCH\t6\n-ZWINT\t152\n-ZXDA\t1\n-ZXDB\t12\n-ZXDC\t35\n-ZYG11B\t1785\n-ZYG11BL\t11\n-ZYX\t698\n-ZZANK1\t15\n-ZZEF1\t15\n-ZZZ3\t94\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/raw/f2cond2.tsv
--- a/SMART/DiffExpAnal/DESeqTools/raw/f2cond2.tsv Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b"@@ -1,18761 +0,0 @@\n-GliNS1\tCB660\n-13CDNA73\t0\n-15E1.2\t215\n-182-FIP\t334\n-2'-PDE\t34\n-3'HEXO\t121\n-3.8-1\t0\n-384D8-2\t3\n-76P\t358\n-7h3\t9\n-8D6A\t0\n-A1BG\t0\n-A2BP1\t1\n-A2M\t193\n-A4GALT\t0\n-A4GNT\t0\n-AAA1\t0\n-AAAS\t202\n-AACS\t3737\n-AADACL1\t158\n-AADAT\t22\n-AAK1\t0\n-AAMP\t512\n-AANAT\t0\n-AARS\t111\n-AARSD1\t112\n-AARSL\t55\n-AASDH\t18\n-AASDHPPT\t888\n-AASS\t130\n-AATF\t100\n-AATK\t0\n-ABAT\t8467\n-ABC1\t34\n-ABCA1\t11\n-ABCA10\t6\n-ABCA11\t10\n-ABCA12\t14\n-ABCA13\t180\n-ABCA2\t20\n-ABCA3\t250\n-ABCA4\t0\n-ABCA5\t181\n-ABCA6\t30\n-ABCA7\t1\n-ABCA8\t9\n-ABCA9\t302\n-ABCB1\t1\n-ABCB10\t71\n-ABCB11\t1\n-ABCB4\t7\n-ABCB5\t1\n-ABCB6\t26\n-ABCB7\t34\n-ABCB8\t82\n-ABCB9\t36\n-ABCC1\t21\n-ABCC10\t19\n-ABCC11\t11\n-ABCC12\t1\n-ABCC13\t2\n-ABCC2\t46\n-ABCC3\t57\n-ABCC4\t73\n-ABCC5\t28\n-ABCC6\t7\n-ABCC8\t0\n-ABCC9\t25\n-ABCD1\t13\n-ABCD2\t13\n-ABCD3\t261\n-ABCD4\t208\n-ABCE1\t567\n-ABCF1\t58\n-ABCF2\t90\n-ABCF3\t21\n-ABCG1\t3\n-ABCG2\t10\n-ABCG4\t0\n-ABHD1\t3\n-ABHD10\t6\n-ABHD11\t1161\n-ABHD14A\t219\n-ABHD14B\t16\n-ABHD2\t149\n-ABHD3\t406\n-ABHD4\t253\n-ABHD5\t90\n-ABHD6\t315\n-ABHD7\t49\n-ABHD8\t32\n-ABI1\t510\n-ABI2\t413\n-ABI3\t0\n-ABI3BP\t14\n-ABL1\t223\n-ABL2\t36\n-ABLIM1\t197\n-ABLIM2\t25\n-ABLIM3\t0\n-ABO\t0\n-ABR\t538\n-ABRA\t0\n-ABT1\t48\n-ABTB1\t11\n-ABTB2\t94\n-ACAA1\t120\n-ACAA2\t134\n-ACACA\t183\n-ACACB\t9\n-ACAD10\t33\n-ACAD11\t61\n-ACAD8\t33\n-ACAD9\t228\n-ACADL\t6\n-ACADM\t55\n-ACADS\t63\n-ACADSB\t107\n-ACADVL\t179\n-ACAS2\t8\n-ACAS2L\t1\n-ACAT1\t288\n-ACAT2\t88\n-ACATE2\t74\n-ACBD3\t866\n-ACBD4\t1\n-ACBD5\t38\n-ACBD6\t112\n-ACBD7\t22\n-ACCN2\t10\n-ACCN3\t3\n-ACCN4\t0\n-ACD\t28\n-ACDC\t10\n-ACE\t0\n-ACF\t54\n-ACHE\t15\n-ACIN1\t3\n-ACLY\t497\n-ACMSD\t7\n-ACN9\t114\n-ACO1\t134\n-ACO2\t770\n-ACOT2\t0\n-ACOT4\t0\n-ACOT7\t266\n-ACOT8\t8\n-ACOT9\t19\n-ACOX1\t277\n-ACOX2\t300\n-ACOX3\t8\n-ACOXL\t0\n-ACP1\t1384\n-ACP2\t80\n-ACP5\t0\n-ACP6\t172\n-ACPL2\t101\n-ACPP\t4\n-ACR\t13\n-ACRBP\t0\n-ACRC\t14\n-ACRV1\t0\n-ACSBG1\t52\n-ACSL1\t14\n-ACSL3\
t2158\n-ACSL4\t66\n-ACSL5\t21\n-ACSL6\t0\n-ACSM2\t0\n-ACSM3\t10\n-ACSS1\t15\n-ACSS2\t83\n-ACTA1\t65\n-ACTA2\t824\n-ACTB\t25909\n-ACTC\t0\n-ACTG1\t14476\n-ACTG2\t3\n-ACTL6A\t198\n-ACTL6B\t0\n-ACTL8\t1\n-ACTN1\t4575\n-ACTN2\t0\n-ACTN4\t7378\n-ACTR10\t33\n-ACTR1A\t159\n-ACTR1B\t40\n-ACTR2\t8300\n-ACTR3\t584\n-ACTR3B\t0\n-ACTR5\t17\n-ACTR6\t65\n-ACTR8\t176\n-ACTRT1\t0\n-ACVR1\t0\n-ACVR1B\t6\n-ACVR1C\t0\n-ACVR2\t43\n-ACVR2A\t22\n-ACVR2B\t6\n-ACVRL1\t0\n-ACY1\t17\n-ACY1L2\t93\n-ACY3\t0\n-ACYP1\t69\n-ACYP2\t282\n-AD-003\t0\n-AD-020\t19\n-AD023\t1\n-AD031\t32\n-AD7C-NTP\t6\n-ADA\t3713\n-ADAL\t12\n-ADAM10\t638\n-ADAM11\t1\n-ADAM12\t191\n-ADAM15\t58\n-ADAM17\t283\n-ADAM18\t2\n-ADAM19\t555\n-ADAM20\t0\n-ADAM21\t0\n-ADAM22\t254\n-ADAM23\t59\n-ADAM28\t0\n-ADAM32\t12\n-ADAM33\t22\n-ADAM8\t2\n-ADAM9\t3047\n-ADAMDEC1\t0\n-ADAMTS1\t0\n-ADAMTS10\t175\n-ADAMTS12\t89\n-ADAMTS13\t1\n-ADAMTS15\t163\n-ADAMTS16\t8\n-ADAMTS17\t0\n-ADAMTS18\t0\n-ADAMTS19\t0\n-ADAMTS2\t21\n-ADAMTS20\t0\n-ADAMTS3\t515\n-ADAMTS4\t0\n-ADAMTS5\t7\n-ADAMTS6\t46\n-ADAMTS7\t0\n-ADAMTS8\t0\n-ADAMTS9\t281\n-ADAMTSL1\t19\n-ADAMTSL2\t0\n-ADAMTSL3\t1\n-ADAMTSL4\t4\n-ADAR\t96\n-ADARB1\t44\n-ADARB2\t0\n-ADAT1\t17\n-ADC\t7\n-ADCK1\t21\n-ADCK2\t8\n-ADCK4\t84\n-ADCK5\t12\n-ADCY1\t63\n-ADCY2\t23\n-ADCY3\t27\n-ADCY5\t1\n-ADCY6\t376\n-ADCY7\t44\n-ADCY8\t0\n-ADCY9\t41\n-ADCYAP1\t0\n-ADCYAP1R1\t127\n-ADD1\t1109\n-ADD2\t0\n-ADD3\t680\n-ADFP\t85\n-ADH1B\t1\n-ADH1C\t0\n-ADH4\t6\n-ADH5\t1368\n-ADHFE1\t35\n-ADI1\t213\n-ADIPOR1\t208\n-ADIPOR2\t31\n-ADK\t195\n-ADM\t323\n-ADM2\t0\n-ADMP\t0\n-ADMR\t5108\n-ADNP\t961\n-ADORA1\t27\n-ADORA2A\t43\n-ADORA2B\t2\n-ADPGK\t3996\n-ADPN\t4\n-ADPRH\t0\n-ADPRHL1\t0\n-ADPRHL2\t86\n-ADRA1A\t0\n-ADRA1B\t0\n-ADRA1D\t0\n-ADRA2A\t0\n-ADRA2B\t1\n-ADRB1\t11\n-ADRB2\t0\n-ADRB3\t17\n-ADRBK1\t88\n-ADRBK2\t14\n-ADRM1\t405\n-ADSL\t188\n-ADSS\t526\n-ADSSL1\t10\n-AE2\t23\n-AEBP1\t3115\n-AEBP2\t468\n-AEGP\t25\n-AER61\t2\n-AES\t1972\n-AF15Q14\t1\n-AF1Q\t84\n-AF5Q31\t574\n-AFAP\t84\n-AFAR3\t15\n-AFF1\t69\n-AFF2\t8\n-AFF3
\t1\n-AFF4\t0\n-AFG3L1\t25\n-AFG3L2\t94\n-AFMID\t293\n-AFP\t22\n-AFTIPHILIN\t144\n-AG1\t2\n-AGA\t133\n-AGBL2\t0\n-AGBL3\t0\n-AGC1\t0\n-AGER\t3\n-AGGF1\t184\n-AGL\t374\n-AGMAT\t6\n-AGPAT1\t84\n-AGPAT2\t1\n-AGPAT3\t101\n-AGPAT4\t64\n-AGPAT5\t1149\n-AGPAT6\t47\n-AGPAT7\t48\n-AGPS\t293\n-AGR2\t14\n-AGRN\t587\n-AGRP\t0\n-AGT\t0\n-AGTPBP1\t47\n-AGTR1\t0\n-AGTR2\t0\n-AGTRAP\t306\n-AGXT2L1\t0\n-AHCTF1\t41\n-AHCY\t489\n-AHCYL1\t767\n-AHDC1\t11\n-AHI1\t40\n-AHNAK\t0\n-AHR\t1101\n-AHSA1\t311\n-AHSA2\t67\n-AHSG\t0\n-AICDA\t0\n-AIFL\t0\n-AIG1\t243\n-AIM1\t0\n-AIM1L\t0\n-AIP\t468\n-AIP1\t59\n-AIPL1\t12\n-AK1\t112\n-AK2\t316\n-AK3\t131\n-AK3L1\t52\n-AK5\t16\n-AK7\t0\n-AKAP1\t47\n-AKAP10\t309\n-AKAP11\t76\n-AKAP12\t370\n-AKAP13\t79\n-AKAP14\t11\n-AKAP3\t3\n-AKAP6\t18\n-AKAP7\t410\n-AKAP8"..b'4\n-ZNF283\t11\n-ZNF284\t85\n-ZNF285\t20\n-ZNF286\t719\n-ZNF287\t75\n-ZNF289\t112\n-ZNF291\t83\n-ZNF292\t208\n-ZNF294\t82\n-ZNF295\t61\n-ZNF297\t128\n-ZNF297B\t261\n-ZNF3\t19\n-ZNF30\t100\n-ZNF300\t3\n-ZNF302\t493\n-ZNF304\t44\n-ZNF305\t34\n-ZNF306\t5\n-ZNF307\t21\n-ZNF31\t3\n-ZNF311\t0\n-ZNF312\t41\n-ZNF313\t247\n-ZNF317\t151\n-ZNF318\t81\n-ZNF319\t130\n-ZNF32\t70\n-ZNF322A\t18\n-ZNF323\t12\n-ZNF324\t85\n-ZNF326\t122\n-ZNF329\t85\n-ZNF330\t52\n-ZNF331\t23\n-ZNF333\t88\n-ZNF334\t78\n-ZNF335\t10\n-ZNF336\t17\n-ZNF337\t55\n-ZNF33A\t26\n-ZNF34\t5\n-ZNF341\t4\n-ZNF342\t0\n-ZNF343\t48\n-ZNF345\t0\n-ZNF346\t171\n-ZNF347\t27\n-ZNF35\t15\n-ZNF350\t24\n-ZNF354A\t59\n-ZNF354B\t46\n-ZNF354C\t1747\n-ZNF358\t61\n-ZNF364\t98\n-ZNF365\t42\n-ZNF366\t0\n-ZNF367\t134\n-ZNF37A\t74\n-ZNF37B\t94\n-ZNF38\t45\n-ZNF382\t24\n-ZNF383\t8\n-ZNF384\t95\n-ZNF385\t4\n-ZNF390\t11\n-ZNF394\t684\n-ZNF395\t62\n-ZNF396\t6\n-ZNF397\t50\n-ZNF398\t29\n-ZNF403\t89\n-ZNF404\t38\n-ZNF406\t0\n-ZNF407\t27\n-ZNF408\t16\n-ZNF41\t73\n-ZNF410\t189\n-ZNF414\t14\n-ZNF415\t171\n-ZNF416\t43\n-ZNF417\t2\n-ZNF418\t21\n-ZNF419\t16\n-ZNF42\t134\n-ZNF420\t8\n-ZNF423\t277\n-ZNF425\t5\n-ZNF426\t20\n-ZNF429\t13\n-ZNF43\t52\n-ZNF430\t51\n-ZNF431\t0\n-ZNF
432\t15\n-ZNF433\t12\n-ZNF434\t30\n-ZNF435\t0\n-ZNF436\t455\n-ZNF438\t62\n-ZNF439\t80\n-ZNF44\t42\n-ZNF440\t28\n-ZNF440L\t0\n-ZNF441\t19\n-ZNF442\t0\n-ZNF443\t88\n-ZNF444\t959\n-ZNF445\t3\n-ZNF446\t27\n-ZNF447\t298\n-ZNF449\t39\n-ZNF45\t367\n-ZNF451\t138\n-ZNF452\t0\n-ZNF454\t11\n-ZNF46\t27\n-ZNF462\t376\n-ZNF467\t1\n-ZNF468\t350\n-ZNF469\t0\n-ZNF471\t51\n-ZNF473\t1\n-ZNF479\t0\n-ZNF480\t44\n-ZNF482\t175\n-ZNF483\t3\n-ZNF484\t26\n-ZNF485\t29\n-ZNF486\t648\n-ZNF488\t44\n-ZNF490\t0\n-ZNF491\t0\n-ZNF492\t1\n-ZNF493\t28\n-ZNF496\t229\n-ZNF497\t7\n-ZNF498\t69\n-ZNF499\t8\n-ZNF500\t8\n-ZNF501\t7\n-ZNF502\t6\n-ZNF503\t13\n-ZNF505\t1\n-ZNF506\t3\n-ZNF507\t291\n-ZNF509\t39\n-ZNF510\t80\n-ZNF511\t649\n-ZNF512\t56\n-ZNF513\t10\n-ZNF514\t0\n-ZNF516\t78\n-ZNF517\t120\n-ZNF518\t150\n-ZNF519\t47\n-ZNF521\t145\n-ZNF524\t38\n-ZNF525\t8\n-ZNF526\t48\n-ZNF527\t5\n-ZNF528\t35\n-ZNF529\t136\n-ZNF530\t8\n-ZNF532\t830\n-ZNF533\t0\n-ZNF536\t0\n-ZNF537\t0\n-ZNF539\t114\n-ZNF540\t32\n-ZNF542\t58\n-ZNF543\t40\n-ZNF544\t49\n-ZNF545\t71\n-ZNF546\t4\n-ZNF547\t302\n-ZNF548\t98\n-ZNF549\t1\n-ZNF550\t23\n-ZNF551\t44\n-ZNF552\t0\n-ZNF553\t300\n-ZNF554\t17\n-ZNF555\t24\n-ZNF557\t0\n-ZNF558\t22\n-ZNF559\t237\n-ZNF560\t0\n-ZNF561\t315\n-ZNF562\t11\n-ZNF563\t36\n-ZNF564\t110\n-ZNF565\t0\n-ZNF566\t11\n-ZNF567\t18\n-ZNF568\t59\n-ZNF569\t58\n-ZNF570\t0\n-ZNF571\t19\n-ZNF572\t3\n-ZNF573\t4\n-ZNF574\t47\n-ZNF575\t2\n-ZNF576\t37\n-ZNF577\t42\n-ZNF578\t3\n-ZNF579\t19\n-ZNF580\t695\n-ZNF581\t133\n-ZNF582\t3\n-ZNF583\t24\n-ZNF584\t87\n-ZNF585A\t192\n-ZNF585B\t5\n-ZNF586\t24\n-ZNF587\t47\n-ZNF588\t18\n-ZNF589\t2\n-ZNF592\t182\n-ZNF593\t98\n-ZNF594\t3\n-ZNF595\t0\n-ZNF596\t321\n-ZNF597\t9\n-ZNF598\t32\n-ZNF599\t36\n-ZNF6\t93\n-ZNF600\t7\n-ZNF605\t17\n-ZNF606\t100\n-ZNF607\t83\n-ZNF608\t45\n-ZNF609\t16\n-ZNF610\t8\n-ZNF611\t178\n-ZNF613\t44\n-ZNF614\t3\n-ZNF615\t86\n-ZNF616\t46\n-ZNF618\t1\n-ZNF619\t0\n-ZNF620\t15\n-ZNF621\t52\n-ZNF622\t128\n-ZNF623\t121\n-ZNF624\t28\n-ZNF625\t1\n-ZNF626\t14\n-ZNF627\t95\n-ZNF629\t
336\n-ZNF630\t0\n-ZNF638\t1201\n-ZNF639\t225\n-ZNF641\t60\n-ZNF642\t26\n-ZNF643\t0\n-ZNF644\t151\n-ZNF646\t5\n-ZNF649\t124\n-ZNF650\t136\n-ZNF651\t263\n-ZNF652\t14\n-ZNF653\t20\n-ZNF654\t2\n-ZNF655\t416\n-ZNF658\t23\n-ZNF659\t0\n-ZNF66\t1\n-ZNF660\t44\n-ZNF663\t0\n-ZNF664\t241\n-ZNF665\t0\n-ZNF667\t120\n-ZNF668\t8\n-ZNF669\t15\n-ZNF670\t11\n-ZNF671\t312\n-ZNF672\t81\n-ZNF673\t31\n-ZNF677\t6\n-ZNF678\t4\n-ZNF680\t22\n-ZNF681\t0\n-ZNF682\t0\n-ZNF684\t0\n-ZNF687\t59\n-ZNF688\t15\n-ZNF689\t10\n-ZNF69\t48\n-ZNF690\t22\n-ZNF691\t129\n-ZNF692\t10\n-ZNF694\t86\n-ZNF695\t4\n-ZNF697\t22\n-ZNF7\t80\n-ZNF70\t0\n-ZNF700\t100\n-ZNF701\t10\n-ZNF702\t20\n-ZNF703\t22\n-ZNF704\t0\n-ZNF706\t996\n-ZNF707\t15\n-ZNF708\t67\n-ZNF71\t77\n-ZNF710\t260\n-ZNF713\t0\n-ZNF714\t0\n-ZNF718\t1\n-ZNF720\t170\n-ZNF721\t21\n-ZNF722\t0\n-ZNF74\t112\n-ZNF740\t28\n-ZNF75\t88\n-ZNF75A\t349\n-ZNF76\t407\n-ZNF77\t19\n-ZNF79\t34\n-ZNF8\t2\n-ZNF80\t0\n-ZNF81\t4\n-ZNF83\t470\n-ZNF84\t161\n-ZNF85\t113\n-ZNF9\t249\n-ZNF91\t86\n-ZNF92\t69\n-ZNF93\t53\n-ZNF96\t0\n-ZNFN1A2\t1\n-ZNFN1A3\t0\n-ZNFN1A4\t62\n-ZNFN1A5\t120\n-ZNFX1\t31\n-ZNHIT1\t6\n-ZNHIT2\t48\n-ZNHIT3\t181\n-ZNHIT4\t30\n-ZNRD1\t84\n-ZNRF1\t180\n-ZNRF2\t109\n-ZNRF3\t218\n-ZP3\t176\n-ZPBP\t0\n-ZPLD1\t0\n-ZRANB1\t144\n-ZRANB3\t55\n-ZRF1\t104\n-ZSCAN1\t208\n-ZSCAN2\t51\n-ZSCAN5\t32\n-ZSWIM1\t116\n-ZSWIM3\t0\n-ZSWIM4\t35\n-ZSWIM5\t1\n-ZSWIM6\t629\n-ZW10\t24\n-ZWILCH\t31\n-ZWINT\t371\n-ZXDA\t0\n-ZXDB\t63\n-ZXDC\t35\n-ZYG11B\t2909\n-ZYG11BL\t72\n-ZYX\t3056\n-ZZANK1\t67\n-ZZEF1\t16\n-ZZZ3\t270\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/raw2counts.R
--- a/SMART/DiffExpAnal/DESeqTools/raw2counts.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,20 +0,0 @@
-# raw2counts
-# extract counts only from rawCounts
-# and add rownames to counts
-
-# input : rawCounts
-# output : counts
-
-# created Feb 6th, 2012
-# modified April 12, 2012
-# Marie-Agnes Dillies
-
-
-raw2counts <- function( rawCounts, annot=1 ){
-
-  ex <- 1:annot
-  counts <- as.matrix( rawCounts[,-ex] )
-  rownames(counts) <- rawCounts[,1]
-  infoCounts <- rawCounts[,ex]
-  return( list("counts"=counts, "infoCounts"= infoCounts) )
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/DESeqTools/removeNul.R
--- a/SMART/DiffExpAnal/DESeqTools/removeNul.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,14 +0,0 @@
-# removeNul
-# remove genes with null counts in all samples
-
-# input : counts
-# output : counts
-
-# created Feb 7th, 2012
-# Marie-Agnes Dillies
-
-
-removeNul <- function( counts, info = NULL ){
-
-  return( list(counts[rowSums(counts) > 0,], info[rowSums(counts) > 0,]) )
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/bam_to_sam_parallel.py
--- a/SMART/DiffExpAnal/bam_to_sam_parallel.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,172 +0,0 @@
-#!/usr/bin/env python
-"""
-Converts BAM data to sorted SAM data.
-usage: bam_to_sam.py [options]
-   --input1: SAM file to be converted
-   --output1: output dataset in bam format
-"""
-
-import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random
-#from galaxy import eggs
-#import pkg_resources; pkg_resources.require( "bx-python" )
-#from bx.cookbook import doc_optparse
-#from galaxy import util
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-    
-def toTar(tarFileName, samOutputNames):
-    dir = os.path.dirname(tarFileName)    
-    tfile = tarfile.open(tarFileName + ".tmp.tar", "w")
-    currentPath = os.getcwd()
-    os.chdir(dir)
-    for file in samOutputNames:
-        relativeFileName = os.path.basename(file)
-        tfile.add(relativeFileName)
-    os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))
-    tfile.close()
-    os.chdir(currentPath)    
-
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option('-t', '--tar', dest='outputTar', default=None, help='output all SAM results in a tar file.' )
-    parser.add_option( '', '--input1', dest='input1', help='The input list of BAM datasets on txt format.' )
-    #parser.add_option( '', '--input1', dest='input1', help='The input BAM dataset' )
-    parser.add_option( '', '--output1', dest='output1', help='The output list of SAM datasets on txt format.' )
-    #parser.add_option( '', '--output1', dest='output1', help='The output SAM dataset' )
-    parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Write SAM Header' )
-    ( options, args ) = parser.parse_args()
-
-
-    #Parse the input txt file and read a list of BAM files.
-    file = open(options.input1, "r")
-    lines = file.readlines()
-    inputFileNames = []
-    samOutputNames = []
-    outputName = options.output1
-    resDirName = os.path.dirname(outputName) + '/'
-    #Write output txt file and define all output sam file names.
-    out = open(outputName, "w")
-    for line in lines:
-        tab = line.split()
-        inputFileNames.append(tab[1])
-        samOutName = resDirName + tab[0] + '_samOutput_%s.sam' % random.randrange(0, 10000)
-        samOutputNames.append(samOutName)
-        out.write(tab[0] + '\t' + samOutName  + '\n')
-    file.close()
-    out.close()
-
-    # output version # of tool
-    try:
-        tmp_files = []
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_files.append(tmp)
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( 'Samtools %s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Samtools version\n' )
-
-
-
-    tmp_dirs = []
-    for i in range(len(inputFileNames)):
-        try:
-            # exit if input file empty
-            if os.path.getsize( inputFileNames[i] ) == 0:
-                raise Exception, 'Initial input txt file is empty.'
-            # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command
-            # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted
-            # into memory ( controlled by option -m ).
-            tmp_dir = tempfile.mkdtemp()
-            tmp_dirs.append(tmp_dir)
-            tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )
-            tmp_sorted_aligns_file_base = tmp_sorted_aligns_file.name
-            tmp_sorted_aligns_file_name = '%s.bam' % tmp_sorted_aligns_file.name
-            tmp_files.append(tmp_sorted_aligns_file_name)
-            tmp_sorted_aligns_file.close()
-            
-            command = 'samtools sort %s %s' % ( inputFileNames[i], tmp_sorted_aligns_file_base )
-            tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-            tmp_stderr = open( tmp, 'wb' )
-            proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-            returncode = proc.wait()
-            tmp_stderr.close()
-            # get stderr, allowing for case where it's very large
-            tmp_stderr = open( tmp, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read( buffsize )
-                    if not stderr or len( stderr ) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-            if returncode != 0:
-                raise Exception, stderr
-            # exit if sorted BAM file empty
-            if os.path.getsize( tmp_sorted_aligns_file_name) == 0:
-                raise Exception, 'Intermediate sorted BAM file empty'
-        except Exception, e:
-            stop_err( 'Error sorting alignments from (%s), %s' % ( inputFileNames[i], str( e ) ) )
-            
-        try:
-            # Extract all alignments from the input BAM file to SAM format ( since no region is specified, all the alignments will be extracted ).
-            if options.header:
-                view_options = "-h"
-            else:
-                view_options = ""
-            command = 'samtools view %s -o %s %s' % ( view_options, samOutputNames[i], tmp_sorted_aligns_file_name )
-            tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
-            tmp_stderr = open( tmp, 'wb' )
-            proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
-            returncode = proc.wait()
-            tmp_stderr.close()
-            # get stderr, allowing for case where it's very large
-            tmp_stderr = open( tmp, 'rb' )
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read( buffsize )
-                    if not stderr or len( stderr ) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stderr.close()
-            if returncode != 0:
-                raise Exception, stderr
-        except Exception, e:
-            stop_err( 'Error extracting alignments from (%s), %s' % ( inputFileNames[i], str( e ) ) )
-        if os.path.getsize( samOutputNames[i] ) > 0:
-            sys.stdout.write( 'BAM file converted to SAM' )
-        else:
-            stop_err( 'The output file is empty, there may be an error with your input file.' )
-     
-    if options.outputTar != None:
-        toTar(options.outputTar, samOutputNames)       
-    #clean up temp files
-    for tmp_dir in tmp_dirs:
-        if os.path.exists( tmp_dir ):
-            shutil.rmtree( tmp_dir )
-    #print tmp_files
-    #for tmp in tmp_files:
-    #    os.remove(tmp)            
-    
-
-if __name__=="__main__": __main__()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/bam_to_sam_parallel.xml
--- a/SMART/DiffExpAnal/bam_to_sam_parallel.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,32 +0,0 @@
-<tool id="bam_to_sam_parallel" name="BAM to SAM (for DEA)" version="1.0.0">
-  <description>converts a list of BAM format files to SAM format.</description>
-  <requirements>
- <requirement type="package">samtools</requirement>
-  </requirements>
-  <command interpreter="python"> bam_to_sam_parallel.py
-      --input1=$input1
-      --output1=$output1
-      $header
-      $tar $outputTarFile
-  </command>
-  <inputs>
-    <param name="input1" type="data" format="txt" label="BAM File LIST to Convert" />
-    <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False" label="Include header in output" />
-    <param name="tar" type="boolean" truevalue="-t" falsevalue="" checked="false" label="tar option" help="This option creates a tar file for all out results." />
-  </inputs>
-  <outputs>
-   <data format="txt" name="output1" label="converted SAM LIST files " />
-   <data name="outputTarFile" format="tar">
-   <filter>tar</filter>
-   </data>
-  </outputs>
-  <help>
-
-**What it does**
-
-This tool uses the SAMTools_ toolkit to produce a SAM file from a BAM file.
-
-.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
-
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.py
--- a/SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,145 +0,0 @@
-#!/usr/bin/env python
-"""
-Converts BAM data to sorted SAM data.
-usage: bam_to_sam.py [options]
-   --input1: SAM file to be converted
-   --output1: output dataset in bam format
-"""
-
-import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random
-from commons.core.launcher.Launcher import Launcher
-from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
-#from galaxy import eggs
-#import pkg_resources; pkg_resources.require( "bx-python" )
-#from bx.cookbook import doc_optparse
-#from galaxy import util
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-    
-def toTar(tarFileName, samOutputNames):
-    dir = os.path.dirname(tarFileName)    
-    tfile = tarfile.open(tarFileName + ".tmp.tar", "w")
-    currentPath = os.getcwd()
-    os.chdir(dir)
-    for file in samOutputNames:
-        relativeFileName = os.path.basename(file)
-        tfile.add(relativeFileName)
-    os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))
-    tfile.close()
-    os.chdir(currentPath)    
-    
-def _map(iLauncher, cmd, cmdStart, cmdFinish ):
-    lCmds = []
-    lCmds.extend(cmd)
-    lCmdStart = []
-    lCmdStart.extend(cmdStart)
-    lCmdFinish = []
-    lCmdFinish.extend(cmdFinish)
-    return(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))
-
-def _createSamToolsViewCmd(iLauncher, inputFile, tmp_sorted_aligns_file_name, header):
-        lArgs = []
-        lArgs.append("-o %s" %  inputFile)
-        lArgs.append("%s" % tmp_sorted_aligns_file_name)
-        if header:
-            lArgs.append("-h")
-        return iLauncher.getSystemCommand("samtools view", lArgs)
-
-def _createSamToolsSortCmd(iLauncher, inputFile, tmp_sorted_aligns_file_base):
-        lArgs = []
-        lArgs.append("%s" % inputFile)
-        lArgs.append("%s" %  tmp_sorted_aligns_file_base)
-        return iLauncher.getSystemCommand("samtools sort", lArgs)
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option('-t', '--tar', dest='outputTar', default=None, help='output all SAM results in a tar file.' )
-    parser.add_option( '', '--input1', dest='input1', help='The input list of BAM datasets on txt format.' )
-    #parser.add_option( '', '--input1', dest='input1', help='The input BAM dataset' )
-    parser.add_option( '', '--output1', dest='output1', help='The output list of SAM datasets on txt format.' )
-    #parser.add_option( '', '--output1', dest='output1', help='The output SAM dataset' )
-    parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Write SAM Header' )
-    ( options, args ) = parser.parse_args()
-
-
-    #Parse the input txt file and read a list of BAM files.
-    file = open(options.input1, "r")
-    lines = file.readlines()
-    inputFileNames = []
-    samOutputNames = []
-    outputName = options.output1
-    resDirName = os.path.dirname(outputName) + '/'
-    #Write output txt file and define all output sam file names.
-    out = open(outputName, "w")
-    for line in lines:
-        tab = line.split()
-        inputFileNames.append(tab[1])
-        samOutName = resDirName + tab[0] + '_samOutput_%s.sam' % random.randrange(0, 10000)
-        samOutputNames.append(samOutName)
-        out.write(tab[0] + '\t' + samOutName  + '\n')
-    file.close()
-    out.close()
-
-    # output version # of tool
-    try:
-        tmp_files = []
-        tmp = tempfile.NamedTemporaryFile().name
-        tmp_files.append(tmp)
-        tmp_stdout = open( tmp, 'wb' )
-        proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
-        tmp_stdout.close()
-        returncode = proc.wait()
-        stdout = None
-        for line in open( tmp_stdout.name, 'rb' ):
-            if line.lower().find( 'version' ) >= 0:
-                stdout = line.strip()
-                break
-        if stdout:
-            sys.stdout.write( 'Samtools %s\n' % stdout )
-        else:
-            raise Exception
-    except:
-        sys.stdout.write( 'Could not determine Samtools version\n' )
-
-    tmp_dirs = []
-    acronym = "bam_to_sam"
-    jobdb = TableJobAdaptatorFactory.createJobInstance()
-    iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)
-    lCmdsTuples = []
-    for i in range(len(inputFileNames)):    #Construct the lines commands
-        if os.path.getsize( inputFileNames[i] ) == 0:
-            raise Exception, 'Initial input txt file is empty.'
-        tmp_dir = tempfile.mkdtemp(dir="%s" % os.getcwd())
-        tmp_dirs.append(tmp_dir)
-        tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )
-        tmp_sorted_aligns_file_base = tmp_sorted_aligns_file.name
-        tmp_sorted_aligns_file_name = '%s.bam' % tmp_sorted_aligns_file.name
-        tmp_files.append(tmp_sorted_aligns_file_name)
-        tmp_sorted_aligns_file.close()
-        
-        inputFile = inputFileNames[i]
-        outputFile = samOutputNames[i]
-        cmd2Launch = []
-        cmd2Launch.append(_createSamToolsSortCmd(iLauncher, inputFile, tmp_sorted_aligns_file_base))
-        cmd2Launch.append(_createSamToolsViewCmd(iLauncher, outputFile, tmp_sorted_aligns_file_name, options.header))
-        cmdStart = []
-        cmdFinish = []
-        lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))    
-
-    iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)
-    
-    if options.outputTar != None:
-        toTar(options.outputTar, samOutputNames)       
-    #clean up temp files
-    for tmp_dir in tmp_dirs:
-        if os.path.exists( tmp_dir ):
-            shutil.rmtree( tmp_dir )
-    #print tmp_files
-    #for tmp in tmp_files:
-    #    os.remove(tmp)            
-    
-
-if __name__=="__main__": __main__()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.xml
--- a/SMART/DiffExpAnal/bam_to_sam_parallel_unSQL.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,32 +0,0 @@
-<tool id="bam_to_sam_parallel_unSQL" name="BAM to SAM (for DEA in parallel)" version="1.0.0">
-  <description>converts a list of BAM format files to SAM format (parallelized).</description>
-  <requirements>
- <requirement type="package">samtools</requirement>
-  </requirements>
-  <command interpreter="python"> bam_to_sam_parallel_unSQL.py
-      --input1=$input1
-      --output1=$output1
-      $header
-      $tar $outputTarFile
-  </command>
-  <inputs>
-    <param name="input1" type="data" format="txt" label="BAM File LIST to Convert" />
-    <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False" label="Include header in output" />
-    <param name="tar" type="boolean" truevalue="-t" falsevalue="" checked="false" label="tar option" help="This option creates a tar file for all out results." />
-  </inputs>
-  <outputs>
-   <data format="txt" name="output1" label="converted SAM LIST files " />
-   <data name="outputTarFile" format="tar">
-   <filter>tar</filter>
-   </data>
-  </outputs>
-  <help>
-
-**What it does**
-
-This tool uses the SAMTools_ toolkit to produce a SAM file from a BAM file.
-
-.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
-
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/compareOverlapping_parallel.py
--- a/SMART/DiffExpAnal/compareOverlapping_parallel.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,175 +0,0 @@\n-#! /usr/bin/env python\n-#This program is a wrapp for CompareOverlapping.py.\n-import optparse, os, sys, subprocess, tempfile, shutil, tarfile, glob\n-import os, struct, time, random\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from SMART.Java.Python.CompareOverlapping import CompareOverlapping\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.ncList.NCList import NCList\n-from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n-from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n-from SMART.Java.Python.ncList.FileSorter import FileSorter\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-from SMART.Java.Python.misc import Utils\n-\n-\n-\n-def stop_err( msg ):\n-\tsys.stderr.write( "%s\\n" % msg )\n-\tsys.exit()\n-\n-def toTar(tarFileName, overlapOutputNames):\n-\tdir = os.path.dirname(tarFileName)\t\n-\ttfile = tarfile.open(tarFileName + ".tmp.tar", "w")\n-\tcurrentPath = os.getcwd()\n-\tos.chdir(dir)\n-\tfor file in overlapOutputNames:\n-\t\trelativeFileName = os.path.basename(file)\n-\t\ttfile.add(relativeFileName)\n-\tos.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))\n-\ttfile.close()\n-\tos.chdir(currentPath)\n-\n-def __main__():\n-\tdescription = "Compare Overlapping wrapp script: Get the a list of data which overlap with a reference set. 
[Category: Data Comparison]"\n-\tparser = OptionParser(description = description)\n-\tparser.add_option("-i", "--input1",\t\t   dest="inputFileName1", action="store",\t\t\t\t\t type="string", help="input file 1 (for annotation) [compulsory] [format: file in transcript format given by -f]")\n-\tparser.add_option("-f", "--format1",\t\t  dest="format1",\t\taction="store",\t\t\t\t\t type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n-\tparser.add_option("", "--inputTxt", \t\tdest="inputTxt", \t\taction="store", \t\t\t\ttype="string", \thelp="input, a txt file for a list of input reads files. Should identify all reads files format, given by -g [compulsory]")\n-\t#parser.add_option("-j", "--input2",\t\t   dest="inputFileName2", action="store",\tdefault="inputRead",\t type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n-\tparser.add_option("-g", "--format2",\t\t  dest="format2",\t\taction="store",\t\t\t\t type="string", help="format of file 2 [compulsory] [format: transcript file format]")\n-\t#parser.add_option("-o", "--output",\t\t   dest="output",\t\t action="store",\t  default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n-\tparser.add_option("-S", "--start1",\t\t   dest="start1",\t\t action="store",\t  default=None,  type="int",\thelp="only consider the n first nucleotides of the transcripts in file 1 (do not use it with -U) [format: int]")\n-\tparser.add_option("-s", "--start2",\t\t   dest="start2",\t\t action="store",\t  default=None,  type="int",\thelp="only consider the n first nucleotides of the transcripts in file 2 (do not use it with -u) [format: int]")\n-\tparser.add_option("-U", "--end1",\t\t\t dest="end1",\t\t   action="store",\t  default=None,  type="int",\thelp="only consider the n last nucleotides of the transcripts in file 1 (do not use it with -S) [format: int]")\n-\tparser.add_option("-u", "--end2",\t\t\t dest="end2",\t\t   
action="store",\t  default=None,  type="int",\thelp="only consider the n last nucleotides of the transcripts in file 2 (do not use it with -s) [format: int]")\n-\tparser.add_option("-t", "--intron",\t\t   dest="introns",\t\taction="store_true", default=False,\t\t\t\thelp="also report introns [format: bool] [default: false]")\n-\tparser.add_option("-E", "--5primeExtension1", dest="fivePrime1",\t action="store",\t  default=None,  type="int",\thelp="extension towards 5\' in file 1 [format: int]")\n-\tparser.add_option("-e"'..b'verlapping data [format: bool] [default: false]")\n-\tparser.add_option("-x", "--exclude",\t\t  dest="exclude",\t\taction="store_true", default=False,\t\t\t\thelp="invert the match [format: bool] [default: false]")\n-\tparser.add_option("-v", "--verbosity",\t\tdest="verbosity",\t  action="store",\t  default=1,\t type="int",\thelp="trace level [format: int]")\n-\tparser.add_option(\'\', \'--tar\', dest=\'outputTar\', default=None, help=\'output all SAM results in a tar file.\' )\n-\tparser.add_option( \'\', \'--outTxt\', dest=\'outTxtFile\', help=\'The output list of results files on txt format.[compulsory]\' )\n-\t(options, args) = parser.parse_args()\n-\t\n-\t\n-\t#Parse the input txt file and read a list of BAM files.\n-\tfile = open(options.inputTxt, "r")\n-\tlines = file.readlines()\n-\tinputFileNames = []\n-\toverlapOutputNames = []\n-\toutputName = options.outTxtFile\n-\tresDirName = os.path.dirname(outputName) + "/"\n-\t#Write output txt file and define all output sam file names.\n-\tout = open(outputName, "w")\n-\tfor line in lines:\n-\t\ttab = line.split()\n-\t\tinputFileNames.append(tab[1])\n-\t\toverlapOutName = resDirName + tab[0] + \'_overlapOut_%s.gff3\' % random.randrange(0, 10000)\n-\t\toverlapOutputNames.append(overlapOutName)\n-\t\tout.write(tab[0] + \'\\t\' + overlapOutName  + \'\\n\')\n-\tfile.close()\n-\tout.close()\n-\t\n-\t#construction the commandes for each input file\n-\tcmds = []\n-\tfor i in 
range(len(inputFileNames)):\n-\t\tabsFile = sys.argv[0]\n-\t\tabsDir = os.path.dirname(absFile)\n-\t\tparentDir = os.path.abspath(os.path.join(absDir, os.path.pardir))\n-\t\tcmd = "python %s/Java/Python/CompareOverlappingSmallQuery.py " % parentDir\n-\t\topts = "-i %s -f %s -j %s -g %s -o %s " % (options.inputFileName1, options.format1, inputFileNames[i], options.format2, overlapOutputNames[i])\n-\t\t#if options.start1 != None:\n-\t\t#\topts += "-S %s " % options.start1\n-\t\t#if options.start2 != None:\n-\t\t#\topts += "-s %s " % options.start2\n-\t\t#if options.end1 != None:\n-\t\t#\topts += "-U %s " % options.end1\n-\t\t#if options.end2 != None:\n-\t\t#\topts += "-u %s " % options.end2\n-\t\t#if options.fivePrime1 != None:\n-\t\t#\topts += "-E %s " % options.fivePrime1\n-\t\t#if options.fivePrime2 != None:\n-\t\t#\topts += "-e %s " % options.fivePrime2\n-\t\t#if options.threePrime1 != None:\n-\t\t#\topts += "-N %s " % options.threePrime1\n-\t\t#if options.threePrime2 != None:\n-\t\t#\topts += "-n %s " % options.threePrime2\n-\t\t#if options.colinear:\n-\t\t#\topts += "-c "\n-\t\t#if options.antisense:\n-\t\t#\topts +="-a "\n-\t\t#if options.included:\n-\t\t#\topts += "-k "\n-\t\t#if options.including:\n-\t\t#\topts += "-K "\n-\t\t#if options.pcOverlap != None:\n-\t\t#\topts += "-p %s " % options.pcOverlap\n-\t\tif options.notOverlapping:\n-\t\t\topts += "-O "\n-\t\tif options.exclude:\n-\t\t\topts += "-x "\n-\t\tif options.distance != None:\n-\t\t\topts += "-d %s " % options.distance\n-\t\t#if options.minOverlap != None:\n-\t\t#\topts += "-m %s " % options.minOverlap\n-\t\tcmd += opts\n-\t\tcmds.append(cmd)\n-\n-\n-\tprint "les commandes sont %s \\n" % cmds\n-\n-\ttmp_files = []\t\n-\tfor i in range(len(cmds)):\n-\t\ttry:\n-\t\t\ttmp_out = tempfile.NamedTemporaryFile().name\n-\t\t\ttmp_files.append(tmp_out)\n-\t\t\ttmp_stdout = open( tmp_out, \'wb\' )\n-\t\t\ttmp_err = tempfile.NamedTemporaryFile().name\n-\t\t\ttmp_files.append(tmp_err)\n-\t\t\ttmp_stderr = 
open( tmp_err, \'wb\' )\n-\t\t\tproc = subprocess.Popen( args=cmds[i], shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )\n-\t\t\treturncode = proc.wait()\n-\t\t\ttmp_stderr.close()\n-\t\t\t# get stderr, allowing for case where it\'s very large\n-\t\t\ttmp_stderr = open( tmp_err, \'rb\' )\n-\t\t\tstderr = \'\'\n-\t\t\tbuffsize = 1048576\n-\t\t\ttry:\n-\t\t\t\twhile True:\n-\t\t\t\t\tstderr += tmp_stderr.read( buffsize )\n-\t\t\t\t\tif not stderr or len( stderr ) % buffsize != 0:\n-\t\t\t\t\t\tbreak\n-\t\t\texcept OverflowError:\n-\t\t\t\tpass\n-\t\t\ttmp_stdout.close()\n-\t\t\ttmp_stderr.close()\n-\t\t\tif returncode != 0:\n-\t\t\t\traise Exception, stderr\n-\t\texcept Exception, e:\n-\t\t\tstop_err( \'Error in :\\n\' + str( e ) )\n-\n-\tif options.outputTar != None:\n-\t\ttoTar(options.outputTar, overlapOutputNames)\t\n-\t\n-\tfor tmp_file in tmp_files:\n-\t\tos.remove(tmp_file)\n-\n-\n-if __name__=="__main__": __main__()\t\t\n-\t\t\n-\t\t\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/compareOverlapping_parallel.xml
--- a/SMART/DiffExpAnal/compareOverlapping_parallel.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,251 +0,0 @@\n-<tool id="CompareOverlapping_parallel" name="CompareOverlapping (for DEA)">\n-\t<description>Shrink or extend the sets of genomic coordinates to get the information between starts of reads and starts of genes.</description>\n-\t<command interpreter="python">\n-\t\tcompareOverlapping_parallel.py -i $formatType.inputFileName1\n-\t\t#if $formatType.FormatInputFileName1 == \'bed\':\n-\t\t\t-f bed\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff\':\n-\t\t\t-f gff\t\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff2\':\n-\t\t\t-f gff2\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff3\':\n-\t\t\t-f gff3\n-\t\t#elif $formatType.FormatInputFileName1 == \'sam\':\n-\t\t\t-f sam\n-\t\t#elif $formatType.FormatInputFileName1 == \'gtf\':\n-\t\t\t-f gtf\n-\t\t#end if\n-\t\t\t\n-\t\t--inputTxt $inputTxt \n-\t\t\n-\t\t-g $format2\n-\n-\t\t--outTxt $outTxtFile\n-\n-\t\t#if $optionNFirstFile1.NFirstForFile1 == \'Yes\':\n-\t\t\t-S $optionNFirstFile1.firstNtFile1\n-\t\t#end if\n-\t\t#if $optionNFirstFile2.NFirstForFile2 == \'Yes\':\n-\t\t\t-s $optionNFirstFile2.firstNtFile2\n-\t\t#end if\n-\t\t#if $optionNLastFile1.NLastForFile1 == \'Yes\':\n-\t\t\t-U $optionNLastFile1.lastNtFile1\n-\t\t#end if\n-\t\t#if $optionNLastFile2.NLastForFile2 == \'Yes\':\n-\t\t\t-u $optionNLastFile2.lastNtFile2\n-\t\t#end if\n-\t\n-\t\t#if $optionExtentionCinqFile1.extentionFile1 == \'Yes\':\n-\t\t\t-E $optionExtentionCinqFile1.extention51\n-\t\t#end if\n-\t\t#if $optionExtentionCinqFile2.extentionFile2 == \'Yes\':\n-\t\t\t-e $optionExtentionCinqFile2.extention52\n-\t\t#end if\n-\n-\t\t#if $optionExtentionTroisFile1.extentionFile1 == \'Yes\':\n-\t\t\t-N $optionExtentionTroisFile1.extention31\n-\t\t#end if\n-\t\t#if $optionExtentionTroisFile2.extentionFile2 == \'Yes\':\n-\t\t\t-n $optionExtentionTroisFile2.extention32\n-\t\t#end if\t\n-\n-\t\t#if $OptionColinearOrAntiSens.OptionCA == \'Colinear\':\n-\t\t\t-c \n-\t\t#elif $OptionColinearOrAntiSens.OptionCA == 
\'AntiSens\':\n-\t\t\t-a\n-\t\t#end if\t\n-\n-\t\t#if $OptionDistance.Dist == \'Yes\':\n-\t\t\t-d $OptionDistance.distance\n-\t\t#end if\n-\n-\t\t#if $OptionMinOverlap.MO == \'Yes\':\n-\t\t\t-m $OptionMinOverlap.minOverlap\n-\t\t#end if\n-\n-\t\t$InvertMatch\n-\t\t$ReportIntron\n-\t\t$NotOverlapping\n-\t\t$tar $outputTarFile\n-\t</command>\n-\n-\t<inputs>\n-\n-\t\t<conditional name="formatType">\n-\t\t\t<param name="FormatInputFileName1" type="select" label="Input File Format 1">\n-\t\t\t\t<option value="bed">bed</option>\n-\t\t\t\t<option value="gff">gff</option>\n-\t\t\t\t<option value="gff2">gff2</option>\n-\t\t\t\t<option value="gff3">gff3</option>\n-\t\t\t\t<option value="sam">sam</option>\n-\t\t\t\t<option value="gtf">gtf</option>\n-\t\t\t</param>\n-\t\t\t<when value="bed">\n-\t\t\t\t<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff">\n-\t\t\t\t<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff2">\n-\t\t\t\t<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff3">\n-\t\t\t\t<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="sam">\n-\t\t\t\t<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gtf">\n-\t\t\t\t<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>\n-            </when>\n-\t\t</conditional>\n-\t\t\n-\t\t<param name="inputTxt" type="data" format="txt" label="A txt file contains a list of several input transcripts files." 
/>\n-\t\t\n-\t\t<param name="format2" type="text" value="bed" label="format for  File 2, you can choose [bed, gff, gff2, gff3, sam, gtf]"/>\n-\t\t\n-\t\t<conditional name="optionNFirstFile1">\n-\t\t\t<param name="NFirstForFile1" type="select" label="NFirst for file 1" help="only consider the n first nucleotides of the transcripts in file 1">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="firstNtFile1" type="integer" value="1" label="n first nucleotides for input file 1" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\t\t<conditional name="optionNFirstFile2">\n-\t\t\t<param name="NFirs'..b'tForFile2" type="select" label="NLast for file 2">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="lastNtFile2" type="integer" value="1" label="n last nucleotides for input file 2" help="only consider the n last nucleotides of the transcripts in file 2"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionExtentionCinqFile1">\n-\t\t\t<param name="extentionFile1" type="select" label="Extension towards 5 for file 1">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention51" type="integer" value="1" label="in file 1" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionExtentionCinqFile2">\n-\t\t\t<param name="extentionFile2" type="select" label="Extension towards 5 for file 2">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention52" 
type="integer" value="1" label="in file 2"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionExtentionTroisFile1">\n-\t\t\t<param name="extentionFile1" type="select" label="Extension towards 3 for file 1">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention31" type="integer" value="1" label="in file 1" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionExtentionTroisFile2">\n-\t\t\t<param name="extentionFile2" type="select" label="Extension towards 3 for file 2">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention32" type="integer" value="1" label="in file 2" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionColinearOrAntiSens">\n-\t\t\t<param name="OptionCA" type="select" label="Colinear or anti-sens">\n-\t\t\t\t<option value="Colinear">Colinear</option>\n-\t\t\t\t<option value="AntiSens">AntiSens</option>\n-\t\t\t\t<option value="NONE" selected="true">NONE</option>\n-\t\t\t</param>\n-\t\t\t<when value="Colinear">\n-\t\t\t</when>\n-\t\t\t<when value="AntiSens">\n-\t\t\t</when>\n-\t\t\t<when value="NONE">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionDistance">\n-\t\t\t<param name="Dist" type="select" label="Maximum Distance between two reads">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="distance" type="integer" value="0"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionMinOverlap">\n-\t\t\t<param name="MO" 
type="select" label="Minimum number of overlapping between two reads">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="minOverlap" type="integer" value="1"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\t\t<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>\n-\t\t<param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Report intron"/>\n-\t\t<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>\n-\t\t<param name="tar" type="boolean" truevalue="--tar" falsevalue="" checked="false" label="tar option" help="This option creates a tar file for all out results." />\n-\t</inputs>\n-\n-\t<outputs>\n-\t\t<data name="outTxtFile" format="txt" label="overlapping output files "/>\n-\t\t<data name="outputTarFile" format="tar">\n-\t\t  <filter>tar</filter>\n-\t  </data>\n-\t</outputs> \n-\t\n-</tool>\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.py
--- a/SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,205 +0,0 @@\n-#! /usr/bin/env python\n-#This program is a wrapp for CompareOverlapping.py.\n-import os, sys, tarfile, optparse\n-from commons.core.launcher.Launcher import Launcher\n-from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n-from optparse import OptionParser\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from SMART.Java.Python.structure.TranscriptList import TranscriptList\n-from commons.core.writer.WriterChooser import WriterChooser\n-\n-def stop_err( msg ):\n-\tsys.stderr.write( "%s\\n" % msg )\n-\tsys.exit()\n-\n-def toTar(tarFileName, overlapOutputNames):\n-\tdir = os.path.dirname(tarFileName)\t\n-\ttfile = tarfile.open(tarFileName + ".tmp.tar", "w")\n-\tcurrentPath = os.getcwd()\n-\tos.chdir(dir)\n-\tfor file in overlapOutputNames:\n-\t\trelativeFileName = os.path.basename(file)\n-\t\ttfile.add(relativeFileName)\n-\tos.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))\n-\ttfile.close()\n-\tos.chdir(currentPath)\n-\n-def _createCompareOverlappingCmd(iLauncher, options, inputFileName, annotationFile, overlapOutputName):\n-\tlArgs = []\n-\tlArgs.append("-i %s" % annotationFile)\n-\tlArgs.append("-f %s" % options.format1)\n-\tlArgs.append("-j %s" % inputFileName)\n-\tlArgs.append("-g %s" % options.format2)\n-\tlArgs.append("-o %s" % overlapOutputName)\n-\tif options.notOverlapping:\n-\t\tlArgs.append("-O")\n-\tif options.exclude:\n-\t\tlArgs.append("-x")\n-\tif options.distance != None:\n-\t\tlArgs.append("-d %s" % options.distance)\n-\treturn(iLauncher.getSystemCommand("python %s/SMART/Java/Python/CompareOverlappingSmallQuery.py"  %  os.environ["REPET_PATH"], lArgs))\n-\n-def _map(iLauncher, cmd, cmdStart, cmdFinish ):\n-\tlCmds = []\n-\tlCmds.append(cmd)\n-\tlCmdStart = []\n-\tlCmdStart.append(cmdStart)\n-\tlCmdFinish = []\n-\tlCmdFinish.append(cmdFinish)\n-\treturn(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, 
lCmdFinish))\n-\n-def split(fileName, nbOfSeqPerBatch):\n-\tfilePrefix, fileExt = os.path.splitext(os.path.basename(fileName))\n-\tresDir = os.path.dirname(fileName)\n-\tlInputName = []\n-\tfileNb = 1\n-\tSeqNb = 0\n-\toutFileName = "%s/%s-%s%s" %(resDir, filePrefix, fileNb, fileExt)\n-\tlInputName.append(outFileName)\n-\toutFile = open(outFileName, "w")\n-\tf = open(fileName, "r")\n-\tline = f.readline()\n-\tpreviousRefName = ""\n-\twhile line != "":\n-\t\tif not line.startswith(\'@SQ\'):\n-\t\t\tif SeqNb == nbOfSeqPerBatch:\n-\t\t\t\tSeqNb = 0\n-\t\t\t\tfileNb += 1\n-\t\t\t\toutFile.close()\n-\t\t\t\toutFileName = "%s/%s-%s%s" %(resDir, filePrefix, fileNb, fileExt)\n-\t\t\t\tlInputName.append(outFileName)\n-\t\t\t\toutFile = open(outFileName, "w")\n-\t\t\trefName = line.split("\\t")[2]\n-\t\t\tif previousRefName != refName:\n-\t\t\t\tSeqNb += 1\n-\t\t\t\toutFile.write(line)\n-\t\t\telse:\n-\t\t\t\tpreviousRefName = refName\n-\t\t\t\toutFile.write(line)\n-\t\tline = f.readline()\n-\treturn lInputName\t\t\n-\n-def join(dCutOut2Out, options):\n-\tchooser = ParserChooser()\n-\tchooser.findFormat("gtf")\n-\tgtfParser = chooser.getParser(options.inputFileName1)\n-\tref = {}\n-\tfor transcript in gtfParser.getIterator():\n-\t\tref[transcript.getTagValue("ID")] = transcript\n-\tfor key in dCutOut2Out.keys():\n-\t\twriterChooser = WriterChooser()\n-\t\twriterChooser.findFormat("gff3")\n-\t\tfor inputFile in dCutOut2Out[key]:\n-\t\t\tchooser = ParserChooser()\n-\t\t\tchooser.findFormat("gff")\n-\t\t\tgffParser = chooser.getParser(inputFile)\n-\t\t\tfor transcript in gffParser.getIterator():\n-\t\t\t\t\tfinalTranscript = ref[transcript.getTagValue("ID")]\n-\t\t\t\t\tif finalTranscript.getTagValue("nbOverlaps"):\n-\t\t\t\t\t\tnbOverlap = int(finalTranscript.getTagValue("nbOverlaps")) + int(transcript.getTagValue("nbOverlaps"))\n-\t\t\t\t\t\tfinalTranscript.setTagValue("nbOverlaps", nbOverlap)\n-\t\t\t\t\telse:\n-\t\t\t\t\t\tfinalTranscript.setTagValue("nbOverlaps", 
transcript.getTagValue("nbOverlaps"))\n-\t\t\t\t\t\n-\t\t\t\t\tif finalTranscript.getTagValue("overlapsWith") and transcript.getTagValue("overlapsWith") != None:\n-\t\t\t\t\t\toverlapName = "--".join([finalTranscript.getTagValue("overlapsWith"), transcript.getTagValue("overlapsWith")])\n-\t\t\t\t\t\tfinalTranscript.set'..b'pe="int",\thelp="extension towards 3\' in file 2 [format: int]")\n-\tparser.add_option("-c", "--colinear",\t\t dest="colinear",\t   action="store_true", default=False,\t\t\t\thelp="colinear only [format: bool] [default: false]")\n-\tparser.add_option("-a", "--antisense",\t\tdest="antisense",\t  action="store_true", default=False,\t\t\t\thelp="antisense only [format: bool] [default: false]")\n-\tparser.add_option("-d", "--distance",\t\t dest="distance",\t   action="store",\t  default=None,\t type="int",\thelp="accept some distance between query and reference [format: int]")\n-\tparser.add_option("-k", "--included",\t\t dest="included",\t   action="store_true", default=False,\t\t\t\thelp="keep only elements from file 1 which are included in an element of file 2 [format: bool] [default: false]")\n-\tparser.add_option("-K", "--including",\t\tdest="including",\t  action="store_true", default=False,\t\t\t\thelp="keep only elements from file 2 which are included in an element of file 1 [format: bool] [default: false]")\n-\tparser.add_option("-m", "--minOverlap",\t   dest="minOverlap",\t action="store",\t  default=None,\t type="int",\thelp="minimum number of nucleotides overlapping to declare an overlap [format: int] [default: 1]")\n-\tparser.add_option("-p", "--pcOverlap",\t\tdest="pcOverlap",\t  action="store",\t  default=None,  type="int",\thelp="minimum percentage of nucleotides to overlap to declare an overlap [format: int]")\n-\tparser.add_option("-O", "--notOverlapping",   dest="notOverlapping", action="store_true", default=False,\t\t\t\thelp="also output not overlapping data [format: bool] [default: false]")\n-\tparser.add_option("-x", 
"--exclude",\t\t  dest="exclude",\t\taction="store_true", default=False,\t\t\t\thelp="invert the match [format: bool] [default: false]")\n-\tparser.add_option("-v", "--verbosity",\t\tdest="verbosity",\t  action="store",\t  default=1,\t type="int",\thelp="trace level [format: int]")\n-\tparser.add_option(\'\', \'--tar\', dest=\'outputTar\', default=None, help=\'output all SAM results in a tar file.\' )\n-\tparser.add_option( \'\', \'--outTxt\', dest=\'outTxtFile\', help=\'The output list of results files on txt format.[compulsory]\' )\n-\t(options, args) = parser.parse_args()\n-\t\n-\t\n-\t#Parse the input txt file and read a list of BAM files.\n-\tfile = open(options.inputTxt, "r")\n-\tlines = file.readlines()\n-\tinputFileNames = []\n-\toverlapOutputNames = []\n-\toutputName = options.outTxtFile\n-\tresDirName = os.path.dirname(outputName) + "/"\n-\t#Write output txt file and define all output sam file names.\n-\tout = open(outputName, "w")\n-\tfor line in lines:\n-\t\ttab = line.split()\n-\t\tinputFileNames.append(tab[1])\n-\t\toverlapOutName = resDirName + tab[0] + \'_overlapOut.gff3\'\n-\t\toverlapOutputNames.append(overlapOutName)\n-\t\tout.write(tab[0] + \'\\t\' + overlapOutName  + \'\\n\')\n-\tfile.close()\n-\tout.close()\n-\t\n-\t#Launch on nodes\n-\tacronym = "compareOverlapping"\n-\tjobdb = TableJobAdaptatorFactory.createJobInstance()\n-\tiLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "test", acronym, acronym, False, True)\n-\n-\n-\t\n-\n-\t#construction the commandes for each input file\n-\tlCmdsTuples = []\n-\tdCutOut2Out = {}\n-\tlAllFile2remove = []\n-\tfor i in range(len(inputFileNames)):\n-\t\tlCutInputFile = split(inputFileNames[i], 20000)\n-\t\tlAllFile2remove.extend(lCutInputFile)\n-\t\tlCutOutput = []\n-\t\tfor cutInput in lCutInputFile:\n-\t\t\tcutOutput = "%s_out" % cutInput\n-\t\t\tlCutOutput.append(cutOutput)\n-\t\t\tlAllFile2remove.extend(lCutOutput)\n-\t\t\tcmd2Launch = 
_createCompareOverlappingCmd(iLauncher, options, cutInput, options.inputFileName1, cutOutput)\n-\t\t\tlCmdsTuples.append(_map(iLauncher, cmd2Launch, "", ""))\n-\t\tchooser = ParserChooser()\n-\t\tchooser.findFormat(options.format2)\n-\t\tdCutOut2Out[overlapOutputNames[i]] = lCutOutput\n-\tiLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)\n-\t\n-\tjoin(dCutOut2Out, options)\n-\tFileUtils.removeFilesFromListIfExist(lAllFile2remove)\n-\n-\tif options.outputTar != None:\n-\t\ttoTar(options.outputTar, overlapOutputNames)\t\n-\n-if __name__=="__main__": __main__()\t\t\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.xml
--- a/SMART/DiffExpAnal/compareOverlapping_parallel_unSQL.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,251 +0,0 @@\n-<tool id="CompareOverlapping_parallel_unSQL" name="CompareOverlapping (for DEA in parallel)">\n-\t<description>Shrink or extend the sets of genomic coordinates to get the information between starts of reads and starts of genes (parallelized).</description>\n-\t<command interpreter="python">\n-\t\tcompareOverlapping_parallel_unSQL.py -i $formatType.inputFileName1\n-\t\t#if $formatType.FormatInputFileName1 == \'bed\':\n-\t\t\t-f bed\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff\':\n-\t\t\t-f gff\t\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff2\':\n-\t\t\t-f gff2\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff3\':\n-\t\t\t-f gff3\n-\t\t#elif $formatType.FormatInputFileName1 == \'sam\':\n-\t\t\t-f sam\n-\t\t#elif $formatType.FormatInputFileName1 == \'gtf\':\n-\t\t\t-f gtf\n-\t\t#end if\n-\t\t\t\n-\t\t--inputTxt $inputTxt \n-\t\t\n-\t\t-g $format2\n-\n-\t\t--outTxt $outTxtFile\n-\n-\t\t#if $optionNFirstFile1.NFirstForFile1 == \'Yes\':\n-\t\t\t-S $optionNFirstFile1.firstNtFile1\n-\t\t#end if\n-\t\t#if $optionNFirstFile2.NFirstForFile2 == \'Yes\':\n-\t\t\t-s $optionNFirstFile2.firstNtFile2\n-\t\t#end if\n-\t\t#if $optionNLastFile1.NLastForFile1 == \'Yes\':\n-\t\t\t-U $optionNLastFile1.lastNtFile1\n-\t\t#end if\n-\t\t#if $optionNLastFile2.NLastForFile2 == \'Yes\':\n-\t\t\t-u $optionNLastFile2.lastNtFile2\n-\t\t#end if\n-\t\n-\t\t#if $optionExtentionCinqFile1.extentionFile1 == \'Yes\':\n-\t\t\t-E $optionExtentionCinqFile1.extention51\n-\t\t#end if\n-\t\t#if $optionExtentionCinqFile2.extentionFile2 == \'Yes\':\n-\t\t\t-e $optionExtentionCinqFile2.extention52\n-\t\t#end if\n-\n-\t\t#if $optionExtentionTroisFile1.extentionFile1 == \'Yes\':\n-\t\t\t-N $optionExtentionTroisFile1.extention31\n-\t\t#end if\n-\t\t#if $optionExtentionTroisFile2.extentionFile2 == \'Yes\':\n-\t\t\t-n $optionExtentionTroisFile2.extention32\n-\t\t#end if\t\n-\n-\t\t#if $OptionColinearOrAntiSens.OptionCA == \'Colinear\':\n-\t\t\t-c \n-\t\t#elif 
$OptionColinearOrAntiSens.OptionCA == \'AntiSens\':\n-\t\t\t-a\n-\t\t#end if\t\n-\n-\t\t#if $OptionDistance.Dist == \'Yes\':\n-\t\t\t-d $OptionDistance.distance\n-\t\t#end if\n-\n-\t\t#if $OptionMinOverlap.MO == \'Yes\':\n-\t\t\t-m $OptionMinOverlap.minOverlap\n-\t\t#end if\n-\n-\t\t$InvertMatch\n-\t\t$ReportIntron\n-\t\t$NotOverlapping\n-\t\t$tar $outputTarFile\n-\t</command>\n-\n-\t<inputs>\n-\n-\t\t<conditional name="formatType">\n-\t\t\t<param name="FormatInputFileName1" type="select" label="Input File Format 1">\n-\t\t\t\t<option value="bed">bed</option>\n-\t\t\t\t<option value="gff">gff</option>\n-\t\t\t\t<option value="gff2">gff2</option>\n-\t\t\t\t<option value="gff3">gff3</option>\n-\t\t\t\t<option value="sam">sam</option>\n-\t\t\t\t<option value="gtf">gtf</option>\n-\t\t\t</param>\n-\t\t\t<when value="bed">\n-\t\t\t\t<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff">\n-\t\t\t\t<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff2">\n-\t\t\t\t<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff3">\n-\t\t\t\t<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="sam">\n-\t\t\t\t<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gtf">\n-\t\t\t\t<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>\n-            </when>\n-\t\t</conditional>\n-\t\t\n-\t\t<param name="inputTxt" type="data" format="txt" label="A txt file contains a list of several input transcripts files." 
/>\n-\t\t\n-\t\t<param name="format2" type="text" value="bed" label="format for  File 2, you can choose [bed, gff, gff2, gff3, sam, gtf]"/>\n-\t\t\n-\t\t<conditional name="optionNFirstFile1">\n-\t\t\t<param name="NFirstForFile1" type="select" label="NFirst for file 1" help="only consider the n first nucleotides of the transcripts in file 1">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="firstNtFile1" type="integer" value="1" label="n first nucleotides for input file 1" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\t\t<conditional name="opt'..b'tForFile2" type="select" label="NLast for file 2">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="lastNtFile2" type="integer" value="1" label="n last nucleotides for input file 2" help="only consider the n last nucleotides of the transcripts in file 2"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionExtentionCinqFile1">\n-\t\t\t<param name="extentionFile1" type="select" label="Extension towards 5 for file 1">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention51" type="integer" value="1" label="in file 1" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionExtentionCinqFile2">\n-\t\t\t<param name="extentionFile2" type="select" label="Extension towards 5 for file 2">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention52" type="integer" value="1" label="in file 
2"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionExtentionTroisFile1">\n-\t\t\t<param name="extentionFile1" type="select" label="Extension towards 3 for file 1">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention31" type="integer" value="1" label="in file 1" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionExtentionTroisFile2">\n-\t\t\t<param name="extentionFile2" type="select" label="Extension towards 3 for file 2">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention32" type="integer" value="1" label="in file 2" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionColinearOrAntiSens">\n-\t\t\t<param name="OptionCA" type="select" label="Colinear or anti-sens">\n-\t\t\t\t<option value="Colinear">Colinear</option>\n-\t\t\t\t<option value="AntiSens">AntiSens</option>\n-\t\t\t\t<option value="NONE" selected="true">NONE</option>\n-\t\t\t</param>\n-\t\t\t<when value="Colinear">\n-\t\t\t</when>\n-\t\t\t<when value="AntiSens">\n-\t\t\t</when>\n-\t\t\t<when value="NONE">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionDistance">\n-\t\t\t<param name="Dist" type="select" label="Maximum Distance between two reads">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="distance" type="integer" value="0"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionMinOverlap">\n-\t\t\t<param name="MO" type="select" label="Minimum number of 
overlapping between two reads">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="minOverlap" type="integer" value="1"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\t\t<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>\n-\t\t<param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Report intron"/>\n-\t\t<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>\n-\t\t<param name="tar" type="boolean" truevalue="--tar" falsevalue="" checked="false" label="tar option" help="This option creates a tar file for all out results." />\n-\t</inputs>\n-\n-\t<outputs>\n-\t\t<data name="outTxtFile" format="txt" label="overlapping output files "/>\n-\t\t<data name="outputTarFile" format="tar">\n-\t\t  <filter>tar</filter>\n-\t  </data>\n-\t</outputs> \n-\t\n-</tool>\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/countNumber.pl
--- a/SMART/DiffExpAnal/countNumber.pl Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,34 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-
-my $in_file = $ARGV[0];
-my $out_file = $ARGV[1];
-my $sort_type = $ARGV[2]; # n(umeric) or a(lphanumeric)
-my ($line,$ID,$nbOverlaps,%hash);
-
-open(IN, $in_file);
-while ($line = <IN>){
- chomp($line);
- $line=~s/\t/|/g;
- my @part=split(/\|/,$line);
- my @split=split(";",$part[$#part]);
- $split[0] =~ m/^(\w+).+$/;
-
- foreach my $i (@split){
- if ($i=~m/nbOverlaps=(.+)/){
- $nbOverlaps=$1;
- }
- if ($i=~m/gene_id=(.+)/){
- $ID=$1;
- $hash{$ID}=$nbOverlaps;
- }
- }
-}
-close(IN);
-
-open(OUT, ">$out_file");
-foreach my $key ( sort keys %hash) {
- print OUT "$key\t$hash{$key}\n";
-}
-close(OUT);
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/countNumber.xml
--- a/SMART/DiffExpAnal/countNumber.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,16 +0,0 @@
-<tool id="countNumber" name="countNumber">
- <description>Calculate the number of reads(annotations) overlapping for each transcript.</description>
- <command interpreter="perl"> countNumber.pl $input $outputCSV
- </command>
-
- <inputs>
- <param name="input" type="data" format="gff3" label="Please choose your gff3 format file (which contains the number of overlaps)."/>
- </inputs>
-
- <outputs>
- <data format="csv" name="outputCSV" label="countNumber Output"/>
- </outputs>
-
- <help>
- </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/countNumber_parallel.py
--- a/SMART/DiffExpAnal/countNumber_parallel.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,96 +0,0 @@
-#! /usr/bin/env python
-
-
-import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random
-from optparse import OptionParser
-
-def stop_err(msg):
- sys.stderr.write('%s\n' % msg)
- sys.exit()
-
-def toTar(tarFileName, outCountNames):
- dir = os.path.dirname(tarFileName)
- tfile = tarfile.open(tarFileName + ".tmp.tar", "w")
- currentPath = os.getcwd()
- os.chdir(dir)
- for file in outCountNames:
- relativeFileName = os.path.basename(file)
- tfile.add(relativeFileName)
- os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))
- tfile.close()
- os.chdir(currentPath)
-
-
-def __main__():
- #Parse Command Line
- parser = optparse.OptionParser()
- parser.add_option("-i", "--input", dest="inputFile", help="input txt file, a list of overlapping results files.")
- parser.add_option("-o", "--output", dest="outputFile", help="Out txt file.")
- parser.add_option("-t", "--tar", dest="outputTar", default=None, help="output all count results in a tar file.")
- (options, args) = parser.parse_args()
-
- #Parse the input txt file and read a list of transcripts files.
- file = open(options.inputFile, "r")
- lines = file.readlines()
- inputFileNames = []
- outCountNames = []
- outputName = options.outputFile
- resDirName = os.path.dirname(outputName) + '/'
-
- #Write output txt file and define all output count file names
- out = open(outputName, "w")
- out.write("label\tfiles\tgroup\n")
- for line in lines:
- tab = line.split()
- inputFileNames.append(tab[1])
- outCountName = resDirName + tab[0] + "_outCount_%s.csv" % random.randrange(0, 10000)
- outCountNames.append(outCountName)
- out.write(tab[0] + '\t' + outCountName + '\t' + tab[0][5] + '\n')
- file.close()
- out.close()
-
- #Construct the lines commands
- cmds = []
- for i in range(len(inputFileNames)):
- cmd = "perl %s/SMART/DiffExpAnal/countNumber.pl " %  os.environ["REPET_PATH"]
- opts = "%s %s " % (inputFileNames[i], outCountNames[i])
- cmd += opts
- cmds.append(cmd)
-
- tmp_files = []
- for i in range(len(cmds)):
- try:
- tmp_out = tempfile.NamedTemporaryFile().name
- tmp_files.append(tmp_out)
- tmp_stdout = open(tmp_out, 'wb')
- tmp_err = tempfile.NamedTemporaryFile().name
- tmp_files.append(tmp_err)
- tmp_stderr = open(tmp_err, 'wb')
- proc = subprocess.Popen(args=cmds[i], shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr)
- returncode = proc.wait()
- tmp_stderr.close()
- #get stderr, allowing for case where it's very large
- tmp_stderr = open(tmp_err, 'rb')
- stderr = ''
- buffsize = 1048576
- try:
- while True:
- stderr += tmp_stderr.read(buffsize)
- if not stderr or len(stderr) % buffsize != 0:
- break
- except OverflowError:
- pass
- tmp_stdout.close()
- tmp_stderr.close()
- if returncode != 0:
- raise Exception, stderr
- except Exception, e:
- stop_err('Error in :\n' + str(e))
-
- if options.outputTar != None:
- toTar(options.outputTar, outCountNames)
-
- for tmp_file in tmp_files:
- os.remove(tmp_file)
-
-if __name__=="__main__":__main__()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/countNumber_parallel.xml
--- a/SMART/DiffExpAnal/countNumber_parallel.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,19 +0,0 @@
-<tool id="countNumber_parallel" name="countNumber (for DEA)">
-
- <description>Calculate the number of reads(annotations) overlapping for each transcript.</description>
- <command interpreter="python"> countNumber_parallel.py -i $inputTxt -o $outputTxt $tar $outputTarFile
- </command>
-
- <inputs>
- <param name="inputTxt" type="data" format="txt" label="Please choose your txt format file (which contains a list of gff3 overlapping results files)."/>
- <param name="tar" type="boolean" truevalue="-t" falsevalue="" checked="False" label="tar option" help="This option creates a tar file for all out results" />
- </inputs>
-
- <outputs>
- <data format="txt" name="outputTxt" label="countNumber Output"/>
- <data name="outputTarFile" format="tar">
- <filter>tar</filter>
- </data>
- </outputs>
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/countNumber_parallel_unSQL.py
--- a/SMART/DiffExpAnal/countNumber_parallel_unSQL.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,91 +0,0 @@
-#! /usr/bin/env python
-
-
-import optparse, os, sys, tarfile, random
-from optparse import OptionParser
-from commons.core.launcher.Launcher import Launcher
-from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
-
-def stop_err(msg):
-    """Write ``msg`` to stderr and terminate the script with a failure status.
-
-    The previous version called ``sys.exit()`` without an argument, which
-    reports exit status 0 (success) even though an error was just printed.
-    """
-    sys.stderr.write('%s\n' % msg)
-    sys.exit(1)
-
-def toTar(tarFileName, outCountNames):
-    """Bundle the count files into a tar archive written to ``tarFileName``.
-
-    Each file is stored under its base name only, as before. The archive is
-    built in ``tarFileName + ".tmp.tar"`` and renamed once it is complete.
-
-    Changes compared with the previous version:
-    - the archive is closed *before* it is moved, so the file that ends up at
-      ``tarFileName`` is always a finished tar;
-    - files are added by their real path with an explicit ``arcname`` instead
-      of ``os.chdir`` + base name, so it no longer fails when ``tarFileName``
-      has no directory part and never leaves the process in another
-      working directory after an error;
-    - ``os.rename`` replaces the shell ``mv`` string, so unusual characters in
-      the paths cannot be interpreted by a shell.
-    """
-    tmpTarName = tarFileName + ".tmp.tar"
-    tfile = tarfile.open(tmpTarName, "w")
-    try:
-        for countFileName in outCountNames:
-            tfile.add(countFileName, arcname=os.path.basename(countFileName))
-    finally:
-        tfile.close()
-    # Same directory as the final name, so this is a plain rename.
-    os.rename(tmpTarName, tarFileName)
-
-def _map(iLauncher, cmd, cmdStart, cmdFinish ):
-    """Wrap one command and its start/finish hooks for the launcher.
-
-    Each of ``cmd``, ``cmdStart`` and ``cmdFinish`` is placed in its own
-    one-element list and the three lists are passed to
-    ``iLauncher.prepareCommands_withoutIndentation``; its result is returned.
-    """
-    return iLauncher.prepareCommands_withoutIndentation([cmd], [cmdStart], [cmdFinish])
-
-def _createCountNumberCommand(iLauncher, inputFile, outputFile):
-    """Build the countNumber.pl command for one input/output pair.
-
-    The perl script is located under the directory named by the
-    ``REPET_PATH`` environment variable; the two file names are passed as
-    its arguments through ``iLauncher.getSystemCommand``.
-    """
-    scriptArguments = ["%s" % inputFile, "%s" %  outputFile]
-    perlProgram = "perl %s/SMART/DiffExpAnal/countNumber.pl " %  os.environ["REPET_PATH"]
-    return iLauncher.getSystemCommand(perlProgram, scriptArguments)
-
-def __main__():
-    """Run countNumber.pl on every file of a list through the job launcher.
-
-    The input list holds one ``label path`` pair per line (whitespace
-    separated). For each pair a count file name is generated next to the
-    output file, a ``label<TAB>count file<TAB>group`` line is written to the
-    output file, and one countNumber.pl job is submitted. With ``-t`` the
-    count files are additionally packed into a tar archive.
-
-    Changes compared with the previous version:
-    - count file names are built with ``os.path.join``; ``dirname + '/'``
-      produced a path at the filesystem root when the output file name had
-      no directory part;
-    - blank lines in the input list are skipped instead of raising
-      ``IndexError``;
-    - both list files are closed even when parsing fails.
-    """
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option("-i", "--input", dest="inputFile", help="input txt file, a list of overlapping results files.")
-    parser.add_option("-o", "--output", dest="outputFile", help="Out txt file.")
-    parser.add_option("-t", "--tar", dest="outputTar", default=None, help="output all count results in a tar file.")
-    (options, args) = parser.parse_args()
-
-    inputFileNames = []
-    outCountNames = []
-    outputName = options.outputFile
-    resDirName = os.path.dirname(outputName)
-
-    #Parse the input list, write the output list and define all output count file names
-    listFile = open(options.inputFile, "r")
-    try:
-        lines = listFile.readlines()
-    finally:
-        listFile.close()
-    out = open(outputName, "w")
-    try:
-        out.write("label\tfiles\tgroup\n")
-        for line in lines:
-            tab = line.split()
-            if not tab:
-                continue
-            label = tab[0]
-            inputFileNames.append(tab[1])
-            # The random suffix only reduces the chance of name clashes between runs.
-            outCountName = os.path.join(resDirName, label + "_outCount_%s.csv" % random.randrange(0, 10000))
-            outCountNames.append(outCountName)
-            # NOTE(review): the group is the 6th character of the label, so labels
-            # shorter than 6 characters raise IndexError -- presumably labels follow
-            # a fixed naming scheme; confirm against the files fed to this tool.
-            out.write(label + '\t' + outCountName + '\t' + label[5] + '\n')
-    finally:
-        out.close()
-
-    #Launch on nodes
-    acronym = "countNumber"
-    jobdb = TableJobAdaptatorFactory.createJobInstance()
-    iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)
-    lCmdsTuples = []
-    for inputFile, outputFile in zip(inputFileNames, outCountNames): #Construct the lines commands
-        cmd2Launch = _createCountNumberCommand(iLauncher, inputFile, outputFile)
-        lCmdsTuples.append(_map(iLauncher, cmd2Launch, "", ""))
-
-    iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)
-
-    if options.outputTar is not None:
-        toTar(options.outputTar, outCountNames)
-
-
-if __name__=="__main__":__main__()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/countNumber_parallel_unSQL.xml
--- a/SMART/DiffExpAnal/countNumber_parallel_unSQL.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,19 +0,0 @@
-<tool id="countNumber_parallel_unSQL" name="countNumber (for DEA in parallel)">
-
- <description>Calculate the number of reads(annotations) overlapping for each transcript (parallelized).</description>
- <command interpreter="python"> countNumber_parallel_unSQL.py -i $inputTxt -o $outputTxt $tar $outputTarFile
- </command>
-
- <inputs>
- <param name="inputTxt" type="data" format="txt" label="Please choose your txt format file (which contains a list of gff3 overlapping results files)."/>
- <param name="tar" type="boolean" truevalue="-t" falsevalue="" checked="False" label="tar option" help="This option creates a tar file for all out results" />
- </inputs>
-
- <outputs>
- <data format="txt" name="outputTxt" label="countNumber Output"/>
- <data name="outputTarFile" format="tar">
- <filter>tar</filter>
- </data>
- </outputs>
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/deseq.sh
--- a/SMART/DiffExpAnal/deseq.sh Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,21 +0,0 @@
-#! /bin/sh
-
-#Arguments :
-#$1=targetFile(the list of files) 
-#$2=with or without header
-#$3=with or without replicates
-#$4=OUT_HTML.html
-#$5=OUT_HTML images directory
-#$6=OUT_complete.xls
-#$7=OUT_up.xls
-#$8=OUT_down.xls
-
-#run example: 
-#bash deseq.sh DESeqTools/targetTest.txt 1 1 testOUT_HTML.html /tmp/ testOUT_complet.xls testOUT_up.xls testOUT_down.xls
-
-#First, create the images tmp directory given by Galaxy; the -p option also
-#creates parent directories which don't exist yet.
-#All parameters are quoted so that paths containing spaces stay one argument.
-mkdir -p "$5"
-MY_PATH=$(dirname "$0")
-#The R script is read from stdin through the redirection; the former
-#"cat script |" in front of R was overridden by that redirection and did nothing.
-R --slave --args "$1" "$2" "$3" "$4" "$5" "$6" "$7" "$8" "$0" < "$MY_PATH/DESeqTools/anadiffGenes2conds.R"
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/deseq.xml
--- a/SMART/DiffExpAnal/deseq.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,22 +0,0 @@
-<tool id="DESEQ" name="DESEQ for differential expression analysis">
-  <description>Differential expression analysis for reads count data</description>
-  <command interpreter="bash"> deseq.sh $inputFile $header $withOutReplicates $outHTML $outHTML.files_path $outComplete $outUP $outDown 2> $log </command>
-
-  <inputs>
-      <param name="inputFile" type="data" label="Input File list" format="txt"/>
-      <param name="header" type="boolean" truevalue="1" falsevalue="0" checked="false" label="If there is a header for your count files, please choose this case."/>
-      <param name="withOutReplicates" type="boolean" truevalue="1" falsevalue="0" checked="false" label="If your data has not replicates, please choose this case."/>
-
-  </inputs>
-
-  <outputs>
-      <data format="HTML" name="outHTML" label="[DESEQ] Output HTML File" help="This output file shows all results images by DESeq analysis"/> 
-      <data format="tabular" name="outComplete" label="[DESEQ] Output complete File"/> 
-      <data format="tabular" name="outUP" label="[DESEQ] Output up File" help="This output file shows the genes of group1 which are overexpressed than those of group2"/> 
-      <data format="tabular" name="outDown" label="[DESEQ] Output down File" help="This output file shows the  genes of group1 which are less expressed than those of group2"/>
-      <data format="txt" name="log" label="[DESEQ] Output log File"/> 
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/fastq_groomer_parallel.py
--- a/SMART/DiffExpAnal/fastq_groomer_parallel.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,115 +0,0 @@
-import sys, os, optparse, random
-from galaxy_utils.sequence.fastq import fastqReader, fastqVerboseErrorReader, fastqAggregator, fastqWriter
-
-def stop_err(msg):
-    """Write ``msg`` to stderr and terminate the script with a failure status.
-
-    The previous version called ``sys.exit()`` without an argument, which
-    reports exit status 0 (success) even though an error was just printed.
-    """
-    sys.stderr.write("%s\n" % msg)
-    sys.exit(1)
-
-def _read_file_list(list_filename, output_list_filename, res_dir, name_template):
-    """Parse a ``label path`` list and write the matching ``label<TAB>groomed path`` list.
-
-    ``name_template`` is the suffix appended to each label; its ``%s`` is
-    filled with a random number. Blank lines are ignored.
-    Returns ``(input_paths, groomed_paths)`` in list order.
-    """
-    list_file = open(list_filename, "r")
-    try:
-        lines = list_file.readlines()
-    finally:
-        list_file.close()
-    input_paths = []
-    groomed_paths = []
-    out_list = open(output_list_filename, "w")
-    try:
-        for line in lines:
-            tab = line.split()
-            if not tab:
-                continue
-            input_paths.append(tab[1])
-            groomed_path = os.path.join(res_dir, tab[0] + name_template % random.randrange(0, 10000))
-            groomed_paths.append(groomed_path)
-            out_list.write(tab[0] + '\t' + groomed_path + '\n')
-    finally:
-        out_list.close()
-    return input_paths, groomed_paths
-
-
-def _groom_files(input_paths, groomed_paths, input_type, output_type, force_quality_encoding, summarize_input, no_reads_message):
-    """Convert every FASTQ file of ``input_paths`` into the matching ``groomed_paths`` entry.
-
-    One aggregator is shared by all files of the list, as before, and a
-    short report is printed after each file.
-    """
-    aggregator = fastqAggregator()
-    if summarize_input:
-        reader = fastqVerboseErrorReader
-    else:
-        reader = fastqReader
-    for in_path, out_path in zip(input_paths, groomed_paths):
-        out = fastqWriter( open( out_path, 'wb' ), format = output_type, force_quality_encoding = force_quality_encoding )
-        read_count = None
-        in_file = open( in_path )
-        try:
-            for read_count, fastq_read in enumerate( reader( in_file, format = input_type, apply_galaxy_conventions = True ) ):
-                if summarize_input:
-                    aggregator.consume_read( fastq_read )
-                out.write( fastq_read )
-        finally:
-            in_file.close()
-            out.close()
-
-        if read_count is not None:
-            print("Groomed %i %s reads into %s reads." % ( read_count + 1, input_type, output_type ))
-            if input_type != output_type and 'solexa' in [ input_type, output_type ]:
-                print("Converted between Solexa and PHRED scores.")
-            if summarize_input:
-                print("Based upon quality and sequence, the input data is valid for: %s" % ( ", ".join( aggregator.get_valid_formats() )  or "None" ))
-                ascii_range = aggregator.get_ascii_range()
-                decimal_range =  aggregator.get_decimal_range()
-                #print using repr, since \x00 (null) causes info truncation in galaxy when printed
-                print("Input ASCII range: %s(%i) - %s(%i)" % ( repr( ascii_range[0] ), ord( ascii_range[0] ), repr( ascii_range[1] ), ord( ascii_range[1] ) ))
-                print("Input decimal range: %i - %i" % ( decimal_range[0], decimal_range[1] ))
-        else:
-            print(no_reads_message)
-
-
-def main():
-    """Groom every FASTQ file named in a list file (and, optionally, a paired-end list).
-
-    Command-line arguments (positional): input list, input type, output
-    list, output type, forced quality encoding (or ``None``),
-    ``summarize_input`` flag, paired-end input list (or ``None``), paired-end
-    output list.
-
-    Changes compared with the previous version:
-    - the paired-end loop bound its loop variable to ``fastq_reader`` but
-      wrote ``fastq_read``, i.e. a stale read left over from the single-end
-      pass; each paired-end read is now the one actually written;
-    - input FASTQ handles are closed after use;
-    - output names are built with ``os.path.join`` (``dirname + "/"`` pointed
-      at the filesystem root for a bare output file name);
-    - blank lines in the list files are skipped;
-    - the duplicated single-end / paired-end code is shared through helpers.
-    """
-    input_filename = sys.argv[1]  #a txt file
-    input_type = sys.argv[2]
-    output_filename = sys.argv[3] #a txt file
-    output_type = sys.argv[4]
-    force_quality_encoding = sys.argv[5]
-    summarize_input = sys.argv[6] == 'summarize_input'
-    pairedEnd_input = sys.argv[7]
-    if pairedEnd_input == 'None':
-        pairedEnd_input = None
-    else:
-        output_pairedEndFileName = sys.argv[8]
-
-    if force_quality_encoding == 'None':
-        force_quality_encoding = None
-
-    resDirName = os.path.dirname(output_filename)
-
-    #Parse the list files and define all output groomer file names
-    inputFileNames, outGroomerNames = _read_file_list(
-        input_filename, output_filename, resDirName, '_outGroomer_%s.fastq')
-    if pairedEnd_input is not None:
-        inputPairedEndFileNames, outGroomerPairedEndNames = _read_file_list(
-            pairedEnd_input, output_pairedEndFileName, resDirName, '_outGroomer_pairedEnd_%s.fastq')
-
-    # Write output files
-    _groom_files(inputFileNames, outGroomerNames, input_type, output_type,
-                 force_quality_encoding, summarize_input,
-                 "No valid FASTQ reads were provided.")
-
-    # Write output pairedEnd files
-    if pairedEnd_input is not None:
-        _groom_files(inputPairedEndFileNames, outGroomerPairedEndNames, input_type, output_type,
-                     force_quality_encoding, summarize_input,
-                     "No valid paired-end FASTQ reads were provided.")
-
-if __name__ == "__main__": main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/fastq_groomer_parallel.xml
--- a/SMART/DiffExpAnal/fastq_groomer_parallel.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,122 +0,0 @@
-<tool id="fastq_groomer_parallel" name="FASTQ Groomer (for DEA)" version="1.0.0">
-  <description>convert between various FASTQ quality formats for a list of inputs.</description>
-  <command interpreter="python">fastq_groomer_parallel.py '$input_file' '$input_type' '$output_file'
-#if str( $options_type['options_type_selector'] ) == 'basic':
-#if str( $input_type ) == 'cssanger':
-'cssanger'
-#else:
-'sanger'
-#end if
-'ascii' 'summarize_input'
-#else:
-'${options_type.output_type}' '${options_type.force_quality_encoding}' '${options_type.summarize_input}'
-#end if
-#if $OptionPairedEnd.pairedEnd == "Yes":
-'$OptionPairedEnd.pairedEnd_input' '$output_pairedEndFile'
-#else:
-'None' 'None'
-#end if
-</command>
-  <inputs>
-    <param name="input_file" type="data" format="txt" label="The File list to groom" />
-    <param name="input_type" type="select" label="Input FASTQ quality scores type">
-      <option value="solexa">Solexa</option>
-      <option value="illumina">Illumina 1.3-1.7</option>
-      <option value="sanger" selected="True">Sanger</option>
-      <option value="cssanger">Color Space Sanger</option>
-    </param>
-    <conditional name="options_type">
-    <param name="options_type_selector" type="select" label="Advanced Options">
-      <option value="basic" selected="True">Hide Advanced Options</option>
-      <option value="advanced">Show Advanced Options</option>
-    </param>
-    <when value="basic">
-      <!-- no options -->
-    </when>
-    <when value="advanced">
-      <param name="output_type" type="select" label="Output FASTQ quality scores type" help="Galaxy tools are designed to work with the Sanger Quality score format.">
-        <option value="solexa">Solexa</option>
-        <option value="illumina">Illumina 1.3+</option>
-        <option value="sanger" selected="True">Sanger (recommended)</option>
-        <option value="cssanger">Color Space Sanger</option>
-      </param>
-      <param name="force_quality_encoding" type="select" label="Force Quality Score encoding">
-        <option value="None">Use Source Encoding</option>
-        <option value="ascii" selected="True">ASCII</option>
-        <option value="decimal">Decimal</option>
-      </param>
-      <param name="summarize_input" type="select" label="Summarize input data">
-        <option value="summarize_input" selected="True">Summarize Input</option>
-        <option value="dont_summarize_input">Do not Summarize Input (faster)</option>
-      </param>
-    </when>
-  </conditional>
-
-  <conditional name="OptionPairedEnd">
-   <param name="pairedEnd" type="select" label="For paired-end analysis.">
-   <option value="Yes">Yes</option>
-   <option value="No" selected="true">No</option>
-   </param>
-   <when value="Yes">
-   <param name="pairedEnd_input" type="data" format="txt" label="input paired-end files list"/>
-   </when>
-   <when value="No">
-   </when>
-  </conditional>
-
-  </inputs>
-
-  <outputs>
-    <data name="output_file" format="txt">
-    </data>
-    <data format="txt" name="output_pairedEndFile" label="output Paired-end fastq files">
-     <filter>(OptionPairedEnd['pairedEnd']=='Yes')</filter>
-    </data>
-  </outputs>
-  <help>
-**What it does**
-
-This tool offers several conversions options relating to the FASTQ format.
-
-When using *Basic* options, the output will be *sanger* formatted or *cssanger* formatted (when the input is Color Space Sanger).
-
-When converting, if a quality score falls outside of the target score range, it will be coerced to the closest available value (i.e. the minimum or maximum). 
-
-When converting between Solexa and the other formats, quality scores are mapped between Solexa and PHRED scales using the equations found in `Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.`_
-
-When converting between color space (csSanger) and base/sequence space (Sanger, Illumina, Solexa) formats, adapter bases are lost or gained; if gained, the base 'G' is used as the adapter. You cannot convert a color space read to base space if there is no adapter present in the color space sequence. Any masked or ambiguous nucleotides in base space will be converted to 'N's when determining color space encoding.
-
------
-
-**Quality Score Comparison**
-
-::
-
-    SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS
-    ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
-    ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-    !"#$%&amp;'()*+,-./0123456789:;&lt;=&gt;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
-    |                         |    |        |                              |                     |
-   33                        59   64       73                            104                   126
-  
-   S - Sanger       Phred+33,  93 values  (0, 93) (0 to 60 expected in raw reads)
-   I - Illumina 1.3 Phred+64,  62 values  (0, 62) (0 to 40 expected in raw reads)
-   X - Solexa       Solexa+64, 67 values (-5, 62) (-5 to 40 expected in raw reads)
-
-Diagram adapted from http://en.wikipedia.org/wiki/FASTQ_format
-
-.. class:: infomark
-
-Output from Illumina 1.8+ pipelines are Sanger encoded.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-.. _Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.: http://www.ncbi.nlm.nih.gov/pubmed/20015970
-
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.py
--- a/SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,168 +0,0 @@
-import sys, os, optparse,shutil, random
-from commons.core.launcher.Launcher import Launcher
-from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory
-from commons.core.utils.FileUtils import FileUtils
-
-def _map(iLauncher, cmd, cmdStart, cmdFinish ):
-    """Hand copies of the command lists to the launcher.
-
-    ``cmd``, ``cmdStart`` and ``cmdFinish`` are iterables of command lines;
-    each is copied into a fresh list before being passed to
-    ``iLauncher.prepareCommands_withoutIndentation``, whose result is returned.
-    """
-    return iLauncher.prepareCommands_withoutIndentation(list(cmd), list(cmdStart), list(cmdFinish))
-
-def splitFastQ(fileName, nbOfSeqPerBatch):
-    """Split a FASTQ file into chunks of ``nbOfSeqPerBatch`` records (4 lines each).
-
-    Chunks are written next to ``fileName`` as ``<prefix>-<n><ext>`` with
-    ``n`` starting at 1, and the list of chunk paths is returned in order.
-    An empty input, or a batch size of 0 or less, yields a single chunk that
-    is a byte-for-byte copy of the input.
-
-    Changes compared with the previous version:
-    - a negative batch size is treated like 0; it used to loop forever,
-      creating empty chunk files;
-    - the source handle used for the whole-file copy is closed;
-    - chunk paths are built with ``os.path.join``, so an input without a
-      directory part no longer produces chunks at the filesystem root.
-    """
-    nbOfLinesPerFile = nbOfSeqPerBatch * 4
-    lOutput = []
-    filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))
-    resDir = os.path.dirname(fileName)
-    fileNb = 1
-    with open(fileName) as inF:
-        line = inF.readline()
-        if not line or nbOfLinesPerFile <= 0:
-            outFileName = os.path.join(resDir, "%s-%s%s" % (filePrefix, fileNb, fileExt))
-            lOutput.append(outFileName)
-            with open(fileName, "rb") as src:
-                with open(outFileName, "wb") as dst:
-                    shutil.copyfileobj(src, dst)
-        else:
-            while line:
-                outFileName = os.path.join(resDir, "%s-%s%s" % (filePrefix, fileNb, fileExt))
-                lOutput.append(outFileName)
-                with open(outFileName, "w") as outF:
-                    lineNb = 1
-                    # ``line`` always holds the next unwritten line of the input.
-                    while lineNb <= nbOfLinesPerFile and line:
-                        outF.write(line)
-                        line = inF.readline()
-                        lineNb += 1
-                fileNb += 1
-    return lOutput
-
-def joinFastQ(dCutOut2Out):
-    """Concatenate the chunk outputs back into their final files.
-
-    ``dCutOut2Out`` maps each final file name to the list of chunk files
-    that are passed, with that name and ``False``, to
-    ``FileUtils.catFilesFromList``.
-    """
-    for mergedFileName, lChunkFiles in dCutOut2Out.items():
-        FileUtils.catFilesFromList(lChunkFiles, mergedFileName, False)
-
-def _createFastqGroomerCode(outGroomerNames, inputFileNames, input_type, output_type, force_quality_encoding, summarize_input):
-    """Return the lines of a Python script that grooms one FASTQ file.
-
-    ``inputFileNames`` and ``outGroomerNames`` are used as the single input
-    and output path of the generated script. The script sets ``log`` to 255
-    when no read could be processed and leaves it at 0 otherwise.
-
-    Changes compared with the previous version:
-    - values are embedded with ``%r`` instead of ``'%s'``, so a
-      ``force_quality_encoding`` of ``None`` is emitted as ``None`` and no
-      longer as the string ``'None'``, and quotes or backslashes in paths
-      can no longer break the generated code.
-    """
-    cmd2Launch = []
-    cmd2Launch.append("log = 0")
-    cmd2Launch.append("from galaxy_utils.sequence.fastq import fastqReader, fastqVerboseErrorReader, fastqAggregator, fastqWriter")
-    cmd2Launch.append("aggregator = fastqAggregator()")
-    cmd2Launch.append("out = fastqWriter( open( %r, 'wb' ), format = %r, force_quality_encoding = %r)" % (outGroomerNames, output_type, force_quality_encoding))
-    cmd2Launch.append("read_count = None")
-    if summarize_input:
-        cmd2Launch.append("reader = fastqVerboseErrorReader")
-    else:
-        cmd2Launch.append("reader = fastqReader")
-    cmd2Launch.append("for read_count, fastq_read in enumerate( reader( open( %r ), format = %r, apply_galaxy_conventions = True ) ):" % (inputFileNames, input_type))
-    if summarize_input:
-        cmd2Launch.append("\taggregator.consume_read( fastq_read )")
-    cmd2Launch.append("\tout.write( fastq_read )")
-    cmd2Launch.append("out.close()")
-    cmd2Launch.append("if read_count is not None:")
-    # The "Groomed N reads" message of the non-parallel tool is not emitted here
-    # (it was already disabled in the previous version).
-    cmd2Launch.append("\tif %r != %r and 'solexa' in [ %r, %r ]:" % (input_type, output_type, input_type, output_type))
-    cmd2Launch.append("\t\tprint 'Converted between Solexa and PHRED scores.'")
-    if summarize_input:
-        # These lines are appended verbatim: their % operators belong to the generated script.
-        cmd2Launch.append("\tprint 'Based upon quality and sequence, the input data is valid for: %s' % ( ', '.join( aggregator.get_valid_formats() )  or 'None' )")
-        cmd2Launch.append("\tascii_range = aggregator.get_ascii_range()")
-        cmd2Launch.append("\tdecimal_range =  aggregator.get_decimal_range()")
-        cmd2Launch.append("\tprint 'Input ASCII range: %s(%i) - %s(%i)' % ( repr( ascii_range[0] ), ord( ascii_range[0] ), repr( ascii_range[1] ), ord( ascii_range[1] ) )")
-        cmd2Launch.append("\tprint 'Input decimal range: %i - %i' % ( decimal_range[0], decimal_range[1] ) ")
-    cmd2Launch.append("else:")
-    cmd2Launch.append("\tprint 'No valid FASTQ reads were provided.'")
-    cmd2Launch.append("\tlog = 255")
-    return cmd2Launch
-
-def stop_err(msg):
-    """Write ``msg`` to stderr and terminate the script with a failure status.
-
-    The previous version called ``sys.exit()`` without an argument, which
-    reports exit status 0 (success) even though an error was just printed.
-    """
-    sys.stderr.write("%s\n" % msg)
-    sys.exit(1)
-
-def main():
- # Entry point: grooms every FASTQ file named in a list file by splitting it
- # into chunks, submitting one generated grooming script per chunk through
- # the Launcher, then concatenating the chunk outputs and removing the
- # intermediate files.
- # NOTE(review): the indentation of this function was flattened in this
- # listing, so the loop nesting described in the comments below is inferred
- # from the statements themselves -- confirm against the original file.
-
- # Positional command-line arguments.
- input_filename = sys.argv[1]  #a txt file
- input_type = sys.argv[2]
- output_filename = sys.argv[3] #a txt file
- output_type = sys.argv[4]
- force_quality_encoding = sys.argv[5]
- summarize_input = sys.argv[6] == 'summarize_input'
- pairedEnd_input = sys.argv[7]
- # The literal string 'None' means "no paired-end list"; argv[8] is only
- # read when a paired-end list is given.
- if pairedEnd_input == 'None':
- pairedEnd_input = None
- else:
- output_pairedEndFileName = sys.argv[8]
-
- if force_quality_encoding == 'None':
- force_quality_encoding = None
-
- #Parse the input txt file and read a list of fastq files
- file = open(input_filename, "r")
- lines = file.readlines()
- inputFileNames = []
- outGroomerNames = []
- # Groomed files are created in the directory of the output list file.
- resDirName = os.path.dirname(output_filename) + "/"
- #Write output txt file and define all output groomer file names
- outFile = open(output_filename, "w")
- for line in lines:
- # Each line is split on whitespace: tab[0] is the label, tab[1] the FASTQ path.
- tab = line.split()
- inputFileNames.append(tab[1])
- outGroomerName = resDirName + tab[0] + '_outGroomer_%s.fastq' % random.randrange(0, 10000)
- outGroomerNames.append(outGroomerName)
- outFile.write(tab[0] + '\t' + outGroomerName + '\n')
- outFile.close()
- file.close()
-
- # Same parsing for the optional paired-end list, with its own output list.
- if pairedEnd_input != None:
- inPairedFile = open(pairedEnd_input, "r")
- lines = inPairedFile.readlines()
- inputPairedEndFileNames = []
- outGroomerPairedEndNames = []
- outPairedEndFile = open(output_pairedEndFileName, "w")
- for line in lines:
- tab = line.split()
- inputPairedEndFileNames.append(tab[1])
- outGroomerPairedEndName = resDirName + tab[0] + '_outGroomer_pairedEnd_%s.fastq' % random.randrange(0, 10000)
- outGroomerPairedEndNames.append(outGroomerPairedEndName)
- outPairedEndFile.write(tab[0] + '\t' + outGroomerPairedEndName + '\n')
- outPairedEndFile.close()
- inPairedFile.close()
-
- # Job launcher set-up; jobs are registered under the "fastqGroomer" acronym.
- acronym = "fastqGroomer"
- jobdb = TableJobAdaptatorFactory.createJobInstance()
- iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)
- lCmdsTuples = []
- # Maps each final groomed file name to the list of chunk outputs to concatenate into it.
- dCutOut2Out = {}
- # Every chunk input and chunk output, removed once the results are joined.
- lAllFile2remove = []
- # Write output file
- for i in range(len(outGroomerNames)):
- # Split the input into chunks of 20000 sequences.
- lCutInputFile = splitFastQ(inputFileNames[i], 20000)
- lAllFile2remove.extend(lCutInputFile)
- lCutOutput = []
- for cutInput in lCutInputFile:
- # Each chunk is groomed into "<chunk>_out" by its own generated script.
- cutOutput = "%s_out" % cutInput
- lCutOutput.append(cutOutput)
- lAllFile2remove.extend(lCutOutput)
- cmd2Launch = _createFastqGroomerCode(cutOutput, cutInput, input_type, output_type, force_quality_encoding, summarize_input)
- cmdStart = []
- cmdFinish = []
- lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))
- dCutOut2Out[outGroomerNames[i]] = lCutOutput
- # Same chunking for the paired-end file at the same index; this assumes
- # both lists have the same number of entries -- TODO confirm.
- if pairedEnd_input != None:
- lCutInputFile = splitFastQ(inputPairedEndFileNames[i], 20000)
- lAllFile2remove.extend(lCutInputFile)
- lCutOutput = []
- for cutInput in lCutInputFile:
- cutOutput = "%s_out" % cutInput
- lCutOutput.append(cutOutput)
- lAllFile2remove.extend(lCutOutput)
- cmd2Launch = _createFastqGroomerCode(cutOutput, cutInput, input_type, output_type, force_quality_encoding, summarize_input)
- cmdStart = []
- cmdFinish = []
- lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))
- dCutOut2Out[outGroomerPairedEndNames[i]] =  lCutOutput
- # Submit all chunk jobs in one launcher call.
- iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, False)
-
- # Rebuild each final file from its chunk outputs, then delete the chunks.
- joinFastQ(dCutOut2Out)
- FileUtils.removeFilesFromListIfExist(lAllFile2remove)
-
-if __name__ == "__main__": main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.xml
--- a/SMART/DiffExpAnal/fastq_groomer_parallel_unSQL.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,122 +0,0 @@
-<tool id="fastq_groomer_parallel_unSQL" name="FASTQ Groomer (for DEA in parallel)" version="1.0.0">
-  <description>convert between various FASTQ quality formats for a list of inputs (parallelized).</description>
-  <command interpreter="python">fastq_groomer_parallel_unSQL.py '$input_file' '$input_type' '$output_file'
-#if str( $options_type['options_type_selector'] ) == 'basic':
-#if str( $input_type ) == 'cssanger':
-'cssanger'
-#else:
-'sanger'
-#end if
-'ascii' 'summarize_input'
-#else:
-'${options_type.output_type}' '${options_type.force_quality_encoding}' '${options_type.summarize_input}'
-#end if
-#if $OptionPairedEnd.pairedEnd == "Yes":
-'$OptionPairedEnd.pairedEnd_input' '$output_pairedEndFile'
-#else:
-'None' 'None'
-#end if
-</command>
-  <inputs>
-    <param name="input_file" type="data" format="txt" label="The File list to groom" />
-    <param name="input_type" type="select" label="Input FASTQ quality scores type">
-      <option value="solexa">Solexa</option>
-      <option value="illumina">Illumina 1.3-1.7</option>
-      <option value="sanger" selected="True">Sanger</option>
-      <option value="cssanger">Color Space Sanger</option>
-    </param>
-    <conditional name="options_type">
-    <param name="options_type_selector" type="select" label="Advanced Options">
-      <option value="basic" selected="True">Hide Advanced Options</option>
-      <option value="advanced">Show Advanced Options</option>
-    </param>
-    <when value="basic">
-      <!-- no options -->
-    </when>
-    <when value="advanced">
-      <param name="output_type" type="select" label="Output FASTQ quality scores type" help="Galaxy tools are designed to work with the Sanger Quality score format.">
-        <option value="solexa">Solexa</option>
-        <option value="illumina">Illumina 1.3+</option>
-        <option value="sanger" selected="True">Sanger (recommended)</option>
-        <option value="cssanger">Color Space Sanger</option>
-      </param>
-      <param name="force_quality_encoding" type="select" label="Force Quality Score encoding">
-        <option value="None">Use Source Encoding</option>
-        <option value="ascii" selected="True">ASCII</option>
-        <option value="decimal">Decimal</option>
-      </param>
-      <param name="summarize_input" type="select" label="Summarize input data">
-        <option value="summarize_input" selected="True">Summarize Input</option>
-        <option value="dont_summarize_input">Do not Summarize Input (faster)</option>
-      </param>
-    </when>
-  </conditional>
-
-  <conditional name="OptionPairedEnd">
-   <param name="pairedEnd" type="select" label="For paired-end analysis.">
-   <option value="Yes">Yes</option>
-   <option value="No" selected="true">No</option>
-   </param>
-   <when value="Yes">
-   <param name="pairedEnd_input" type="data" format="txt" label="input paired-end files list"/>
-   </when>
-   <when value="No">
-   </when>
-  </conditional>
-
-  </inputs>
-
-  <outputs>
-    <data name="output_file" format="txt">
-    </data>
-    <data format="txt" name="output_pairedEndFile" label="output Paired-end fastq files">
-     <filter>(OptionPairedEnd['pairedEnd']=='Yes')</filter>
-    </data>
-  </outputs>
-  <help>
-**What it does**
-
-This tool offers several conversions options relating to the FASTQ format.
-
-When using *Basic* options, the output will be *sanger* formatted or *cssanger* formatted (when the input is Color Space Sanger).
-
-When converting, if a quality score falls outside of the target score range, it will be coerced to the closest available value (i.e. the minimum or maximum). 
-
-When converting between Solexa and the other formats, quality scores are mapped between Solexa and PHRED scales using the equations found in `Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.`_
-
-When converting between color space (csSanger) and base/sequence space (Sanger, Illumina, Solexa) formats, adapter bases are lost or gained; if gained, the base 'G' is used as the adapter. You cannot convert a color space read to base space if there is no adapter present in the color space sequence. Any masked or ambiguous nucleotides in base space will be converted to 'N's when determining color space encoding.
-
------
-
-**Quality Score Comparison**
-
-::
-
-    SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS
-    ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
-    ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-    !"#$%&amp;'()*+,-./0123456789:;&lt;=&gt;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
-    |                         |    |        |                              |                     |
-   33                        59   64       73                            104                   126
-  
-   S - Sanger       Phred+33,  93 values  (0, 93) (0 to 60 expected in raw reads)
-   I - Illumina 1.3 Phred+64,  62 values  (0, 62) (0 to 40 expected in raw reads)
-   X - Solexa       Solexa+64, 67 values (-5, 62) (-5 to 40 expected in raw reads)
-
-Diagram adapted from http://en.wikipedia.org/wiki/FASTQ_format
-
-.. class:: infomark
-
-Output from Illumina 1.8+ pipelines are Sanger encoded.
-
-------
-
-**Citation**
-
-If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_
-
-
-.. _Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.: http://www.ncbi.nlm.nih.gov/pubmed/20015970
-
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/gsnap.xml
--- a/SMART/DiffExpAnal/gsnap.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,42 +0,0 @@
-<tool id="gsnap" name="gsnap">
-
- <description>GSNAP version 2012-12-20. 
-              GMAP: A Genomic Mapping and Alignment Program for mRNA and EST Sequences, and
-                 GSNAP: Genomic Short-read Nucleotide Alignment Program 
-    </description>
-    
- <command interpreter="python"> wrappGSNAP.py 
- -d $genomeName -i $inputFasta -k $kmer -q $inputFastq -A $outputFormat -o $outputSam
-
- #if $optionPairedEnd.paire == 'Yes':
- -p $optionPairedEnd.pairedEndFile
- #end if
-
- </command>
-
- <inputs>
- <param name="inputFasta" type="data" format="fasta" label="Reference genome file, fasta format."/>
- <param name="genomeName" type="text" value="Arabidopsis_Thaliana" label="Please give the reference genome a name! (Ex. Arabidopsis_Thaliana)"/>
- <param name="kmer" type="integer" value="12" label="Choose kmer value (superior or egal at 16), a big kmer value can take more RAM(4Go)." />
- <param name="inputFastq" type="data" format="fastq" label="Input fastq file."/>
- <param name="outputFormat" type="text" format="sam" label="Choose an output format [sam, goby (need to re-compile with appropriate options)]."/>
-
- <conditional name="optionPairedEnd">
- <param name="paire" type="select" label="pairedEnd fastq file">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="pairedEndFile" type="data" format="fastq"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- </inputs>
-
- <outputs>
- <data format="sam" name="outputSam" label="gsnap Output"/>
- </outputs>
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/gsnap_parallel_unSQL.py
--- a/SMART/DiffExpAnal/gsnap_parallel_unSQL.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,195 +0,0 @@\n-#!/usr/bin/env python\n-\n-import optparse, os, shutil, subprocess, sys, tempfile, fileinput, tarfile, glob \n-import time\n-from commons.core.launcher.Launcher import Launcher\n-from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n-from commons.core.utils.FileUtils import FileUtils\n-from optparse import OptionParser\n-\n-def stop_err( msg ):\n-    sys.stderr.write( "%s\\n" % msg )\n-    sys.exit()\n-\n-def toTar(tarFileName, accepted_hits_outputNames):\n-    tfile = tarfile.open(tarFileName + ".tmp.tar", "w")\n-    currentPath = os.getcwd()\n-    os.chdir(dir)\n-    for file in accepted_hits_outputNames:\n-        relativeFileName = os.path.basename(file)\n-        tfile.add(relativeFileName)\n-    os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))\n-    tfile.close()\n-    os.chdir(currentPath)\n-    \n-def joinSAM(dCutOut2Out):\n-    for key in dCutOut2Out.keys():\n-        FileUtils.catFilesFromList(dCutOut2Out[key],key, False)\n-        \n-def _map(iLauncher, cmd, cmdStart, cmdFinish ):\n-    lCmds = []\n-    lCmds.extend(cmd)\n-    lCmdStart = []\n-    lCmdStart.extend(cmdStart)\n-    lCmdFinish = []\n-    lCmdFinish.extend(cmdFinish)\n-    return(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))\n-\n-def _createGsnapSplicingOptions(options):\n-    lArgs = []\n-    lArgs.append("-N %s" % options.novelsplicing)\n-    if options.useSplicing:\n-        lArgs.append("-s %s" % options.useSplicing)\n-    lArgs.append("-w %s" % options.localsplicedist)\n-    lArgs.append("-e %s" % options.localSplicePenality)\n-    lArgs.append("-E %s" % options.distantSplicePenality)\n-    lArgs.append("-K %s" % options.distantSpliceEndlength)\n-    lArgs.append("-l %s" % options.shortendSpliceEndlength)\n-    \n-    \n-    return lArgs\n-\n-def _createGsnapPairedEndOptions(options):\n-    lArgs = []\n-    if not(options.useSplicing or options.pairedEndFile):\n-        
lArgs.append("--pairmax-dna %s" % options.pairmaxRna)\n-    if options.useSplicing or options.pairedEndFile:\n-        lArgs.append("--pairmax-rna %s" % options.pairmaxRna)\n-    lArgs.append("--pairexpect=%s" % options.pairexpect)\n-    lArgs.append("--pairdev=%s" % options.pairedev)\n-    \n-    \n-\n-def _createGsnapCommand(iLauncher, options, workingDir, inputFileNames, inputRevFilesNames, outputFileName, batchNumber, numberOfBatch):\n-    lArgs = []\n-    lArgs.append("-d %s" % options.genomeName)\n-    lArgs.append("-k %s" % options.kmer)\n-    lArgs.append("-D %s" % workingDir)\n-    lArgs.append("-A %s" % options.outputFormat)\n-    lArgs.append("-q %s/%s" % (batchNumber, numberOfBatch))\n-    lArgs.append("--no-sam-headers")\n-    lArgs.append(inputFileNames)\n-    print \'N option: %s, pairedEndFile option: %s\' %(options.novelsplicing, options.pairedEndFile)\n-    if options.pairedEndFile:\n-        lArgs.append(inputRevFilesNames)\n-    if options.novelsplicing == \'1\':\n-        lArgs.extend(_createGsnapSplicingOptions(options))\n-    elif options.pairedEndFile:\n-        lArgs.extend(_createGsnapPairedEndOptions(options))\n-    \n-    lArgs.append("> %s" % outputFileName)  \n-    return iLauncher.getSystemCommand("gsnap", lArgs)   \n-\n-def __main__():\n-    #Parse Command Line\n-    description = "GMAP/GSNAP version:2012-12-20."\n-    parser = OptionParser(description = description)\n-    parser.add_option(\'-o\', \'--outputTxtFile\', dest=\'outputTxtFile\', help=\'for Differential expression analysis pipeline, new output option gives a txt output containing the list of mapping results.\')\n-    parser.add_option("-q", "--inputTxt",         dest="inputTxt",         action="store",                 type="string",     help="input, a txt file for a list of input reads files [compulsory]")\n-    parser.add_option(\'-t\', \'--tar\', dest=\'outputTar\', default=None, help=\'output all accepted hits results in a tar file.\' )\n-    parser.add_option("-d", 
"--genomeName", dest="genomeName", help="Define the reference genome name.[compulsory]")\n-#    parser.add_option("-o", "--outputFile", dest='..b'h for RNA-Seq paired reads, or other reads that could have a splice (default 200000).")\n-    parser.add_option("--pairexpect", dest="pairexpect", default=200, help="Expected paired-end length, used for calling splices in medial part of paired-end reads (default 200)")\n-    parser.add_option("--pairdev", dest="pairdev", default=25, help="Allowable deviation from expected paired-end length, used for calling splices in medial part of paired-end reads (default 25)")\n-    \n-    (options, args) = parser.parse_args()    \n-\n-    workingDir = os.path.dirname(options.inputFastaFile)\n-    \n-    file = open(options.inputTxt,"r")\n-    lines = file.readlines()\n-    inputFileNames = []\n-    gsnapOutputNames = []\n-    outputName = options.outputTxtFile\n-    resDirName = os.path.dirname(outputName) + \'/\'\n-    out = open(outputName, "w")\n-    for line in lines:\n-        timeId = time.strftime("%Y%m%d%H%M%S")\n-        tab = line.split()\n-        inputFileNames.append(tab[1])\n-        OutputName = resDirName + tab[0] + \'_samOutput_%s.sam\' % timeId\n-        gsnapOutputNames.append(OutputName) \n-        out.write(tab[0] + \'\\t\' + OutputName + \'\\n\')\n-    file.close()\n-    out.close()\n-    \n-    if options.pairedEndFile:\n-        revFile = open(options.pairedEndFile,"r")\n-        lines = revFile.readlines()\n-        inputRevFileNames = []\n-        for line in lines:\n-            revTab = line.split()\n-            inputRevFileNames.append(revTab[1])\n-        revFile.close()\n-\n-    #Create gsnap make \n-    lCmdsTuples =[]\n-    acronym = "gsnap_make"\n-    jobdb = TableJobAdaptatorFactory.createJobInstance()\n-    iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)\n-    cmds = []\n-    cmd_setup = "gmap_setup -d %s -D %s -k %s %s;" % 
(options.genomeName, workingDir, options.kmer, options.inputFastaFile)\n-    cmds.append(cmd_setup)\n-    cmd_make_coords = "make -f Makefile.%s coords;" % options.genomeName \n-    cmds.append(cmd_make_coords)\n-    cmd_make_gmapdb = "make -f Makefile.%s gmapdb;" % options.genomeName\n-    cmds.append(cmd_make_gmapdb)\n-    cmd_make_install = "make -f Makefile.%s install;" % options.genomeName\n-    cmds.append(cmd_make_install)\n-    cmd_index = iLauncher.getSystemCommand("", cmds)\n-    cmd2Launch = []\n-    cmdStart = []\n-    cmdFinish = []\n-    cmd2Launch.append(cmd_index)\n-    lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish)) \n-    iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)    \n-    \n-    acronym = "gsnap"\n-    jobdb = TableJobAdaptatorFactory.createJobInstance()\n-    iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)\n-    lCmdsTuples = []\n-    dCutOut2Out = {}\n-    lAllFile2remove = []\n-    numberOfBatch = 20  #usually for testing, working on to find a value for default launch on galaxy\n-    for i in range(len(inputFileNames)):\n-        lCutOutput = []\n-        for j in range(numberOfBatch):\n-            cutOutput = "%s_out_%s" % (inputFileNames[i], j)\n-            lCutOutput.append(cutOutput)\n-            lAllFile2remove.extend(lCutOutput)\n-            cmd2Launch = []\n-            if options.pairedEndFile: \n-                inputRevFile = inputRevFileNames[i]\n-            else:\n-                inputRevFile = ""\n-            cmd2Launch.append(_createGsnapCommand(iLauncher, options, workingDir, inputFileNames[i], inputRevFile, cutOutput, j, numberOfBatch))\n-            cmdStart = []\n-            cmdFinish = []\n-            lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))    \n-            dCutOut2Out[gsnapOutputNames[i]] = lCutOutput\n-    iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)\n- 
   \n-    joinSAM(dCutOut2Out) \n-    FileUtils.removeFilesFromListIfExist(lAllFile2remove)   \n-                 \n-    if options.outputTar != None:\n-        toTar(options.outputTar, gsnapOutputNames)\n-\n-\n-if __name__=="__main__": __main__()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/gsnap_parallel_unSQL.xml
--- a/SMART/DiffExpAnal/gsnap_parallel_unSQL.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,46 +0,0 @@
-<tool id="gsnap_parallel_unSQL" name="GSNAP (for DEA in parallel)" version="1.0.0">
-  <description>Genomic Short-read Nucleotide Alignment Program in parallel for Differential Expression Analysis (DEA)</description>
-  <command interpreter="python">gsnap_parallel_unSQL.py 
- -i $genome_file
- -q $fastq_file_list  
- -o $output_file_list
- -d $genome_prefix 
- -k $kmer_size
- #if $OptionPairedEnd.pairedEnd == 'Yes':
-  -p $pairedEnd_input
- #end if
-
-</command>
-  <inputs>
-    <param name="genome_file" type="data" format="fasta" label="Genome fasta file" />
-    <param name="fastq_file_list" type="data" format="txt" label="Fastq file list" />
-    <param name="genome_prefix" type="text" format="txt" label="Prefix used to name genome index " />
-    <param name="kmer_size" type="integer" value="12" label="Kmer size"/>
-    
-  <conditional name="OptionPairedEnd">
-   <param name="pairedEnd" type="select" label="For paired-end analysis.">
-   <option value="Yes">Yes</option>
-   <option value="No" selected="true">No</option>
-   </param>
-   <when value="Yes">
-   <param name="pairedEnd_input" type="data" format="txt" label="input paired-end files list"/>
-   </when>
-   <when value="No">
-   </when>
-  </conditional>
-
-  </inputs>
-
-  <outputs>
-    <data name="output_file_list" format="txt"/>
-  </outputs>
-  <help>
-**What it does**
-
-To complete
-
-**Citation**
-
-If you use this tool, please cite "Thomas D. Wu and Serban Nacu, Fast and SNP-tolerant detection of complex variants and splicing in short reads, Bioinformatics 2010 26:873-881"
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/listInputs.pl
--- a/SMART/DiffExpAnal/listInputs.pl Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,13 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-
-my $in_file1 = $ARGV[0];
-my $in_file2 = $ARGV[1];
-my $out_file = $ARGV[2];
-
-open(OUT, ">$out_file");
-print OUT "label\tfiles\tgroup\n";
-print OUT "fileID=1\t$in_file1\tgroup1\n";
-print OUT "fileID=2\t$in_file2\tgroup2\n";
-close(OUT);
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/listInputs.xml
--- a/SMART/DiffExpAnal/listInputs.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,25 +0,0 @@
-<tool id="listInputs" name="listInputs">
- <description>Give a list of input files from different conditions/groups for DESeq analysis, DESeq can then charge these input files from the given list.</description>
- <command interpreter="perl"> listInputs.pl $inputFromGroup1 $inputFromGroup2 $output
- </command>
-
- <inputs>
- <param name="inputFromGroup1" type="data" format="tabular" label="Please choose your file from group1."/>
- <param name="inputFromGroup2" type="data" format="tabular" label="Please choose your file from group2."/>
- </inputs>
-
- <outputs>
- <data format="txt" name="output" label="listInputs Output"/>
- </outputs>
-
- <help>
- This tool can facilate the the chargement for DESeq tool.
- Example:
- From group1, we have input1.
- From group2, we have input2.
- This tool will give us a list like:
- fileID=1 input1 group1
- fileID=2 input2 group2
- Where the value of fileID is unique for each input file. 
- </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/loadHTSeqResultFiles.py
--- a/SMART/DiffExpAnal/loadHTSeqResultFiles.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,40 +0,0 @@
-#!/usr/bin/env python
-
-import optparse, sys
-
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option('-i', '--inputs', dest='inputFiles', default=None, help='several input files. (seperated by @ or @@' )
-    parser.add_option( '-o', '--output', dest='outputFile', default=None, help='The output list of HTSeq results files(.tabular) on txt format.' )
-    ( options, args ) = parser.parse_args()
-
-    
-    out = open(options.outputFile, 'w')
-    out.write("label\tfiles\tgroup\n")
-    if options.inputFiles == None:
-        raise Exception, 'input file name is not defined!'
-    
-    groupCount = 1
-    fileCount = 0        
-    
-    inputFiles = sys.argv[6:]
-    print '\n\nthe length of inputfiles is : %s \n' % len(inputFiles)
-    i = 0
-    while i < (len(inputFiles)-1):
-        if inputFiles[i] == "@":
-            i += 1
-            fileCount = 1
-            groupCount += 1
-            out.write("Group%s_%s\t%s\t%s\n" % (groupCount, fileCount, inputFiles[i], groupCount))
-        else:
-     fileCount += 1
-            out.write("Group%s_%s\t%s\t%s\n" % (groupCount, fileCount, inputFiles[i], groupCount))
-        i += 1
-           
-    out.close()   
-    
-        
-
-if __name__=="__main__": __main__()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/loadHTSeqResultFiles.xml
--- a/SMART/DiffExpAnal/loadHTSeqResultFiles.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,29 +0,0 @@
-<tool id="load_HTSeqResultFiles" name="load HTSeqResultFiles" >
-  <description>To load several HTSeq result files from different conditions.</description>
-  <command interpreter="python"> loadHTSeqResultFiles.py -o $htseqRes_out
- -i
- #for $i in $condition_groups
- #for $j in $i.replicates
- $j.tabular_file
- #end for
- @
- #end for
-
-</command>
-  <inputs>
-   <repeat name="condition_groups" title="Condition group" min="2">
-      <repeat name="replicates" title="Replicate">
-     <param name="tabular_file" format="tabular" type="data" label="TABULAR file."/>
-          </repeat>
-        </repeat>
-  </inputs>
-
-  <outputs>
-    <data format="txt" name="htseqRes_out" label="HTSeq result files" help="This program gives you a list of files you choose for the following data analysis."/>
-
-</outputs>
-<help>
-</help>
-
-</tool>
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/loadMultiFastqFiles.py
--- a/SMART/DiffExpAnal/loadMultiFastqFiles.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,70 +0,0 @@
-#!/usr/bin/env python
-
-import optparse, sys
-
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option('-i', '--inputs', dest='inputFiles', default=None, help='several input files. (seperated by @ or @@' )
-    parser.add_option( '-o', '--output', dest='outputSingleFile', default=None, help='The output list of fastq files on txt format.' )
-    parser.add_option( '', '--pairedEnd', dest='outputPaireFile', default=None, help='paired end option help to upload the corresponding paired end complementary fastq files' )
-    ( options, args ) = parser.parse_args()
-
-    
-
-    if options.outputSingleFile == None: 
-        raise Exception, 'OutSingleFile txt file name is not defined!'
-    else:
-        outSingle = open(options.outputSingleFile, 'w')
-    
-    if options.inputFiles == None:
-        raise Exception, 'input file name is not defined!'
-    
-    groupCount = 1
-    fileCount = 0        
-    
-    if options.outputPaireFile == None:
-        inputFiles = sys.argv[4:]
-        i = 0
-        while i < (len(inputFiles)-1):
-     if inputFiles[i] == "@":
-                i += 1
-                fileCount = 1
-                groupCount += 1
-                outSingle.write("Group%s_%s\t%s\n" % (groupCount, fileCount, inputFiles[i]))
-                
-            else:
-                fileCount += 1
-                outSingle.write("Group%s_%s\t%s\n" % (groupCount, fileCount, inputFiles[i]))
-                
-            i += 1
-    else:
-        inputFiles = sys.argv[6:]
-        print '\n\nthe length of inputfiles is : %s \n' % len(inputFiles)
-        outPaire = open(options.outputPaireFile, 'w')
-        i = 0
-        while i < (len(inputFiles)-1):
-            if inputFiles[i] == "@@":
-                i += 1
-                outPaire.write("Group%s_%s\t%s\n" % (groupCount, fileCount, inputFiles[i]))
-            elif inputFiles[i] == "@":
-                i += 1
-                fileCount = 1
-                groupCount += 1
-                outSingle.write("Group%s_%s\t%s\n" % (groupCount, fileCount, inputFiles[i]))
-            else:
-                fileCount += 1
-                outSingle.write("Group%s_%s\t%s\n" % (groupCount, fileCount, inputFiles[i]))
-                
-            i += 1
-                
-        
-        
-        outPaire.close()
-           
-    outSingle.close()   
-    
-        
-
-if __name__=="__main__": __main__()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/loadMultiFastqFiles.sh
--- a/SMART/DiffExpAnal/loadMultiFastqFiles.sh Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-OUTFile=${1}
-shift
-groupCount=1
-replicateNumber=1
-
-arrayZ=( $@ )
-#remove the last symble '@' given by commande line
-unset arrayZ[${#arrayZ[@]}-1]
-
-for FILE in ${arrayZ[@]}
-do
- #if a new group of fastq, re-count the replicateNumber
- if echo $FILE | grep -q "@" 
- then 
- groupCount=$(($groupCount + 1))
- replicateNumber=1
- else
- echo -e "Group${groupCount}_${replicateNumber}\t${FILE}" >>${OUTFile}
- replicateNumber=$(($replicateNumber + 1))
-   fi
-done
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/loadMultiFastqFiles.xml
--- a/SMART/DiffExpAnal/loadMultiFastqFiles.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,75 +0,0 @@
-<tool id="load_multiFASTQFiles" name="load_multiFASTQfiles" >
-  <description>To load several FASTQ files from different conditions.</description>
-  <command interpreter="python"> loadMultiFastqFiles.py -o $multiFASTQfiles_out
-#if $single_end_paired_end.mapping_mode == 'single':
- -i
- #for $i in $single_end_paired_end.condition_groups
- #for $j in $i.replicates
- $j.fastq_file
- #end for
- @
- #end for
-
-#elif $single_end_paired_end.mapping_mode == 'paired':
-
- --pairedEnd $multiFASTQfiles_paired_end_out
- -i
- #for $i in $single_end_paired_end.condition_groups
- #for $j in $i.replicates
- $j.fastq_file
- @@
- $j.fastq_paired_end_file
- #end for
- @
- #end for
-#end if
-
-</command>
-  <inputs>
-   <conditional name="single_end_paired_end">
-    <param name="mapping_mode" type="select" label="The uploading fastq files for single-end or paired-end mapping mode.">
- <option value="single">Single-End</option>
- <option value="paired">Paire-End</option>
-    </param>
-            <when value="single">
-    <repeat name="condition_groups" title="Condition group" min="2">
-        <repeat name="replicates" title="Replicate">
-         <param name="fastq_file" format="fastq" type="data" label="FASTQ file. Can show the sequences quality."/>
-         </repeat>
-         </repeat>
-    </when>
-    <when value="paired">
-    <repeat name="condition_groups" title="Condition group" min="2">
-           <repeat name="replicates" title="Replicate">
-         <param name="fastq_file" format="fastq" type="data" label="FASTQ file. Can show the sequences quality."/>
-         <param name="fastq_paired_end_file" format="fastq" type="data" label="fastq paired end complementary file" help="Add the corresponding paired end file for paired end mapping"/>
-         </repeat>
-         </repeat>
-    </when>
-
-     </conditional>
-  </inputs>
-
-  <outputs>
-    <data format="txt" name="multiFASTQfiles_out" label="loadMultiFASTQFiles result" help="This program gives you a list of files you choose for the following data analysis."/>
-    <data format="txt" name="multiFASTQfiles_paired_end_out" label="loadMultiFASTQFiles for paired end result" help="This program gives you a list of files you choose for the following data analysis.">
-     <filter>(single_end_paired_end['mapping_mode']=='paired')</filter>
-
-    </data>
-</outputs>
-<help>
- **This tool is to help upload several data for differential expression pipeline. Before click 'Execute', you should Click** Ctrl + here_ **first to open the pipeline in a new page.**
-
- .. _here: http://127.0.0.1:8085/u/yufei-luo/w/differentialexpressiondeseq-with-replicates 
-</help>
-
-</tool>
-
-
-
-
-
-
-
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/testR.R
--- a/SMART/DiffExpAnal/testR.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,93 +0,0 @@
-#!/usr/bin
-
-library(DESeq)
-library(hexbin)
-library(latticeExtra)
-library(gplots)
-library(geneplotter)
-library(Biobase)
-
-##In a file called test_args.R
-args <- commandArgs()
-
-
-fileName <- args[4]
-colNames <- as.integer(unlist(strsplit(args[5], ",")))
-colCond1 <- as.integer(unlist(strsplit(args[6], ",")))
-colCond2 <- as.integer(unlist(strsplit(args[7], ",")))
-OUTPUTCSV <- args[8]
-OUTPUTPNG <- args[9]
-
-if(colNames[1]!=0){
- countsTable <- read.delim(fileName, row.names=1)
- conditions <- c((colNames[length(colNames)]+1):ncol(countsTable))
-} else if(colNames[1]==0){
- countsTable <- read.delim(fileName)
- conditions <- c(1:ncol(countsTable))
- rownames(countsTable) <- paste( "Gene", 1:nrow(countsTable), sep="_" )}
-
-for(i in colCond1){conditions[i] = "A"}
-for(i in colCond2){conditions[i] = "B"}
-conditions
-#analysis with DESeq
-cds <- newCountDataSet( countsTable, conditions )
-cds <- estimateSizeFactors( cds )
-cds <- estimateVarianceFunctions( cds )
-result <- nbinomTest( cds, "A", "B" )
-#stock the result dans un .tsv as output file
-write.table(result, OUTPUTCSV, sep = " ", quote = FALSE, col.names = NA)
-
-#figures for DE analysis
-#pdf( OUTPUTPNG, width=4, height=4 )
-png( filename=OUTPUTPNG, width=700, height=700 )
-#png format is not as clear as pdf format!!!!!!!!!!!!!!!!!!!!!!!!!
-print(xyplot(
- log2FoldChange ~ I(baseMean),
- result,
- pch=16, cex=.3,
- col=ifelse(result$padj < .1, "#FF000040","#00000040" ),
- panel = function( x, y, col, ...) {
- above <- (y > 5.8)
- below <- (y < -5.8)
- inside <- !( above | below )
- panel.xyplot( x=x[inside], y=y[inside], col=col[inside], ...)
- panel.arrows( x[above], 5.8, x[above], 5.95, col=col[above],length=".1", unit="native" )
- panel.arrows( x[below], -5.8, x[below], -5.95, col=col[below],length=".1", unit="native" ) },
- axis = function(side, ...) {
- if( side=="left") {
- panel.axis( side, outside=TRUE, at=seq(-14,14,by=1), labels=FALSE )
- panel.axis( side, outside=TRUE, at=seq(-10,10,by=5), labels=TRUE )
- }
- if( side=="bottom") {
- panel.axis( side, outside=TRUE, at=seq(-2,10,by=1), rot=0,
- labels = do.call( expression,
- lapply( seq(-2,10,by=1), function(a)
- substitute( 10^b, list(b=a) ) ) ) )
- } },
- xlab = "mean", ylab = "log2 fold change",
- scales = list(x = list( log=TRUE ),y = list( log=FALSE, limits=c( -6, 6 ) ) ) ))
-dev.off()
-
-#The volcano plot 
-#pdf( "vulcano_fly.pdf", width=4, height=4 )
-#print(xyplot( -log10( pval ) ~ log2FoldChange,
-# result,
-# pch=20, cex=.2,
-# col=ifelse( result$padj<.1, "#FF000050", "#00000050" ),
-# axis = function( side, ... ) {
-# if( side=="bottom") {
-# panel.axis( side, outside=TRUE, at=seq(-14,14,by=1), labels=FALSE )
-# panel.axis( side, outside=TRUE, at=seq(-10,10,by=5), labels=TRUE )
-# }
-# if( side=="left") {
-# panel.axis( side, outside=TRUE, at=seq(0,25,by=1), labels=FALSE )
-# panel.axis( side, outside=TRUE, at=seq(0,25,by=5),
-# labels = do.call( expression,
-# lapply( seq(0,25,by=5), function(a)
-# substitute( 10^-b, list(b=a) ) ) ) )
-# } },
-# xlab = "log2 fold change", ylab = "p value",
-# scales = list(
-# x = list( limits=c( -6, 6 ) ),
-# y = list( limits=c( 0, 25 ) ) ) ))
-#dev.off()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/testR.sh
--- a/SMART/DiffExpAnal/testR.sh Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,11 +0,0 @@
-#! /bin/sh
-
-#cat testR.R | R --slave --args $1 $2 $3 $4 $5 $6 < DiffExpAnal/testR.R
-
-#ex1. sh testR.sh fly_RNA_counts.tsv 0 1,3 2,4 output_fly.csv output_fly.png
-#ex2. sh testR.sh NeuralStemCellData.tab 1 2,3,4 5,6 output_modif.csv output_modif.png
-
-#cat /share/apps/galaxy-dist/tools/repet_pipe/SMART/DiffExpAnal/testR.R | R --slave --args $1 $2 $3 $4 $5 $6 < /share/apps/galaxy-dist/tools/repet_pipe/SMART/DiffExpAnal/testR.R
-
-#$1=targetFile(the list of files) $2=with or without replicate
-cat /share/apps/galaxy-dist/tools/repet_pipe/SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R| R --slave --args $1 $2 < /share/apps/galaxy-dist/tools/repet_pipe/SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/tophat_parallel.py
--- a/SMART/DiffExpAnal/tophat_parallel.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,325 +0,0 @@\n-\n-#!/usr/bin/env python\n-\n-import optparse, os, shutil, subprocess, sys, tempfile, fileinput, tarfile,random\n-\n-def stop_err( msg ):\n-    sys.stderr.write( "%s\\n" % msg )\n-    sys.exit()\n-\n-def toTar(tarFileName, accepted_hits_outputNames):\n-    fileName = os.path.splitext(tarFileName)[0]\n-    fileNameBaseName = os.path.basename(fileName)\n-    dir = os.path.dirname(tarFileName)    \n-    tfile = tarfile.open(tarFileName + ".tmp.tar", "w")\n-    currentPath = os.getcwd()\n-    os.chdir(dir)\n-    for file in accepted_hits_outputNames:\n-        relativeFileName = os.path.basename(file)\n-        tfile.add(relativeFileName)\n-    os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))\n-    tfile.close()\n-    os.chdir(currentPath)\n-    \n-\n-def __main__():\n-    #Parse Command Line\n-    parser = optparse.OptionParser()\n-    parser.add_option(\'-o\', \'--outputTxtFile\', dest=\'outputTxtFile\', help=\'for Differential expression analysis pipeline, new output option gives a txt output containing the list of mapping results.\')\n-    parser.add_option(\'-t\', \'--tar\', dest=\'outputTar\', default=None, help=\'output all accepted hits results in a tar file.\' )\n-    parser.add_option( \'-p\', \'--num-threads\', dest=\'num_threads\', help=\'Use this many threads to align reads. 
The default is 1.\' )\n-    parser.add_option( \'-C\', \'--color-space\', dest=\'color_space\', action=\'store_true\', help=\'This indicates color-space data\' )\n-    parser.add_option( \'-J\', \'--junctions-output\', dest=\'junctions_output_file\', default=\'junctions_output.bed\', help=\'Junctions output file; formate is BED.\' )\n-    parser.add_option( \'-H\', \'--hits-output\', dest=\'accepted_hits_output_file\', default=\'hits_output_%s.bam\' % random.randrange(0, 10000), help=\'Accepted hits output file; formate is BAM.\' )\n-    parser.add_option( \'\', \'--own-file\', dest=\'own_file\', help=\'\' )\n-    parser.add_option( \'-D\', \'--indexes-path\', dest=\'index_path\', help=\'Indexes directory; location of .ebwt and .fa files.\' )\n-    parser.add_option( \'-r\', \'--mate-inner-dist\', dest=\'mate_inner_dist\', help=\'This is the expected (mean) inner distance between mate pairs. \\\n-                                                                                For, example, for paired end runs with fragments selected at 300bp, \\\n-                                                                                where each end is 50bp, you should set -r to be 200. There is no default, \\\n-                                                                                and this parameter is required for paired end runs.\')\n-    parser.add_option( \'\', \'--mate-std-dev\', dest=\'mate_std_dev\', help=\'Standard deviation of distribution on inner distances between male pairs.\' )\n-    parser.add_option( \'-a\', \'--min-anchor-length\', dest=\'min_anchor_length\', \n-                        help=\'The "anchor length". 
TopHat will report junctions spanned by reads with at least this many bases on each side of the junction.\' )\n-    parser.add_option( \'-m\', \'--splice-mismatches\', dest=\'splice_mismatches\', help=\'The maximum number of mismatches that can appear in the anchor region of a spliced alignment.\' )\n-    parser.add_option( \'-i\', \'--min-intron-length\', dest=\'min_intron_length\', \n-                        help=\'The minimum intron length. TopHat will ignore donor/acceptor pairs closer than this many bases apart.\' )\n-    parser.add_option( \'-I\', \'--max-intron-length\', dest=\'max_intron_length\', \n-                        help=\'The maximum intron length. When searching for junctions ab initio, TopHat will ignore donor/acceptor pairs farther than this many bases apart, except when such a pair is supported by a split segment alignment of a long read.\' )\n-    parser.add_option( \'-F\', \'--junction_filter\', dest=\'junction_filter\', help=\'Filter out junctions supported by too few alignments (number of reads divided by average depth of coverage)\' )\n-    parser.add_option( \'-g\', \'--max_multihits\', dest=\'max_multihits\', help=\'Maximum number of alignmen'..b'overage-search --min-coverage-intron %s --max-coverage-intron %s \' % ( options.min_coverage_intron, options.max_coverage_intron )\n-                else:\n-                    opts += \'--no-coverage-search \'\n-                if options.closure_search:\n-                    opts += \'--closure-search --min-closure-exon %s --min-closure-intron %s --max-closure-intron %s \'  % ( options.min_closure_exon, options.min_closure_intron, options.max_closure_intron ) \n-                else:\n-                    opts += \'--no-closure-search \'\n-                if options.microexon_search:\n-                    opts += \'--microexon-search \'\n-                if options.single_paired == \'paired\':\n-                    opts += \'--mate-std-dev %s \' % options.mate_std_dev\n-                if 
options.initial_read_mismatches:\n-                    opts += \'--initial-read-mismatches %d \' % int( options.initial_read_mismatches )\n-                if options.seg_mismatches:\n-                    opts += \'--segment-mismatches %d \' % int( options.seg_mismatches )\n-                if options.seg_length:\n-                    opts += \'--segment-length %d \' % int( options.seg_length )\n-                if options.min_segment_intron:\n-                    opts += \'--min-segment-intron %d \' % int( options.min_segment_intron )\n-                if options.max_segment_intron:\n-                    opts += \'--max-segment-intron %d \' % int( options.max_segment_intron )\n-                if options.own_file:\n-                    cmd = cmd % ( opts, index_paths[0], input_files ) #here to add paired end file\n-                else:\n-                    cmd = cmd % ( opts, index_paths[i], input_files ) #here to add paired end file\n-            except Exception, e:\n-                # Clean up temp dirs\n-                if os.path.exists( tmp_index_dir ):\n-                    shutil.rmtree( tmp_index_dir )\n-                stop_err( \'Something is wrong with the alignment parameters and the alignment could not be run\\n\' + str( e ) )\n-                \n-        cmds.append(cmd)\n-\n-    # Run the command line for each file.\n-    for i in range(len(cmds)):\n-        try:\n-            tmp_out = tempfile.NamedTemporaryFile().name\n-            tmp_files.append(tmp_out)\n-            tmp_stdout = open( tmp_out, \'wb\' )\n-            tmp_err = tempfile.NamedTemporaryFile().name\n-            tmp_files.append(tmp_err)\n-            tmp_stderr = open( tmp_err, \'wb\' )\n-            proc = subprocess.Popen( args=cmds[i], shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )\n-            returncode = proc.wait()\n-            tmp_stderr.close()\n-            # get stderr, allowing for case where it\'s very large\n-            tmp_stderr = open( 
tmp_err, \'rb\' )\n-            stderr = \'\'\n-            buffsize = 1048576\n-            try:\n-                while True:\n-                    stderr += tmp_stderr.read( buffsize )\n-                    if not stderr or len( stderr ) % buffsize != 0:\n-                        break\n-            except OverflowError:\n-                pass\n-            tmp_stdout.close()\n-            tmp_stderr.close()\n-            if returncode != 0:\n-                raise Exception, stderr\n-                \n-            # Copy output files from tmp directory to specified files.\n-            #shutil.copyfile( os.path.join( "tophat_out", "junctions.bed" ), junctions_outputNames[i] )\n-            shutil.copyfile( os.path.join( "tophat_out", "accepted_hits.bam" ), accepted_hits_outputNames[i] )\n-            # TODO: look for errors in program output.\n-        except Exception, e:\n-            stop_err( \'Error in tophat:\\n\' + str( e ) ) \n-\n-    if options.outputTar != None:\n-        toTar(options.outputTar, accepted_hits_outputNames)\n-\n-    \n-    # Clean up temp dirs\n-    for tmp_index_dir in tmp_index_dirs:\n-        if os.path.exists( tmp_index_dir ):\n-            shutil.rmtree( tmp_index_dir )\n-\n-    for tmp in tmp_files:\n-        os.remove(tmp)\n-\n-\n-if __name__=="__main__": __main__()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/tophat_parallel.xml
--- a/SMART/DiffExpAnal/tophat_parallel.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,577 +0,0 @@\n-<tool id="tophat_parallel" name="Tophat for Illumina (for DEA)" version="1.0.0">\n-    <description>Find splice junctions using RNA-seq data, can have several input RNA-seq data.</description>\n-    <version_command>tophat --version</version_command>\n-    <requirements>\n-        <requirement type="package">tophat</requirement>\n-    </requirements>\n-    <command interpreter="python">\n-\t    tophat_parallel.py\n-            ## Change this to accommodate the number of threads you have available.\n-            --num-threads="4"\n-\n-            ## Provide outputs.\n-            -o $outputFileName\n-            ##--junctions-output=$junctions\n-            ##--hits-output=$accepted_hits\n-\n-            ## Handle reference file.\n-            #if $refGenomeSource.genomeSource == "history":\n-                --own-file=$refGenomeSource.ownFile\n-            #else:\n-                --indexes-path="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.index ), $__app__.tool_data_tables[ \'tophat_indexes\' ].get_fields() )[0][-1] }"\n-            #end if\n-\n-            ## Are reads single-end or paired?\n-            --single-paired=$singlePaired.sPaired\n-\n-            ## First input file always required.\n-            --input1=$input1\n-\n-            ## Set params based on whether reads are single-end or paired.\n-            #if $singlePaired.sPaired == "single":\n-                --settings=$singlePaired.sParams.sSettingsType\n-                #if $singlePaired.sParams.sSettingsType == "full":\n-                    -a $singlePaired.sParams.anchor_length\n-                    -m $singlePaired.sParams.splice_mismatches\n-                    -i $singlePaired.sParams.min_intron_length\n-                    -I $singlePaired.sParams.max_intron_length\n-                    -F $singlePaired.sParams.junction_filter\n-                    -g $singlePaired.sParams.max_multihits\n-                    --min-segment-intron 
$singlePaired.sParams.min_segment_intron\n-                    --max-segment-intron $singlePaired.sParams.max_segment_intron\n-                    --initial-read-mismatches=$singlePaired.sParams.initial_read_mismatches\n-                    --seg-mismatches=$singlePaired.sParams.seg_mismatches\n-                    --seg-length=$singlePaired.sParams.seg_length\n-                    --library-type=$singlePaired.sParams.library_type\n-                    \n-                    ## Indel search.\n-                    #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes":\n-                        ## --allow-indels\n-                        --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length\n-                        --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length\n-                    #else:\n-                        --no-novel-indels\n-                    #end if\n-\n-                    ## Supplying junctions parameters.\n-                    #if $singlePaired.sParams.own_junctions.use_junctions == "Yes":\n-                        #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes":\n-                            -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model\n-                        #end if\n-                        #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes":\n-                            -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs\n-                        #end if\n-                        ## TODO: No idea why a string cast is necessary, but it is:\n-                        #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes":\n-                            --no-novel-juncs\n-                        #end if\n-                    #end if\n-\n-                    #if $singlePaired.sParams.closure_search.use_search == "Yes":\n-                        --closure-search\n-                     
   --min-closure-exon $singlePaired.sParams.closure_search.min_closure_exon\n-                        --'..b'hen such a pair is supported by a split segment alignment of a long read. The default is 500000.\n-  -F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons, is supported by S reads. Let the average depth of coverage of \n-                                    exon A be D, and assume that it is higher than B. If S / D is less than the minimum isoform fraction, the junction is not reported. A value of zero disables the \n-                                    filter. The default is 0.15.\n-  -g/--max-multihits INT            Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many \n-                                    alignments. The default is 40.\n-  -G/--GTF [GTF 2.2 file]           Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.\n-  -j/--raw-juncs [juncs file]       Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive.\n-  -no-novel-juncs                   Only look for junctions indicated in the supplied GFF file. (ignored without -G)\n-  --no-closure-search               Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.\n-  --closure-search                  Enables the mate pair closure-based search for junctions. 
Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp)\n-  --no-coverage-search              Disables the coverage based search for junctions.\n-  --coverage-search                 Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.\n-  --microexon-search                With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.\n-  --butterfly-search                TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA, that fall within the introns of your transcripts.\n-  --segment-mismatches              Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2.\n-  --segment-length                  Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25.\n-  --min-closure-exon                During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50.\n-  --min-closure-intron              The minimum intron length that may be found during closure search. The default is 50.\n-  --max-closure-intron              The maximum intron length that may be found during closure search. The default is 5000.\n-  --min-coverage-intron             The minimum intron length that may be found during coverage search. The default is 50.\n-  --max-coverage-intron             The maximum intron length that may be found during coverage search. The default is 20000.\n-  --min-segment-intron              The minimum intron length that may be found during split-segment search. 
The default is 50.\n-  --max-segment-intron              The maximum intron length that may be found during split-segment search. The default is 500000.\n-    </help>\n-</tool>\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/tophat_parallel_unSQL.py
--- a/SMART/DiffExpAnal/tophat_parallel_unSQL.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,353 +0,0 @@\n-\n-#!/usr/bin/env python\n-\n-import optparse, os, shutil, subprocess, sys, tempfile, fileinput, tarfile, glob\n-from commons.core.launcher.Launcher import Launcher\n-from commons.core.sql.TableJobAdaptatorFactory import TableJobAdaptatorFactory\n-from commons.core.utils.FileUtils import FileUtils\n-\n-def stop_err( msg ):\n-    sys.stderr.write( "%s\\n" % msg )\n-    sys.exit()\n-\n-def toTar(tarFileName, accepted_hits_outputNames):\n-    tfile = tarfile.open(tarFileName + ".tmp.tar", "w")\n-    currentPath = os.getcwd()\n-    os.chdir(dir)\n-    for file in accepted_hits_outputNames:\n-        relativeFileName = os.path.basename(file)\n-        tfile.add(relativeFileName)\n-    os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName))\n-    tfile.close()\n-    os.chdir(currentPath)\n-    \n-def splitFastQ(fileName, nbOfSeqPerBatch):\n-    nbOfLinesPerFile = nbOfSeqPerBatch * 4\n-    lOutput = []\n-    filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))\n-    resDir = os.path.dirname(fileName)\n-    with open(fileName) as inF:\n-        fileNb = 1\n-        line = inF.readline()\n-        if not line or nbOfLinesPerFile == 0:\n-            outFileName = "%s/%s-%s%s" %(resDir, filePrefix, fileNb, fileExt)\n-            lOutput.append(outFileName)\n-            f = open(outFileName, "wb")\n-            shutil.copyfileobj(open(fileName, "rb"), f)\n-            f.close()\n-        else:\n-            while line:\n-                outFileName = "%s/%s-%s%s" %(resDir, filePrefix, fileNb, fileExt)\n-                lOutput.append(outFileName)\n-                with open(outFileName, "w") as outF:\n-                    lineNb = 1\n-                    while lineNb <= nbOfLinesPerFile and line:\n-                        outF.write(line)\n-                        line = inF.readline()\n-                        lineNb += 1\n-                fileNb += 1\n-    return lOutput\n-\n-def joinBAM(dCutOut2Out):\n-    for key in 
dCutOut2Out.keys():\n-        fh = open(key, "w") \n-        fh.close()\n-        nbFile = 0\n-        cmd = "samtools merge -f %s" % key\n-        for fileName in dCutOut2Out[key]:\n-            nbFile = nbFile + 1\n-            if nbFile < 225:\n-                cmd += " %s" % fileName\n-            else:\n-                nbFile = 0\n-                cmd += ";mv %s tmpBAM;" % (key)\n-                cmd += "samtools merge -f %s tmpBAM %s" %  (key, fileName)\n-        proc = subprocess.Popen( args=cmd , shell=True)\n-        returncode = proc.wait()\n-\n-        \n-def _map(iLauncher, cmd, cmdStart, cmdFinish ):\n-    lCmds = []\n-    lCmds.extend(cmd)\n-    lCmdStart = []\n-    lCmdStart.extend(cmdStart)\n-    lCmdFinish = []\n-    lCmdFinish.extend(cmdFinish)\n-    return(iLauncher.prepareCommands_withoutIndentation(lCmds, lCmdStart, lCmdFinish))\n-\n-def _createTopHatCommand(iLauncher, options, index_paths, inputFileNames, inputRevFilesNames, space):\n-    lArgs = []\n-    lArgs.append(\'-p %s %s\' % ( options.num_threads, space ))\n-    if options.single_paired == \'paired\':\n-        lArgs.append(\'-r %s \' % options.mate_inner_dist)\n-    if options.settings == \'preSet\':\n-        lArgs.append(index_paths)\n-        lArgs.append(inputFileNames)\n-        if options.input2:\n-            lArgs.append(inputRevFilesNames)\n-        return iLauncher.getSystemCommand("tophat", lArgs)\n-    else:\n-        if int( options.min_anchor_length ) >= 3:\n-            lArgs.append(\'-a %s \' % options.min_anchor_length)\n-        else:\n-            raise Exception, \'Minimum anchor length must be 3 or greater\'\n-        lArgs.append(\'-m %s \' % options.splice_mismatches)\n-        lArgs.append(\'-i %s \' % options.min_intron_length)\n-        lArgs.append(\'-I %s \' % options.max_intron_length)\n-        if float( options.junction_filter ) != 0.0:\n-            lArgs.append(\'-F %s \' % options.junction_filter)\n-        lArgs.append(\'-g %s \' % 
options.max_multihits)\n-        # Custom junctions options.\n-        if options.gene_model_annotations:\n-            lArgs.append(\'-G %s \' % options.gene_'..b'ame + \'\\n\')\n-    file.close()\n-    out.close()\n-    \n-    if options.input2:\n-        revFile = open(options.input2,"r")\n-        lines = revFile.readlines()\n-        inputRevFileNames = []\n-        for line in lines:\n-            revTab = line.split()\n-            inputRevFileNames.append(revTab[1])\n-        revFile.close()\n-\n-    \n-    # Creat bowtie index if necessary.\n-    tmp_index_dirs = []\n-    index_paths = []\n-    tmp_index_dir = tempfile.mkdtemp(dir="%s" % os.getcwd())\n-    tmp_index_dirs.append(tmp_index_dir)\n-    if options.own_file:\n-        index_path = os.path.join( tmp_index_dir, \'.\'.join( os.path.split( options.own_file )[1].split( \'.\' )[:-1] ) )\n-        index_paths.append(index_path)\n-        try:\n-            os.link( options.own_file, index_path + \'.fa\' )\n-        except:\n-            # Tophat prefers (but doesn\'t require) fasta file to be in same directory, with .fa extension\n-            pass\n-        lCmdsTuples =[]\n-        acronym = "tophat_index"\n-        jobdb = TableJobAdaptatorFactory.createJobInstance()\n-        iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)\n-        cmd_index = iLauncher.getSystemCommand("bowtie-build", [space, "-f %s" % options.own_file, index_path])\n-        cmd2Launch = []\n-        cmdStart = []\n-        cmdFinish = []\n-        cmd2Launch.append(cmd_index)\n-        lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish)) \n-        iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)\n-    else:\n-        for file in inputFileNames:\n-            tmp_index_dir = tempfile.mkdtemp()\n-            index_path = tmp_index_dir + \'/\' + os.path.basename(file).split(\'.\')[0]\n-            
index_paths.append(index_path)\n-            tmp_index_dirs.append(tmp_index_dir)\n-\n-    \n-    \n-    acronym = "tophat"\n-    jobdb = TableJobAdaptatorFactory.createJobInstance()\n-    iLauncher = Launcher(jobdb, os.getcwd(), "", "", os.getcwd(), os.getcwd(), "jobs", "", acronym, acronym, False, True)\n-    lCmdsTuples = []\n-    dCutOut2Out = {}\n-    lAllFile2remove = []\n-    # for inputFileName in inputFileNames:\n-    for i in range(len(inputFileNames)):\n-        lCutOutput = []\n-        lCutInputFile = splitFastQ(inputFileNames[i], 20000)\n-        lAllFile2remove.extend(lCutInputFile)\n-        if options.input2:\n-            lCutPairInputFile = splitFastQ(inputRevFileNames[i], 20000)\n-            lAllFile2remove.extend(lCutPairInputFile)\n-        for j in range(len(lCutInputFile)):\n-            cutOutput = "%s_out" %  lCutInputFile[j]\n-            lCutOutput.append(cutOutput)\n-            lAllFile2remove.extend(lCutOutput)\n-            cmd2Launch = []\n-            if options.input2: \n-                inputRevFile = lCutPairInputFile[j]\n-            else:\n-                inputRevFile = ""\n-            if options.own_file:\n-                cmd2Launch.append(_createTopHatCommand(iLauncher, options, index_paths[0], lCutInputFile[j], inputRevFile, space))\n-            else:\n-                cmd2Launch.append(_createTopHatCommand(iLauncher, options, index_paths[i], lCutInputFile[j], inputRevFile, space))\n-            cmdStart = []\n-            cmdFinish = ["shutil.copyfile( os.path.join( \'tophat_out\', \'accepted_hits.bam\' ), \'%s\')" % cutOutput]\n-            lCmdsTuples.append(_map(iLauncher, cmd2Launch, cmdStart, cmdFinish))    \n-        dCutOut2Out[accepted_hits_outputNames[i]] = lCutOutput\n-    iLauncher.runLauncherForMultipleJobs(acronym, lCmdsTuples, True)\n-    \n-    joinBAM(dCutOut2Out) \n-    FileUtils.removeFilesFromListIfExist(lAllFile2remove)   \n-                 \n-    if options.outputTar != None:\n-        
toTar(options.outputTar, accepted_hits_outputNames)\n-\n-    \n-    # Clean up temp dirs\n-    for tmp_index_dir in tmp_index_dirs:\n-        if os.path.exists( tmp_index_dir ):\n-            shutil.rmtree( tmp_index_dir )\n-\n-    for tmp in tmp_files:\n-        os.remove(tmp)\n-\n-\n-if __name__=="__main__": __main__()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/tophat_parallel_unSQL.xml
--- a/SMART/DiffExpAnal/tophat_parallel_unSQL.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,577 +0,0 @@\n-<tool id="tophat_parallel_unSQL" name="Tophat for Illumina (for DEA in parallel)" version="1.0.0">\n-    <description>Find splice junctions using RNA-seq data, can have several input RNA-seq data (parallelized).</description>\n-    <version_command>tophat --version</version_command>\n-    <requirements>\n-        <requirement type="package">tophat</requirement>\n-    </requirements>\n-    <command interpreter="python">\n-\t    tophat_parallel_unSQL.py\n-            ## Change this to accommodate the number of threads you have available.\n-            --num-threads="4"\n-\n-            ## Provide outputs.\n-            -o $outputFileName\n-            ##--junctions-output=$junctions\n-            ##--hits-output=$accepted_hits\n-\n-            ## Handle reference file.\n-            #if $refGenomeSource.genomeSource == "history":\n-                --own-file=$refGenomeSource.ownFile\n-            #else:\n-                --indexes-path="${ filter( lambda x: str( x[0] ) == str( $refGenomeSource.index ), $__app__.tool_data_tables[ \'tophat_indexes\' ].get_fields() )[0][-1] }"\n-            #end if\n-\n-            ## Are reads single-end or paired?\n-            --single-paired=$singlePaired.sPaired\n-\n-            ## First input file always required.\n-            --input1=$input1\n-\n-            ## Set params based on whether reads are single-end or paired.\n-            #if $singlePaired.sPaired == "single":\n-                --settings=$singlePaired.sParams.sSettingsType\n-                #if $singlePaired.sParams.sSettingsType == "full":\n-                    -a $singlePaired.sParams.anchor_length\n-                    -m $singlePaired.sParams.splice_mismatches\n-                    -i $singlePaired.sParams.min_intron_length\n-                    -I $singlePaired.sParams.max_intron_length\n-                    -F $singlePaired.sParams.junction_filter\n-                    -g $singlePaired.sParams.max_multihits\n-                    
--min-segment-intron $singlePaired.sParams.min_segment_intron\n-                    --max-segment-intron $singlePaired.sParams.max_segment_intron\n-                    --initial-read-mismatches=$singlePaired.sParams.initial_read_mismatches\n-                    --seg-mismatches=$singlePaired.sParams.seg_mismatches\n-                    --seg-length=$singlePaired.sParams.seg_length\n-                    --library-type=$singlePaired.sParams.library_type\n-                    \n-                    ## Indel search.\n-                    #if $singlePaired.sParams.indel_search.allow_indel_search == "Yes":\n-                        ## --allow-indels\n-                        --max-insertion-length $singlePaired.sParams.indel_search.max_insertion_length\n-                        --max-deletion-length $singlePaired.sParams.indel_search.max_deletion_length\n-                    #else:\n-                        --no-novel-indels\n-                    #end if\n-\n-                    ## Supplying junctions parameters.\n-                    #if $singlePaired.sParams.own_junctions.use_junctions == "Yes":\n-                        #if $singlePaired.sParams.own_junctions.gene_model_ann.use_annotations == "Yes":\n-                            -G $singlePaired.sParams.own_junctions.gene_model_ann.gene_annotation_model\n-                        #end if\n-                        #if $singlePaired.sParams.own_junctions.raw_juncs.use_juncs == "Yes":\n-                            -j $singlePaired.sParams.own_junctions.raw_juncs.raw_juncs\n-                        #end if\n-                        ## TODO: No idea why a string cast is necessary, but it is:\n-                        #if str($singlePaired.sParams.own_junctions.no_novel_juncs) == "Yes":\n-                            --no-novel-juncs\n-                        #end if\n-                    #end if\n-\n-                    #if $singlePaired.sParams.closure_search.use_search == "Yes":\n-                        
--closure-search\n-                        --min-closure-exon $singlePaired.sParams.closure_search.min_c'..b'hen such a pair is supported by a split segment alignment of a long read. The default is 500000.\n-  -F/--min-isoform-fraction 0.0-1.0 TopHat filters out junctions supported by too few alignments. Suppose a junction spanning two exons, is supported by S reads. Let the average depth of coverage of \n-                                    exon A be D, and assume that it is higher than B. If S / D is less than the minimum isoform fraction, the junction is not reported. A value of zero disables the \n-                                    filter. The default is 0.15.\n-  -g/--max-multihits INT            Instructs TopHat to allow up to this many alignments to the reference for a given read, and suppresses all alignments for reads with more than this many \n-                                    alignments. The default is 40.\n-  -G/--GTF [GTF 2.2 file]           Supply TopHat with a list of gene model annotations. TopHat will use the exon records in this file to build a set of known splice junctions for each gene, and will attempt to align reads to these junctions even if they would not normally be covered by the initial mapping.\n-  -j/--raw-juncs [juncs file]       Supply TopHat with a list of raw junctions. Junctions are specified one per line, in a tab-delimited format. Records look like: [chrom] [left] [right] [+/-], left and right are zero-based coordinates, and specify the last character of the left sequenced to be spliced to the first character of the right sequence, inclusive.\n-  -no-novel-juncs                   Only look for junctions indicated in the supplied GFF file. (ignored without -G)\n-  --no-closure-search               Disables the mate pair closure-based search for junctions. Currently, has no effect - closure search is off by default.\n-  --closure-search                  Enables the mate pair closure-based search for junctions. 
Closure-based search should only be used when the expected inner distance between mates is small (about or less than 50bp)\n-  --no-coverage-search              Disables the coverage based search for junctions.\n-  --coverage-search                 Enables the coverage based search for junctions. Use when coverage search is disabled by default (such as for reads 75bp or longer), for maximum sensitivity.\n-  --microexon-search                With this option, the pipeline will attempt to find alignments incident to microexons. Works only for reads 50bp or longer.\n-  --butterfly-search                TopHat will use a slower but potentially more sensitive algorithm to find junctions in addition to its standard search. Consider using this if you expect that your experiment produced a lot of reads from pre-mRNA, that fall within the introns of your transcripts.\n-  --segment-mismatches              Read segments are mapped independently, allowing up to this many mismatches in each segment alignment. The default is 2.\n-  --segment-length                  Each read is cut up into segments, each at least this long. These segments are mapped independently. The default is 25.\n-  --min-closure-exon                During closure search for paired end reads, exonic hops in the potential splice graph must be at least this long. The default is 50.\n-  --min-closure-intron              The minimum intron length that may be found during closure search. The default is 50.\n-  --max-closure-intron              The maximum intron length that may be found during closure search. The default is 5000.\n-  --min-coverage-intron             The minimum intron length that may be found during coverage search. The default is 50.\n-  --max-coverage-intron             The maximum intron length that may be found during coverage search. The default is 20000.\n-  --min-segment-intron              The minimum intron length that may be found during split-segment search. 
The default is 50.\n-  --max-segment-intron              The maximum intron length that may be found during split-segment search. The default is 500000.\n-    </help>\n-</tool>\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/DiffExpAnal/wrappGSNAP.py
--- a/SMART/DiffExpAnal/wrappGSNAP.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,76 +0,0 @@
-#! /usr/bin/env python
-
-import os, sys, subprocess,tempfile
-from optparse import OptionParser
-
-def stop_err(msg):
-    sys.stderr.write('%s\n' % msg)
-    sys.exit()
-
-def __main__():
-    #Parse Command Line
-    description = "GMAP/GSNAP version:2012-12-20."
-    parser = OptionParser(description = description)
-    parser.add_option("-d", "--genomeName", dest="genomeName", help="Define the reference genome name.[compulsory]")
-    parser.add_option("-o", "--outputFile", dest="outputfile", help="output[compulsory]")
-    #parser.add_option("-D", "--workingDir", dest="workingdir", help="Define the directory of writing reference genome index.[compulsory]")
-    parser.add_option("-k", "--kmer", dest="kmer", default=12, help="Choose kmer value (<=16), a big kmer value can take more RAM(4Go).[compulsory]")
-    parser.add_option("-i", "--inputFasta", dest="inputFastaFile", help="Reference genome file, fasta format.[compulsory]")
-    parser.add_option("-q", "--inputFastq", dest="inputFastqFile", help="Input fastq file.")
-    parser.add_option("-p", "--pairedEnd", dest="pairedEndFile", default=None, help="Input paired-end fastq file.")
-    parser.add_option("-A", "--outputFormat", dest="outputFormat", default="sam", help="Choose an output format [sam, goby (need to re-compile with appropriate options)].")
-    (options, args) = parser.parse_args()    
-
-    #If workingDir dose not exist, should create before run the job.
-    
-    workingDir = os.path.dirname(options.inputFastaFile)
-    
-    cmds = []
-    cmd_setup = "gmap_setup -d %s -D %s -k %s %s" % (options.genomeName, workingDir, options.kmer, options.inputFastaFile)
-    cmds.append(cmd_setup)
-    cmd_make_coords = "make -f Makefile.%s coords" % options.genomeName 
-    cmds.append(cmd_make_coords)
-    cmd_make_gmapdb = "make -f Makefile.%s gmapdb" % options.genomeName
-    cmds.append(cmd_make_gmapdb)
-    cmd_make_install = "make -f Makefile.%s install" % options.genomeName
-    cmds.append(cmd_make_install)
-    cmd_run = "gsnap -d %s -D %s -A %s %s " % (options.genomeName, workingDir, options.outputFormat, options.inputFastqFile)
-    if options.pairedEndFile != None:
-        cmd_run += "%s" % options.pairedEndFile
-    cmd_run += " > %s" % options.outputfile
-    cmds.append(cmd_run)
-    
-    tmp_files = []
-    for i in range(len(cmds)):
-        try:
-            tmp_out = tempfile.NamedTemporaryFile().name
-            tmp_files.append(tmp_out)
-            tmp_stdout = open(tmp_out, 'wb')
-            tmp_err = tempfile.NamedTemporaryFile().name
-            tmp_files.append(tmp_err)
-            tmp_stderr = open(tmp_err, 'wb')
-            proc = subprocess.Popen(args=cmds[i], shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr)
-            returncode = proc.wait()
-            tmp_stderr.close()
-            #get stderr, allowing for case where it's very large
-            tmp_stderr = open(tmp_err, 'rb')
-            stderr = ''
-            buffsize = 1048576
-            try:
-                while True:
-                    stderr += tmp_stderr.read(buffsize)
-                    if not stderr or len(stderr) % buffsize != 0:
-                        break
-            except OverflowError:
-                pass
-            tmp_stdout.close()
-            tmp_stderr.close()
-            if returncode != 0:
-                raise Exception, stderr
-        except Exception, e:
-            stop_err('Error in :\n' + str(e))
-    
-    for tmp_file in tmp_files:
-        os.remove(tmp_file)
-    
-if __name__=="__main__":__main__()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/File.java
--- a/SMART/Java/File.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,55 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-public class File {
-  String name;
-  String formatType;
-  String format;
-
-
-  public File(String name, String type, String format) {
-    this.name       = name;
-    this.formatType = type;
-    this.format     = format;
-  }
-
-  public String getName() {
-    return this.name;
-  }
-
-  public String getFormatType() {
-    return this.formatType;
-  }
-
-  public String getFormat() {
-    return this.format;
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Files.java
--- a/SMART/Java/Files.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,75 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-
-public class Files {
-  HashMap <String, File> files;  
-
-  public Files () {
-    files = new HashMap < String, File> ();
-  }
-
-  public void addFile(String fileName, String type, String format) {
-    this.addFile(new File(fileName, type, format));
-  }
-
-  public void addFile(File file) {
-    files.put(file.name, file);
-  }
-
-  public void clear() {
-    files.clear();
-  }
-
-  public String getType(String fileName) {
-    if (fileName == null) {
-      System.out.println("Error! Looking for format of empty file name!");
-    }
-    if (! files.containsKey(fileName)) {
-      System.out.println("Oops! Format type of file " + fileName + " is not found!");
-      return null;
-    }
-    return files.get(fileName).formatType;
-  }
-
-  public String getFormat(String fileName) {
-    if (fileName == null) {
-      System.out.println("Error! Looking for format of empty file name!");
-    }
-    if (! files.containsKey(fileName)) {
-      System.out.println("Oops! Format of file " + fileName + " is not found!");
-      return null;
-    }
-    return files.get(fileName).format;
-  }
-}
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/FormatType.java
--- a/SMART/Java/FormatType.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,64 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-
-public class FormatType {
-  String type;
-  Vector < String > formats;
-
-  public FormatType (String type) {
-    this.type    = type;
-    this.formats = new Vector < String > ();
-  }
-
-  public String getType () {
-    return this.type;
-  }
-
-  public void addFormat (String format) {
-    formats.add(format);
-  }
-
-  public boolean containsFormat (String format) {
-    for (int i = 0; i < formats.size(); i++) {
-      if (((String) formats.get(i)).compareToIgnoreCase(format) == 0) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  public Vector < String > getFormats () {
-    return formats;
-  }
-}
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/FormatsContainer.java
--- a/SMART/Java/FormatsContainer.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,90 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-
-public class FormatsContainer {
-
-  HashMap < String, FormatType > formatTypes;
-
-
-  public FormatsContainer() {
-    this.formatTypes = new HashMap < String, FormatType > ();
-  }
-
-
-  public void addFormat(String type, String format) {
-    FormatType formatType;
-    if (formatTypes.containsKey(type)) {
-      formatType = this.formatTypes.get(type);
-    }
-    else {
-      formatType = new FormatType(type);
-      this.formatTypes.put(type, formatType);
-    }
-    formatType.addFormat(format);
-  }
-
-
-  public Vector < String > getFormatTypes () {
-    Vector < String > v = new Vector < String > ();
-    v.addAll(this.formatTypes.keySet());
-    return v;
-  }
-
-
-  public FormatType getFormats (String type) {
-    if (! formatTypes.containsKey(type)) {
-      System.out.print("Format type " + type + " is unavailable. Got: ");
-      Iterator it = formatTypes.entrySet().iterator();
-      while (it.hasNext()) {
-        Map.Entry pairs = (Map.Entry) it.next();
-        System.out.print(pairs.getKey() + " ");
-      }
-      System.out.println();
-    }
-    return formatTypes.get(type);
-  }
-
-
-  public String getFormatType (String format) {
-    for (Iterator it = formatTypes.keySet().iterator(); it.hasNext(); ) {
-      Object type       =  it.next();
-      Object formatType = formatTypes.get(type);
-      if (((FormatType) formatType).containsFormat(format)) {
-        return (String) type;
-      }
-    }
-    return null;
-  }
-}
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/FormatsReader.java
--- a/SMART/Java/FormatsReader.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,83 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-import java.io.File;
-import java.io.*;
-
-
-public class FormatsReader {
-
-  String fileName;
-  Vector < FormatType > formatTypes;
-  Vector < String > typeNames;
-
-
-  public FormatsReader(String fileName) {
-    this.fileName    = fileName;  
-    this.formatTypes = new Vector < FormatType > ();
-  }
-
-
-  public boolean read() {
-    File file = new File(this.fileName);
-
-    try {
-      BufferedReader reader = new BufferedReader(new FileReader(file));
-      String     line = null;
-      String[]   lineElements;
-      String[]   formats;
-      String     typeName;
-
-      while ((line = reader.readLine()) != null) {
-        if (line.length() > 0) {
-          lineElements = line.split(":");
-          typeName     = lineElements[0].trim();
-          formats      = lineElements[1].split(",");
-          for (int i = 0; i < formats.length; i++) {
-            Global.formats.addFormat(typeName, formats[i].trim());
-          }
-        }
-      }
-
-      reader.close();
-    }
-    catch (FileNotFoundException e) {
-      return false;
-    }
-    catch (IOException e) {
-      return false;
-    }
-
-    return true;
-  }
-}
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Global.java
--- a/SMART/Java/Global.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,70 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.Vector;
-import java.util.HashMap;
-import javax.swing.DefaultListModel;
-import javax.swing.JButton;
-import javax.swing.JTextField;
-
-public class Global {
-
-  public static int logAreaSize = 100;
-
-  public static String smartConfFileName = "smart.conf";
-
-  public static String smartProgramsFileName = "programs.txt";
-
-  public static String smartFormatsFileName = "formats.txt";
-
-  public static String pythonPath = new String();
-
-  public static String pythonCommand = "python";
-
-  public static String mysqlCommand = "mysql";
-
-  public static String rCommand = "R";
-
-  public static Files files = new Files();
-
-  public static Vector < String > fileNames = new Vector < String >();
-
-  public static FormatsContainer formats = new FormatsContainer();
-
-  public static boolean programRunning = false;
-
-  public static HashMap < JButton, JTextField > otherFilesChooser = new HashMap < JButton, JTextField >();
-
-  public static HashMap < JButton, JTextField > otherDirectoriesChooser = new HashMap < JButton, JTextField >();
-
-  public static HashMap < JButton, JTextField > otherFileConcatenationChooser = new HashMap < JButton, JTextField >();
-
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Installer/Old/PasswordAsker.java
--- a/SMART/Java/Installer/Old/PasswordAsker.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,87 +0,0 @@
-import java.awt.*;
-import java.awt.event.*;
-import javax.swing.*;
-import java.util.concurrent.CountDownLatch;
-
-public class PasswordAsker {
-
-  static String password;
-  static JFrame frame;
-  static CountDownLatch latch;
-
-
-  public PasswordAsker() {
-    password = null;
-    javax.swing.SwingUtilities.invokeLater(new Runnable() {
-      public void run() {
-        createAndShowGUI();
-      }
-    });
-    latch = new CountDownLatch(1);
-  }
-
-
-  private static void createAndShowGUI() {
-    //Create and set up the window.
-    frame = new JFrame("Password");
-    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
-    frame.setContentPane(setMainPane());
-
-    //Display the window.
-    frame.pack();
-    frame.setVisible(true);
-  }
-
-
-  private static JPanel setMainPane() {
-    JPanel rootPanel = new JPanel(false);
-    rootPanel.setLayout(new GridLayout(0, 1));
-
-    JPanel infoPanel = new JPanel(false);
-    JLabel infoLabel = new JLabel("Please write here the password that you entered for the mySQL root account.\r\nNo information is stored nor sent. I promise.");
-    infoPanel.add(infoLabel);
-
-    JPanel passPanel = new JPanel(false);
-    passPanel.setLayout(new GridLayout(1, 0));
-    JLabel passLabel = new JLabel("password");
-    final JTextField passText = new JTextField(20);
-    passLabel.setLabelFor(passText);
-    passPanel.add(passLabel);
-    passPanel.add(passText);
-
-    JPanel  okPanel  = new JPanel(false);
-    JButton okButton = new JButton("OK");
-    okPanel.add(okButton);
-
-    okButton.addActionListener(new ActionListener() {
-      public void actionPerformed(ActionEvent e) {
-        password = passText.getText();
-        frame.setVisible(false);
-        frame.dispose();
-        latch.countDown();
-      }
-    });
-
-    rootPanel.add(infoPanel);
-    rootPanel.add(passPanel);
-    rootPanel.add(okPanel);
-
-    return rootPanel;
-  }
-
-
-  public boolean waitForPassword() {
-    try {
-      latch.await();
-    }
-    catch (InterruptedException e) {
-      return false;
-    }
-    return true;
-  }
-
-
-  public String getPassword() {
-    return password;
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Installer/Old/SmartInstaller.java
--- a/SMART/Java/Installer/Old/SmartInstaller.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,167 +0,0 @@
-import java.util.*;
-import java.awt.*;
-import java.awt.event.ActionEvent;
-import java.awt.event.ActionListener;
-import java.io.*;
-import javax.swing.*;
-import javax.swing.filechooser.*;
-import javax.swing.border.*;
-import javax.swing.SwingUtilities;
-import java.net.*;
-
-public class SmartInstaller extends JPanel implements ActionListener {
-  int       BUFFER = 1024;
-
-  JFrame    mainFrame;
-  JTextArea logArea;
-
-  // configuration chooser buttons
-  String       configurations[] = {"32 bits", "64 bits"};
-  JRadioButton configurationButtons[];
-
-  // program chooser buttons
-  String    programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "MySQL", "MySQL account", "Python 2.6", "Python DB", "S-MART"};
-  JCheckBox programChooserButtons[];
-
-  JButton   goButton;
-
-  // install directory
-  JButton    installDirectoryChooserButton;
-  JTextField installDirectoryChooserTextField;
-
-
-  public SmartInstaller() {
-    super();
-
-    Box box = Box.createVerticalBox();
-
-    // Header
-    JPanel       headerPanel = new JPanel(false);
-    JTextArea    headerArea  = new JTextArea("This is the S-MART installation tool.\r\nIt will download and install the needed softwares, as well as S-MART itself.\r\nYou can unselect the software that you already have installed.\r\nDuring the installation, accept all the default parameters.\r\nPlease remember the root password if you install MySQL!");
-    TitledBorder headerBorder = BorderFactory.createTitledBorder("Wellcome to the S-MART installer!");
-    headerArea.setEditable(false);
-    headerArea.setBackground(headerPanel.getBackground());
-    headerPanel.add(headerArea);
-    headerPanel.setBorder(headerBorder);
-
-
-    // Configuration
-    JPanel configurationPanel = new JPanel(false);
-    configurationPanel.setLayout(new GridLayout(1, 0));
-    configurationButtons = new JRadioButton[configurations.length];
-    ButtonGroup configurationGroup = new ButtonGroup();
-    for (int i = 0; i < configurations.length; i++) {
-      JRadioButton button = new JRadioButton(configurations[i]);
-      configurationPanel.add(button);
-      configurationButtons[i] = button;
-      configurationGroup.add(button);
-    }
-    configurationButtons[0].setSelected(true);
-    TitledBorder configurationBorder = BorderFactory.createTitledBorder("Configuration");
-    configurationPanel.setBorder(configurationBorder);
-
-
-    // Program chooser panel
-    JPanel programPanel = new JPanel(false);
-    programPanel.setLayout(new GridLayout(0, 1));
-
-    JLabel programLabel = new JLabel("Choose which programs to install:");
-    programPanel.add(programLabel);
-    programChooserButtons = new JCheckBox[programChoosers.length];
-    for (int i = 0; i < programChoosers.length; i++) {
-      JCheckBox button = new JCheckBox(programChoosers[i]);
-      button.setSelected(true);
-      programPanel.add(button);
-      programChooserButtons[i] = button;
-    }
-    TitledBorder programBorder = BorderFactory.createTitledBorder("Programs");
-    programPanel.setBorder(programBorder);
-
-    // Install directory chooser
-    JPanel installDirectoryChooserPanel = new JPanel(false);
-    installDirectoryChooserPanel.setLayout(new GridLayout(1, 0));
-    JLabel installDirectoryChooserLabel = new JLabel("Choose a directory to install S-MART: ");
-    installDirectoryChooserTextField = new JTextField();
-    installDirectoryChooserButton = new JButton("Open...");
-    installDirectoryChooserButton.addActionListener(this);
-
-    installDirectoryChooserPanel.add(installDirectoryChooserLabel);
-    installDirectoryChooserPanel.add(installDirectoryChooserTextField);
-    installDirectoryChooserPanel.add(installDirectoryChooserButton);
-    TitledBorder installDirectoryChooserBorder = BorderFactory.createTitledBorder("Installation directory");
-    installDirectoryChooserPanel.setBorder(installDirectoryChooserBorder);
-
-    // GO!
-    JPanel goPanel = new JPanel(false);
-    goButton = new JButton("GO!");
-    goButton.addActionListener(this);
-    goButton.setSelected(true);
-    goPanel.add(goButton);
-    TitledBorder goBorder = BorderFactory.createTitledBorder("Start install");
-    goPanel.setBorder(goBorder);
-
-    // Log
-    logArea = new JTextArea(10, 120);
-    logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize()));
-    JScrollPane logScroll  = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
-    TitledBorder logBorder = BorderFactory.createTitledBorder("Log");
-    logScroll.setBorder(logBorder);
-
-    GridLayout horizontalLayout = new GridLayout(1, 0);
-
-    box.add(headerPanel);
-    box.add(configurationPanel);
-    box.add(programPanel);
-    box.add(installDirectoryChooserPanel);
-    box.add(goPanel);
-    box.add(logScroll);
-
-    add(box);
-  }
-
-
-  public void actionPerformed(ActionEvent e) {
-
-    // Install directories chooser
-    if (e.getSource() == goButton) {
-      boolean[] selectedPrograms = new boolean[programChoosers.length];
-      for (int i = 0; i < programChoosers.length; i++) {
-        selectedPrograms[i] = programChooserButtons[i].isSelected();
-      }
-      SmartInstallerTask task = new SmartInstallerTask(logArea, selectedPrograms, installDirectoryChooserTextField.getText(), (configurationButtons[0].isSelected())? 0: 1);
-      task.execute();
-    }
-    // Install directories chooser
-    else if (e.getSource() == installDirectoryChooserButton) {
-      JFileChooser chooser = new JFileChooser();
-      chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
-      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {
-        installDirectoryChooserTextField.setText(chooser.getSelectedFile().getPath());
-      }
-    }
-  }
-
-  private static void createAndShowGUI() {
-    // Create and set up the window.
-    JFrame mainFrame = new JFrame("S-Mart Installer");
-    mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
-
-    //Create and set up the content pane.
-    JComponent newContentPane = new SmartInstaller();
-    newContentPane.setOpaque(true);
-    mainFrame.setContentPane(newContentPane);
-
-    // Display the window.
-    mainFrame.pack();
-    mainFrame.setVisible(true);
-  }
-
-
-  public static void main(String[] args) {
-    javax.swing.SwingUtilities.invokeLater(new Runnable() {
-      public void run() {
-        createAndShowGUI();
-      }
-    });
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Installer/Old/SmartInstallerTask.java
--- a/SMART/Java/Installer/Old/SmartInstallerTask.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,455 +0,0 @@\n-import java.util.*;\n-import java.awt.event.ActionEvent;\n-import java.awt.event.ActionListener;\n-import java.io.*;\n-import javax.swing.*;\n-import javax.swing.filechooser.*;\n-import javax.swing.border.*;\n-import javax.swing.SwingUtilities;\n-import java.net.*;\n-import java.util.Stack;\n-import java.util.zip.ZipEntry;\n-import java.util.zip.ZipInputStream;\n-\n-public class SmartInstallerTask extends SwingWorker<Boolean, String> {\n-\n-  int BUFFER = 1024;\n-\n-  int       architecture         = 0;\n-  String    installDirectoryName = null;\n-  JTextArea logArea              = null;\n-  boolean[] selectedPrograms     = null;\n-\n-  // program chooser buttons\n-  String    programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "MySQL", "MySQL account", "Python 2.6", "Python DB", "S-MART"};\n-\n-  // Web addresses for the tools\n-  String packageAddresses[][] = {\n-    {"http://cran.cict.fr/bin/windows/base/R-2.11.0-win32.exe", "http://cran.cict.fr/bin/windows64/base/R-2.11.0-win64.exe"},\n-    {"", ""},\n-    {"", ""},\n-    {"http://mirrors.ircam.fr/pub/mysql/Downloads/MySQL-5.1/mysql-essential-5.1.47-win32.msi", "http://mirrors.ircam.fr/pub/mysql/Downloads/MySQL-5.1/mysql-essential-5.1.47-winx64.msi"},\n-    {"", ""},\n-    {"http://www.python.org/ftp/python/2.6.5/python-2.6.5.msi", "http://www.python.org/ftp/python/2.6.5/python-2.6.5.amd64.msi"},\n-    {"http://www.technicalbard.com/files/MySQL-python-1.2.2.win32-py2.6.exe", "http://www.technicalbard.com/files/MySQL-python-1.2.2.win32-py2.6.exe"},\n-    {"http://urgi.versailles.inra.fr/download/s-mart/s-mart.zip", "http://urgi.versailles.inra.fr/download/s-mart/s-mart.zip"}\n-  };\n-\n-  // Packages to install\n-  String rPackages[] = {"RColorBrewer", "Hmisc"};\n-\n-  // Script lines\n-  String scriptLines[][] = {\n-    {"\\"<INSTALLDIR>\\\\R-2.11.0-win32.exe\\"", "\\"<INSTALLDIR>\\\\R-2.11.0-win64.exe\\""},\n-    {"\\"<RFILE>\\" CMD BATCH 
\\"<INSTALLDIR>\\\\installRColorBrewer.R\\"", "\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installRColorBrewer.R\\""},\n-    {"\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installHmisc.R\\"", "\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installHmisc.R\\""},\n-    {"msiexec /i \\"<INSTALLDIR>\\\\mysql-essential-5.1.47-win32.msi\\"", "msiexec /i \\"<INSTALLDIR>\\\\mysql-essential-5.1.47-winx64.msi\\""},\n-    {"", ""},\n-    {"msiexec /i \\"<INSTALLDIR>\\\\python-2.6.5.msi\\"", "msiexec /i \\"<INSTALLDIR>\\\\python-2.6.5.amd64.msi\\""},\n-    {"<INSTALLDIR>\\\\MySQL-python-1.2.2.win32-py2.6.exe", "<INSTALLDIR>\\\\MySQL-python-1.2.2.win32-py2.6.exe"},\n-    {"", ""}\n-  };\n-\n-  // Files to uncompress\n-  String compressedFiles[][] = {\n-    {"", ""},\n-    {"", ""},\n-    {"", ""},\n-    {"", ""},\n-    {"", ""},\n-    {"", ""},\n-    {"", ""},\n-    {"<INSTALLDIR>\\\\s-mart.zip", "<INSTALLDIR>\\\\s-mart.zip"}\n-  };\n-\n-\n-  public SmartInstallerTask(JTextArea ta, boolean[] b, String s, int a) {\n-    logArea              = ta;\n-    selectedPrograms     = b;\n-    installDirectoryName = s;\n-    architecture         = a;\n-  }\n-\n-\n-  @Override\n-  public Boolean doInBackground() {\n-    boolean installOk;\n-    publish("Starting install\\n");\n-    writeFiles();\n-    for (int i = 0; i < selectedPrograms.length; i++) {\n-      if (selectedPrograms[i]) {\n-        if (! 
install(i)) {\n-          return Boolean.FALSE;\n-        }\n-      }\n-    }\n-    removeFiles();\n-    setEnvironmentVariables();\n-    publish("Ending install\\n");\n-    return Boolean.TRUE;\n-  }\n-\n-\n-  @Override\n-  protected void process(List<String> chunks) {\n-    for (String chunk: chunks) {\n-      logArea.append(chunk);\n-    }\n-  }\n-\n-\n-  private boolean launch(String command) {\n-    return realLaunch(new ProcessBuilder(command), command);\n-  }\n-\n-  private boolean launch(String[] command) {\n-    return realLaunch(new ProcessBuilder(command), Arrays.toString(command));\n-  }\n-\n-  private boolean realLaunch(ProcessBuilder pb, String command) {\n-    BufferedReader outputReader;\n-    pb                          = pb.redirectErrorStream(true);\n-    Process        process   '..b' boolean uncompressPackage(int element) {\n-    String file = compressedFiles[element][architecture];\n-    if (! "".equals(file)) {\n-      file = replaceSubstring(file);\n-      publish("    Starting uncompressing file \'" + file + "\'\\n");\n-      try {\n-        FileInputStream     fis = new FileInputStream(file);\n-        BufferedInputStream bis = new BufferedInputStream(fis);\n-        ZipInputStream      zis = new ZipInputStream(bis);\n-        ZipEntry            entry;\n-        while ((entry = zis.getNextEntry()) != null) {\n-          if (! entry.isDirectory()) {\n-            File newFile = new File(installDirectoryName + File.separator + entry.getName());\n-            // create parent directories\n-            File upDirectory = newFile.getParentFile();\n-            while (upDirectory != null){\n-              if (! 
upDirectory.exists()) {\n-                upDirectory.mkdir();\n-                publish("      Creating directory \'" + upDirectory.getAbsolutePath() + "\'\\n");\n-              }\n-              upDirectory = upDirectory.getParentFile();\n-            }\n-            // write the files to the disk\n-            publish("      Extracting \'" + entry.getName() + "\' to \'" + newFile.getAbsolutePath() + "\'\\n");\n-            int  count;\n-            byte data[] = new byte[BUFFER];\n-            FileOutputStream     fos = new FileOutputStream(newFile);\n-            BufferedOutputStream bos = new BufferedOutputStream(fos, BUFFER);\n-            while ((count = zis.read(data, 0, BUFFER)) != -1){\n-              bos.write(data, 0, count);\n-            }\n-            bos.flush();\n-            bos.close();\n-            fos.close();\n-          }\n-        }\n-        zis.close();\n-        bis.close();\n-        fis.close();\n-      }\n-      catch(FileNotFoundException e) {\n-        publish("    !Cannot find file \'" + file + "\'!\\n");\n-        return false;\n-      }\n-      catch(Exception e){\n-        publish("    !Cannot uncompress file \'" + file + "\'!\\n");\n-        return false;\n-      }\n-      publish("    Ending uncompressing file \'" + file + "\'\\n");\n-    }\n-    return true;\n-  }\n-\n-\n-  private boolean removePackage(int element) {\n-    String packageName = packageAddresses[element][architecture];\n-    if ("".equals(packageName)) {\n-      return true;\n-    }\n-    String fileName = getLocalName(packageAddresses[element][architecture]);\n-    return removeFile(fileName);\n-  }\n-\n-\n-  private boolean postProcess(int element) {\n-    switch (element) {\n-      case 4:\n-        // Create mySQL user\n-        PasswordAsker pa = new PasswordAsker();\n-        if (! 
pa.waitForPassword()) {\n-          publish("Problem in the password asker!\\n");\n-          return false;\n-        }\n-        String command = "\\"<MYSQLFILE>\\" --user=root --password=" + pa.getPassword() + " -e \\"source <INSTALLDIR>\\\\createUser.sql\\"";\n-        command        = replaceSubstring(command);\n-        if (! launch(command)) {\n-          publish("    !Cannot create SQL accounts!\\n");\n-          return false;\n-        }\n-        return true;\n-      case 7:\n-        // Move S-MART files to parent directory\n-        File installDirectory = new File(installDirectoryName + File.separator + "S-Mart");\n-        for (File file: installDirectory.listFiles()) {\n-          File destinationFile = new File(file.getParentFile().getParentFile(), file.getName());\n-          if (! file.renameTo(destinationFile)) {\n-            publish("     !Cannot move \'" + file.getAbsolutePath() + "\' to \'" + destinationFile.getAbsolutePath() + "\'!\\n");\n-          }\n-        }\n-        if (! installDirectory.delete()) {\n-          publish("     !Cannot remove installation S-MART directory \'" + installDirectory.getAbsolutePath() + "\'!\\n");\n-        }\n-    }\n-    return true;\n-  }\n-\n-\n-  private boolean setEnvironmentVariables() {\n-    String[] command = {"REG", "ADD", "HKCU\\\\Environment", "/v", "PYTHONPATH", "/t", "REG_SZ", "/d", "\\"" + installDirectoryName + "\\\\Python\\"", "/f"};\n-    return launch(command);\n-  }\n-}\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Installer/PasswordAsker.java
--- a/SMART/Java/Installer/PasswordAsker.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,87 +0,0 @@
-import java.awt.*;
-import java.awt.event.*;
-import javax.swing.*;
-import java.util.concurrent.CountDownLatch;
-
-public class PasswordAsker {
-
-  static String password;
-  static JFrame frame;
-  static CountDownLatch latch;
-
-
-  public PasswordAsker() {
-    password = null;
-    javax.swing.SwingUtilities.invokeLater(new Runnable() {
-      public void run() {
-        createAndShowGUI();
-      }
-    });
-    latch = new CountDownLatch(1);
-  }
-
-
-  private static void createAndShowGUI() {
-    //Create and set up the window.
-    frame = new JFrame("Password");
-    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
-    frame.setContentPane(setMainPane());
-
-    //Display the window.
-    frame.pack();
-    frame.setVisible(true);
-  }
-
-
-  private static JPanel setMainPane() {
-    JPanel rootPanel = new JPanel(false);
-    rootPanel.setLayout(new GridLayout(0, 1));
-
-    JPanel infoPanel = new JPanel(false);
-    JLabel infoLabel = new JLabel("Please write here the password that you entered for the mySQL root account.\r\nNo information is stored nor sent. I promise.");
-    infoPanel.add(infoLabel);
-
-    JPanel passPanel = new JPanel(false);
-    passPanel.setLayout(new GridLayout(1, 0));
-    JLabel passLabel = new JLabel("password");
-    final JTextField passText = new JTextField(20);
-    passLabel.setLabelFor(passText);
-    passPanel.add(passLabel);
-    passPanel.add(passText);
-
-    JPanel  okPanel  = new JPanel(false);
-    JButton okButton = new JButton("OK");
-    okPanel.add(okButton);
-
-    okButton.addActionListener(new ActionListener() {
-      public void actionPerformed(ActionEvent e) {
-        password = passText.getText();
-        frame.setVisible(false);
-        frame.dispose();
-        latch.countDown();
-      }
-    });
-
-    rootPanel.add(infoPanel);
-    rootPanel.add(passPanel);
-    rootPanel.add(okPanel);
-
-    return rootPanel;
-  }
-
-
-  public boolean waitForPassword() {
-    try {
-      latch.await();
-    }
-    catch (InterruptedException e) {
-      return false;
-    }
-    return true;
-  }
-
-
-  public String getPassword() {
-    return password;
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Installer/SmartInstaller.jar
b
Binary file SMART/Java/Installer/SmartInstaller.jar has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Installer/SmartInstaller.java
--- a/SMART/Java/Installer/SmartInstaller.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,167 +0,0 @@
-import java.util.*;
-import java.awt.*;
-import java.awt.event.ActionEvent;
-import java.awt.event.ActionListener;
-import java.io.*;
-import javax.swing.*;
-import javax.swing.filechooser.*;
-import javax.swing.border.*;
-import javax.swing.SwingUtilities;
-import java.net.*;
-
-public class SmartInstaller extends JPanel implements ActionListener {
-  int       BUFFER = 1024;
-
-  JFrame    mainFrame;
-  JTextArea logArea;
-
-  // configuration chooser buttons
-  String       configurations[] = {"32 bits", "64 bits"};
-  JRadioButton configurationButtons[];
-
-  // program chooser buttons
-  String    programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "Python 2.6", "S-MART"};
-  JCheckBox programChooserButtons[];
-
-  JButton   goButton;
-
-  // install directory
-  JButton    installDirectoryChooserButton;
-  JTextField installDirectoryChooserTextField;
-
-
-  public SmartInstaller() {
-    super();
-
-    Box box = Box.createVerticalBox();
-
-    // Header
-    JPanel       headerPanel = new JPanel(false);
-    JTextArea    headerArea  = new JTextArea("This is the S-MART installation tool.\r\nIt will download and install the needed softwares, as well as S-MART itself.\r\nYou can unselect the software that you already have installed.\r\nDuring the installation, accept all the default parameters.");
-    TitledBorder headerBorder = BorderFactory.createTitledBorder("Welcome to the S-MART installer!");
-    headerArea.setEditable(false);
-    headerArea.setBackground(headerPanel.getBackground());
-    headerPanel.add(headerArea);
-    headerPanel.setBorder(headerBorder);
-
-
-    // Configuration
-    JPanel configurationPanel = new JPanel(false);
-    configurationPanel.setLayout(new GridLayout(1, 0));
-    configurationButtons = new JRadioButton[configurations.length];
-    ButtonGroup configurationGroup = new ButtonGroup();
-    for (int i = 0; i < configurations.length; i++) {
-      JRadioButton button = new JRadioButton(configurations[i]);
-      configurationPanel.add(button);
-      configurationButtons[i] = button;
-      configurationGroup.add(button);
-    }
-    configurationButtons[0].setSelected(true);
-    TitledBorder configurationBorder = BorderFactory.createTitledBorder("Configuration");
-    configurationPanel.setBorder(configurationBorder);
-
-
-    // Program chooser panel
-    JPanel programPanel = new JPanel(false);
-    programPanel.setLayout(new GridLayout(0, 1));
-
-    JLabel programLabel = new JLabel("Choose which programs to install:");
-    programPanel.add(programLabel);
-    programChooserButtons = new JCheckBox[programChoosers.length];
-    for (int i = 0; i < programChoosers.length; i++) {
-      JCheckBox button = new JCheckBox(programChoosers[i]);
-      button.setSelected(true);
-      programPanel.add(button);
-      programChooserButtons[i] = button;
-    }
-    TitledBorder programBorder = BorderFactory.createTitledBorder("Programs");
-    programPanel.setBorder(programBorder);
-
-    // Install directory chooser
-    JPanel installDirectoryChooserPanel = new JPanel(false);
-    installDirectoryChooserPanel.setLayout(new GridLayout(1, 0));
-    JLabel installDirectoryChooserLabel = new JLabel("Choose a directory to install S-MART: ");
-    installDirectoryChooserTextField = new JTextField();
-    installDirectoryChooserButton = new JButton("Open...");
-    installDirectoryChooserButton.addActionListener(this);
-
-    installDirectoryChooserPanel.add(installDirectoryChooserLabel);
-    installDirectoryChooserPanel.add(installDirectoryChooserTextField);
-    installDirectoryChooserPanel.add(installDirectoryChooserButton);
-    TitledBorder installDirectoryChooserBorder = BorderFactory.createTitledBorder("Installation directory");
-    installDirectoryChooserPanel.setBorder(installDirectoryChooserBorder);
-
-    // GO!
-    JPanel goPanel = new JPanel(false);
-    goButton = new JButton("GO!");
-    goButton.addActionListener(this);
-    goButton.setSelected(true);
-    goPanel.add(goButton);
-    TitledBorder goBorder = BorderFactory.createTitledBorder("Start install");
-    goPanel.setBorder(goBorder);
-
-    // Log
-    logArea = new JTextArea(10, 120);
-    logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize()));
-    JScrollPane logScroll  = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);
-    TitledBorder logBorder = BorderFactory.createTitledBorder("Log");
-    logScroll.setBorder(logBorder);
-
-    GridLayout horizontalLayout = new GridLayout(1, 0);
-
-    box.add(headerPanel);
-    box.add(configurationPanel);
-    box.add(programPanel);
-    box.add(installDirectoryChooserPanel);
-    box.add(goPanel);
-    box.add(logScroll);
-
-    add(box);
-  }
-
-
-  public void actionPerformed(ActionEvent e) {
-
-    // Install directories chooser
-    if (e.getSource() == goButton) {
-      boolean[] selectedPrograms = new boolean[programChoosers.length];
-      for (int i = 0; i < programChoosers.length; i++) {
-        selectedPrograms[i] = programChooserButtons[i].isSelected();
-      }
-      SmartInstallerTask task = new SmartInstallerTask(logArea, selectedPrograms, installDirectoryChooserTextField.getText(), (configurationButtons[0].isSelected())? 0: 1);
-      task.execute();
-    }
-    // Install directories chooser
-    else if (e.getSource() == installDirectoryChooserButton) {
-      JFileChooser chooser = new JFileChooser();
-      chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
-      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {
-        installDirectoryChooserTextField.setText(chooser.getSelectedFile().getPath());
-      }
-    }
-  }
-
-  private static void createAndShowGUI() {
-    // Create and set up the window.
-    JFrame mainFrame = new JFrame("S-Mart Installer");
-    mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
-
-    //Create and set up the content pane.
-    JComponent newContentPane = new SmartInstaller();
-    newContentPane.setOpaque(true);
-    mainFrame.setContentPane(newContentPane);
-
-    // Display the window.
-    mainFrame.pack();
-    mainFrame.setVisible(true);
-  }
-
-
-  public static void main(String[] args) {
-    javax.swing.SwingUtilities.invokeLater(new Runnable() {
-      public void run() {
-        createAndShowGUI();
-      }
-    });
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Installer/SmartInstallerTask.java
--- a/SMART/Java/Installer/SmartInstallerTask.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,419 +0,0 @@\n-import java.util.*;\n-import java.awt.event.ActionEvent;\n-import java.awt.event.ActionListener;\n-import java.io.*;\n-import javax.swing.*;\n-import javax.swing.filechooser.*;\n-import javax.swing.border.*;\n-import javax.swing.SwingUtilities;\n-import java.net.*;\n-import java.util.Stack;\n-import java.util.zip.ZipEntry;\n-import java.util.zip.ZipInputStream;\n-\n-public class SmartInstallerTask extends SwingWorker<Boolean, String> {\n-\n-  int BUFFER = 1024;\n-\n-  int       architecture         = 0;\n-  String    installDirectoryName = null;\n-  JTextArea logArea              = null;\n-  boolean[] selectedPrograms     = null;\n-\n-  // program chooser buttons\n-  String    programChoosers[] = {"R", "R Color Brewer Package", "R HMisc Package", "Python 2.6", "S-MART"};\n-\n-  // Web addresses for the tools\n-  String packageAddresses[][] = {\n-    {"http://cran.cict.fr/bin/windows/base/R-2.11.0-win32.exe", "http://cran.cict.fr/bin/windows64/base/R-2.11.0-win64.exe"},\n-    {"", ""},\n-    {"", ""},\n-    {"http://www.python.org/ftp/python/2.6.5/python-2.6.5.msi", "http://www.python.org/ftp/python/2.6.5/python-2.6.5.amd64.msi"},\n-    {"http://urgi.versailles.inra.fr/content/download/1929/17848/file/s-mart-1.0.15.zip", "http://urgi.versailles.inra.fr/content/download/1929/17848/file/s-mart-1.0.15.zip"}\n-  };\n-\n-  // Packages to install\n-  String rPackages[] = {"RColorBrewer", "Hmisc"};\n-\n-  // Script lines\n-  String scriptLines[][] = {\n-    {"\\"<INSTALLDIR>\\\\R-2.11.0-win32.exe\\"", "\\"<INSTALLDIR>\\\\R-2.11.0-win64.exe\\""},\n-    {"\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installRColorBrewer.R\\"", "\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installRColorBrewer.R\\""},\n-    {"\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installHmisc.R\\"", "\\"<RFILE>\\" CMD BATCH \\"<INSTALLDIR>\\\\installHmisc.R\\""},\n-    {"msiexec /i \\"<INSTALLDIR>\\\\python-2.6.5.msi\\"", "msiexec /i \\"<INSTALLDIR>\\\\python-2.6.5.amd64.msi\\""},\n-    
{"", ""}\n-  };\n-\n-  // Files to uncompress\n-  String compressedFiles[][] = {\n-    {"", ""},\n-    {"", ""},\n-    {"", ""},\n-    {"", ""},\n-    {"<INSTALLDIR>\\\\s-mart-1.0.15.zip", "<INSTALLDIR>\\\\s-mart-1.0.15.zip"}\n-  };\n-\n-\n-  public SmartInstallerTask(JTextArea ta, boolean[] b, String s, int a) {\n-    logArea              = ta;\n-    selectedPrograms     = b;\n-    installDirectoryName = s;\n-    architecture         = a;\n-  }\n-\n-\n-  @Override\n-  public Boolean doInBackground() {\n-    boolean installOk;\n-    publish("Starting install\\n");\n-    writeFiles();\n-    for (int i = 0; i < selectedPrograms.length; i++) {\n-      if (selectedPrograms[i]) {\n-        if (! install(i)) {\n-          return Boolean.FALSE;\n-        }\n-      }\n-    }\n-    removeFiles();\n-    setEnvironmentVariables();\n-    publish("Ending install\\n");\n-    return Boolean.TRUE;\n-  }\n-\n-\n-  @Override\n-  protected void process(List<String> chunks) {\n-    for (String chunk: chunks) {\n-      logArea.append(chunk);\n-    }\n-  }\n-\n-\n-  private boolean launch(String command) {\n-    return realLaunch(new ProcessBuilder(command), command);\n-  }\n-\n-  private boolean launch(String[] command) {\n-    return realLaunch(new ProcessBuilder(command), Arrays.toString(command));\n-  }\n-\n-  private boolean realLaunch(ProcessBuilder pb, String command) {\n-    BufferedReader outputReader;\n-    pb                          = pb.redirectErrorStream(true);\n-    Process        process      = null;\n-    publish("      Starting command \'" + command + "\'\\n");\n-    try {\n-      process = pb.start();\n-      BufferedInputStream outputStream = new BufferedInputStream(process.getInputStream());\n-      InputStream is        = process.getInputStream();\n-      InputStreamReader isr = new InputStreamReader(is);\n-      outputReader          = new BufferedReader(isr);\n-    }\n-    catch (Exception exception) {\n-      publish("      !Process cannot be started (command 
is \'" + command + "\')!\\n");\n-      exception.printStackTrace();\n-      return false;\n-    }\n-    if (outputReader == null) {\n-      publish("      !Problem in the outp'..b'turn false;\n-        }\n-        try {\n-          process.waitFor();\n-        }\n-        catch (InterruptedException e) {\n-          publish("    !Cannot wait for the end of the command \'" + command + "\'!\\n");\n-          return false;\n-        }\n-        int exitValue = process.exitValue();\n-        if (exitValue != 0) {\n-          publish("    !Problem during the execution of the command \'" + command + "\'!\\n");\n-          return false;\n-        }\n-        publish("    Ending command \'" + command + "\'\\n");\n-      }\n-    }\n-    return true;\n-  }\n-\n-\n-  private boolean uncompressPackage(int element) {\n-    String file = compressedFiles[element][architecture];\n-    if (! "".equals(file)) {\n-      file = replaceSubstring(file);\n-      publish("    Starting uncompressing file \'" + file + "\'\\n");\n-      try {\n-        FileInputStream     fis = new FileInputStream(file);\n-        BufferedInputStream bis = new BufferedInputStream(fis);\n-        ZipInputStream      zis = new ZipInputStream(bis);\n-        ZipEntry            entry;\n-        while ((entry = zis.getNextEntry()) != null) {\n-          if (! entry.isDirectory()) {\n-            File newFile = new File(installDirectoryName + File.separator + entry.getName());\n-            // create parent directories\n-            File upDirectory = newFile.getParentFile();\n-            while (upDirectory != null){\n-              if (! 
upDirectory.exists()) {\n-                upDirectory.mkdir();\n-                publish("      Creating directory \'" + upDirectory.getAbsolutePath() + "\'\\n");\n-              }\n-              upDirectory = upDirectory.getParentFile();\n-            }\n-            // write the files to the disk\n-            publish("      Extracting \'" + entry.getName() + "\' to \'" + newFile.getAbsolutePath() + "\'\\n");\n-            int  count;\n-            byte data[] = new byte[BUFFER];\n-            FileOutputStream     fos = new FileOutputStream(newFile);\n-            BufferedOutputStream bos = new BufferedOutputStream(fos, BUFFER);\n-            while ((count = zis.read(data, 0, BUFFER)) != -1){\n-              bos.write(data, 0, count);\n-            }\n-            bos.flush();\n-            bos.close();\n-            fos.close();\n-          }\n-        }\n-        zis.close();\n-        bis.close();\n-        fis.close();\n-      }\n-      catch(FileNotFoundException e) {\n-        publish("    !Cannot find file \'" + file + "\'!\\n");\n-        return false;\n-      }\n-      catch(Exception e){\n-        publish("    !Cannot uncompress file \'" + file + "\'!\\n");\n-        return false;\n-      }\n-      publish("    Ending uncompressing file \'" + file + "\'\\n");\n-    }\n-    return true;\n-  }\n-\n-\n-  private boolean removePackage(int element) {\n-    String packageName = packageAddresses[element][architecture];\n-    if ("".equals(packageName)) {\n-      return true;\n-    }\n-    String fileName = getLocalName(packageAddresses[element][architecture]);\n-    return removeFile(fileName);\n-  }\n-\n-\n-  private boolean postProcess(int element) {\n-    switch (element) {\n-      case 4:\n-        // Move S-MART files to parent directory\n-        File installDirectory = new File(installDirectoryName + File.separator + "S-Mart");\n-        for (File file: installDirectory.listFiles()) {\n-          File destinationFile = new 
File(file.getParentFile().getParentFile(), file.getName());\n-          if (! file.renameTo(destinationFile)) {\n-            publish("     !Cannot move \'" + file.getAbsolutePath() + "\' to \'" + destinationFile.getAbsolutePath() + "\'!\\n");\n-          }\n-        }\n-        if (! installDirectory.delete()) {\n-          publish("     !Cannot remove installation S-MART directory \'" + installDirectory.getAbsolutePath() + "\'!\\n");\n-        }\n-    }\n-    return true;\n-  }\n-\n-\n-  private boolean setEnvironmentVariables() {\n-    String[] command = {"REG", "ADD", "HKCU\\\\Environment", "/v", "PYTHONPATH", "/t", "REG_SZ", "/d", "\\"" + installDirectoryName + "\\\\Python\\"", "/f"};\n-    return launch(command);\n-  }\n-}\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Installer/build.sh
--- a/SMART/Java/Installer/build.sh Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-#! /bin/sh
-
-rm -rf SmartInstaller.jar
-javac *.java
-jar cvfm SmartInstaller.jar manifest.txt *.class
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Installer/manifest.txt
--- a/SMART/Java/Installer/manifest.txt Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-Manifest-Version: 1.0
-Created-By: Matthias Zytnicki
-Main-Class: SmartInstaller
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Installer/s-mart.zip
b
Binary file SMART/Java/Installer/s-mart.zip has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/LICENSE.txt
--- a/SMART/Java/LICENSE.txt Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,506 +0,0 @@\n-\n-CeCILL FREE SOFTWARE LICENSE AGREEMENT\n-\n-\n-    Notice\n-\n-This Agreement is a Free Software license agreement that is the result\n-of discussions between its authors in order to ensure compliance with\n-the two main principles guiding its drafting:\n-\n-    * firstly, compliance with the principles governing the distribution\n-      of Free Software: access to source code, broad rights granted to\n-      users,\n-    * secondly, the election of a governing law, French law, with which\n-      it is conformant, both as regards the law of torts and\n-      intellectual property law, and the protection that it offers to\n-      both authors and holders of the economic rights over software.\n-\n-The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])\n-license are:\n-\n-Commissariat \xe0 l\'Energie Atomique - CEA, a public scientific, technical\n-and industrial research establishment, having its principal place of\n-business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.\n-\n-Centre National de la Recherche Scientifique - CNRS, a public scientific\n-and technological establishment, having its principal place of business\n-at 3 rue Michel-Ange, 75794 Paris cedex 16, France.\n-\n-Institut National de Recherche en Informatique et en Automatique -\n-INRIA, a public scientific and technological establishment, having its\n-principal place of business at Domaine de Voluceau, Rocquencourt, BP\n-105, 78153 Le Chesnay cedex, France.\n-\n-\n-    Preamble\n-\n-The purpose of this Free Software license agreement is to grant users\n-the right to modify and redistribute the software governed by this\n-license within the framework of an open source distribution model.\n-\n-The exercising of these rights is conditional upon certain obligations\n-for users so as to preserve this status for all subsequent redistributions.\n-\n-In consideration of access to the source code and the rights to copy,\n-modify and redistribute granted 
by the license, users are provided only\n-with a limited warranty and the software\'s author, the holder of the\n-economic rights, and the successive licensors only have limited liability.\n-\n-In this respect, the risks associated with loading, using, modifying\n-and/or developing or reproducing the software by the user are brought to\n-the user\'s attention, given its Free Software status, which may make it\n-complicated to use, with the result that its use is reserved for\n-developers and experienced professionals having in-depth computer\n-knowledge. Users are therefore encouraged to load and test the\n-suitability of the software as regards their requirements in conditions\n-enabling the security of their systems and/or data to be ensured and,\n-more generally, to use and operate it in the same conditions of\n-security. This Agreement may be freely reproduced and published,\n-provided it is not altered, and that no provisions are either added or\n-removed herefrom.\n-\n-This Agreement may apply to any or all software for which the holder of\n-the economic rights decides to submit the use thereof to its provisions.\n-\n-\n-    Article 1 - DEFINITIONS\n-\n-For the purpose of this Agreement, when the following expressions\n-commence with a capital letter, they shall have the following meaning:\n-\n-Agreement: means this license agreement, and its possible subsequent\n-versions and annexes.\n-\n-Software: means the software in its Object Code and/or Source Code form\n-and, where applicable, its documentation, "as is" when the Licensee\n-accepts the Agreement.\n-\n-Initial Software: means the Software in its Source Code and possibly its\n-Object Code form and, where applicable, its documentation, "as is" when\n-it is first distributed under the terms and conditions of the Agreement.\n-\n-Modified Software: means the Software modified by at least one\n-Contribution.\n-\n-Source Code: means all the Software\'s instructions and program lines to\n-which access is 
required so as to modify the Software.\n-\n-Object Code: means the binary files originating from the co'..b"a case-by-case basis between the relevant Licensor and the\n-Licensee pursuant to a memorandum of understanding. The Licensor\n-disclaims any and all liability as regards the Licensee's use of the\n-name of the Software. No warranty is given as regards the existence of\n-prior rights over the name of the Software or as regards the existence\n-of a trademark.\n-\n-\n-    Article 10 - TERMINATION\n-\n-10.1 In the event of a breach by the Licensee of its obligations\n-hereunder, the Licensor may automatically terminate this Agreement\n-thirty (30) days after notice has been sent to the Licensee and has\n-remained ineffective.\n-\n-10.2 A Licensee whose Agreement is terminated shall no longer be\n-authorized to use, modify or distribute the Software. However, any\n-licenses that it may have granted prior to termination of the Agreement\n-shall remain valid subject to their having been granted in compliance\n-with the terms and conditions hereof.\n-\n-\n-    Article 11 - MISCELLANEOUS\n-\n-\n-      11.1 EXCUSABLE EVENTS\n-\n-Neither Party shall be liable for any or all delay, or failure to\n-perform the Agreement, that may be attributable to an event of force\n-majeure, an act of God or an outside cause, such as defective\n-functioning or interruptions of the electricity or telecommunications\n-networks, network paralysis following a virus attack, intervention by\n-government authorities, natural disasters, water damage, earthquakes,\n-fire, explosions, strikes and labor unrest, war, etc.\n-\n-11.2 Any failure by either Party, on one or more occasions, to invoke\n-one or more of the provisions hereof, shall under no circumstances be\n-interpreted as being a waiver by the interested Party of its right to\n-invoke said provision(s) subsequently.\n-\n-11.3 The Agreement cancels and replaces any or all previous agreements,\n-whether written or oral, between the 
Parties and having the same\n-purpose, and constitutes the entirety of the agreement between said\n-Parties concerning said purpose. No supplement or modification to the\n-terms and conditions hereof shall be effective as between the Parties\n-unless it is made in writing and signed by their duly authorized\n-representatives.\n-\n-11.4 In the event that one or more of the provisions hereof were to\n-conflict with a current or future applicable act or legislative text,\n-said act or legislative text shall prevail, and the Parties shall make\n-the necessary amendments so as to comply with said act or legislative\n-text. All other provisions shall remain effective. Similarly, invalidity\n-of a provision of the Agreement, for any reason whatsoever, shall not\n-cause the Agreement as a whole to be invalid.\n-\n-\n-      11.5 LANGUAGE\n-\n-The Agreement is drafted in both French and English and both versions\n-are deemed authentic.\n-\n-\n-    Article 12 - NEW VERSIONS OF THE AGREEMENT\n-\n-12.1 Any person is authorized to duplicate and distribute copies of this\n-Agreement.\n-\n-12.2 So as to ensure coherence, the wording of this Agreement is\n-protected and may only be modified by the authors of the License, who\n-reserve the right to periodically publish updates or new versions of the\n-Agreement, each with a separate number. These subsequent versions may\n-address new issues encountered by Free Software.\n-\n-12.3 Any Software distributed under a given version of the Agreement may\n-only be subsequently distributed under the same version of the Agreement\n-or a subsequent version, subject to the provisions of Article 5.3.4.\n-\n-\n-    Article 13 - GOVERNING LAW AND JURISDICTION\n-\n-13.1 The Agreement is governed by French law. 
The Parties agree to\n-endeavor to seek an amicable solution to any disagreements or disputes\n-that may arise during the performance of the Agreement.\n-\n-13.2 Failing an amicable solution within two (2) months as from their\n-occurrence, and unless emergency proceedings are necessary, the\n-disagreements or disputes shall be referred to the Paris Courts having\n-jurisdiction, by the more diligent Party.\n-\n-\n-Version 2.0 dated 2006-09-05.\n"
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Program.java
--- a/SMART/Java/Program.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,175 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-import java.awt.*;
-import javax.swing.*;
-
-
-public class Program {
-  String                 shortName;
-  String                 name;
-  String                 section;
-  String                 description;
-  Vector <ProgramOption> options;
-  JPanel                 panel;
-  JButton                button;
-
-
-  public Program() {
-    this.shortName = null;  
-    this.name      = null;  
-    this.options   = new Vector <ProgramOption> ();  
-  }
-
-
-  public void setShortName(String shortName) {
-    this.shortName = shortName;
-  }
-
-
-  public void setName(String name) {
-    this.name = name;
-  }
-
-
-  public void setSection(String section) {
-    this.section = section;
-  }
-
-  public void setDescription(String description) {
-    this.description = description;
-  }
-
-
-  public void addOption(ProgramOption option) {
-    options.add(option);
-  }
-
-
-  public String getShortName() {
-    return this.shortName;
-  }
-
-
-  public String getName() {
-    return this.name;
-  }
-
-
-  public String getSection() {
-    return this.section;
-  }
-
-  public String getDescription() {
-    return this.description;
-  }
-
-
-  public String checkValues() {
-    for (int i = 0; i < options.size(); i++) {
-      String comment = options.get(i).checkValue();
-      if (comment != null) {
-        return comment;
-      }
-    }
-    return null;
-  }
-
-
-  public LinkedList<String> getCommand() {
-    LinkedList<String> parameterList = new LinkedList<String>();
-    parameterList.add(Global.pythonCommand);
-    parameterList.add("Python" + java.io.File.separator + this.shortName);
-    for (int i = 0; i < options.size(); i++) {
-      ProgramOption option = options.get(i);
-      parameterList.addAll(option.getCommand());
-    }
-    return parameterList;
-  }
-
-
-  public JPanel getPanel() {
-    if (this.panel != null) {
-      return this.panel;
-    }
-    
-    this.panel = new JPanel(false);
-    this.panel.setLayout(new FlowLayout());
-    Box box = Box.createVerticalBox();
-
-    JPanel descriptionPanel = new JPanel(false);
-    JLabel descriptionLabel = new JLabel(this.description);
-    descriptionPanel.add(descriptionLabel);
-    box.add(descriptionPanel);
-
-    for (int i = 0; i < options.size(); i++) {
-      ProgramOption option = options.get(i);
-      JPanel        panel  = option.getPanel();
-      if (panel == null) {
-        System.out.println("Problem with Python program '" + this.shortName + "'.");
-        return null;
-      }
-      box.add(option.getPanel());
-    }
-
-    JPanel buttonPanel = new JPanel(false);
-    this.button = new JButton("GO!");
-
-    buttonPanel.add(button);
-
-    box.add(buttonPanel);
-
-    this.panel.add(box);
-
-    return this.panel;
-  }
-
-
-  public JButton getButton() {
-    if (this.button == null) {
-      this.getPanel();
-    }
-    return this.button;
-  }
-
-  
-  public Vector < File > getOutputFiles() {
-    Vector < File > files = new Vector < File > ();
-    for (int i = 0; i < options.size(); i++) {
-      ProgramOption option = options.get(i);
-      if (! option.isInput()) {
-        files.add(option.getOutputFile());
-      }
-    }
-    return files;
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/ProgramFileReader.java
--- a/SMART/Java/ProgramFileReader.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,174 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-import java.io.File;
-import java.io.*;
-
-
-public class ProgramFileReader {
-  String fileName;
-  Vector <Program> programs;
-
-
-  public ProgramFileReader(String fileName) {
-    this.fileName = fileName;  
-    this.programs = new Vector <Program> ();
-  }
-
-
-  public boolean read() {
-//  File    file    = new File(this.fileName);
-//  Program program = null;
-//  int     step    = 0;
-//  TreeMap <String, ProgramOption> options = new TreeMap <String, ProgramOption> ();
-
-//  try {
-//    BufferedReader reader = new BufferedReader(new FileReader(file));
-//    String line    = null;
-//    String section = null;
-
-//    while ((line = reader.readLine()) != null) {
-
-//      line = line.trim();
-
-//      if (line.length() == 0) {
-//        if (program != null) {
-//          programs.add(program);
-//        }
-//        program = null;
-//        step = 0;
-//        continue;
-//      }
-
-//      if ((line.charAt(0) == '[') && (line.charAt(line.length() - 1) == ']')) {
-//        section = line.substring(1, line.length() - 1).trim();
-//        continue;
-//      }
-//      switch (step) {
-//        case 0:
-//        program = new Program();
-//          program.setName(line);
-//          if (section == null) {
-//            System.out.println("Error! Section of program '" + line + "' is not set!");
-//          }
-//          program.setSection(section);
-//          step = 1;
-//          break;
-//        case 1:
-//          program.setShortName(line);
-//          step = 2;
-//          break;
-//        case 2:
-//          ProgramOption option = new ProgramOption();
-
-//          String[] elements    = line.split(":");
-//          boolean  input       = elements[0].trim().equalsIgnoreCase("input")? true: false;
-//          String[] subElements = elements[1].split(";");
-//          String   identifier = subElements[0].trim();
-
-//          option.setInput(input);
-
-//          if (input) {
-
-//            if (subElements.length < 4) {
-//              System.out.println("Line '" + line + "' is weird...");
-//            }
-
-//            String   type       = subElements[1].trim();
-//            String   comment    = subElements[2].trim();
-//            boolean  compulsory = subElements[3].trim().equalsIgnoreCase("0")? false: true;
-
-//            option.setIdentifier(identifier);
-//            option.setType(type);
-//            option.setComment(comment);
-//            option.setCompulsory(compulsory);
-
-//            if ("file".compareToIgnoreCase(type) == 0) {
-//              if (subElements.length < 5) {
-//                System.out.println("Line '" + line + "' is weird...");
-//              }
-
-//              String formatIdentifier = subElements[4].trim();
-//              option.setFormatIdentifier(formatIdentifier);
-//            }
-//            else if ("choice".compareToIgnoreCase(type) == 0) {
-//              if (subElements.length < 5) {
-//                System.out.println("Line '" + line + "' is weird...");
-//              }
-
-//              String[] choices = subElements[4].trim().split(",");
-//              for (int i = 0; i < choices.length; i++) {
-//                choices[i] = choices[i].trim();
-//              }
-//              option.setChoices(choices);
-//            }
-//            options.put(identifier, option);
-//          }
-//          else {
-//            String format = subElements[1].trim();
-
-//            option.setFormat(format);
-//            option.setAssociatedOption(options.get(identifier));
-//          }
-
-//          program.addOption(option);
-
-//          break;
-//        default:
-//          return false;
-//      }
-//    }
-
-//    reader.close();
-//  }
-//  catch (FileNotFoundException e) {
-//    return false;
-//  }
-//  catch (IOException e) {
-//    return false;
-//  }
-
-//  if (program != null) {
-//    programs.add(program);
-//  }
-
-    return true;
-  }
-
-  public int getNbPrograms() {
-    return programs.size();
-  }
-
-  public Program getProgram(int i) {
-    return programs.get(i);
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/ProgramLauncher.java
--- a/SMART/Java/ProgramLauncher.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,209 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-import java.io.*;
-import javax.swing.SwingUtilities;
-import javax.swing.*;
-import java.util.concurrent.CountDownLatch;
-
-public class ProgramLauncher extends SwingWorker<Boolean, String>  {
-
-  String[]     command;
-  JTextArea    logArea;
-  JLabel       messageField;
-  JProgressBar progressBar;
-  JLabel       etaField;
-  int          exitValue;
-  CountDownLatch latch;
-
-  
-
-  public ProgramLauncher (LinkedList <String> c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
-    command       = new String[c.size()];
-    logArea       = la;
-    messageField  = mf;
-    progressBar   = pb;
-    etaField      = ef;
-    exitValue     = -1;
-    c.toArray(command);
-    latch = new CountDownLatch(1);
-  }
-
-
-  public ProgramLauncher (String[] c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
-    command       = c;
-    logArea       = la;
-    messageField  = mf;
-    progressBar   = pb;
-    etaField      = ef;
-    exitValue     = -1;
-    latch = new CountDownLatch(1);
-  }
-
-
-  @Override
-  public Boolean doInBackground() {
-    ProcessBuilder pb           = new ProcessBuilder(command);
-    Process process             = null;
-    BufferedReader outputReader = null;
-    pb                          = pb.redirectErrorStream(true);
-    Map<String, String> env     = pb.environment();
-    env.put("PYTHONPATH", System.getProperty("user.dir"));
-    env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "SMART" + java.io.File.separator + "Java" + java.io.File.separator + "Python");
-    env.put("SMARTMYSQLPATH", Global.mysqlCommand);
-    env.put("SMARTRPATH", Global.rCommand);
-    String commandJoined = Arrays.toString(command);
-
-    try {
-      publish("=== Starting command '" + commandJoined.trim() + "' ===\n");
-      process = pb.start();
-
-      BufferedInputStream outputStream = new BufferedInputStream(process.getInputStream());
-      InputStream is                   = process.getInputStream();
-      InputStreamReader isr            = new InputStreamReader(is);
-      outputReader                     = new BufferedReader(isr);
-    }
-    catch (Exception exception) {
-      publish("!Process cannot be started (command is '" + commandJoined + "')!\n");
-      exception.printStackTrace();
-      latch.countDown();
-      return Boolean.FALSE;
-    }
-    if (outputReader == null) {
-      publish("!Problem in the output of the command!\n");
-      latch.countDown();
-      return Boolean.FALSE;
-    }
-    else {
-      try {
-        String line;
-        while ((line = outputReader.readLine()) != null) {
-          publish(line + "\n");
-        }
-      }
-      catch (IOException e) {
-        e.printStackTrace();
-        publish("!Cannot get the output of the command!\n");
-        latch.countDown();
-        return Boolean.FALSE;
-      }
-    }
-    try {
-      process.waitFor();
-    }
-    catch (InterruptedException e) {
-      e.printStackTrace();
-      publish("!Cannot wait for the end of the command!\n");
-      latch.countDown();
-      return Boolean.FALSE;
-    }
-    try {
-      exitValue = process.exitValue();
-    }
-    catch (IllegalThreadStateException e) {
-      e.printStackTrace();
-      publish("!Cannot get the exit value of the command!\n");
-      latch.countDown();
-      return Boolean.FALSE;
-    }
-    if (exitValue != 0) {
-      publish("!Problem during the execution of the command '" + commandJoined + "'!\n");
-      latch.countDown();
-      return Boolean.FALSE;
-    }
-    publish("=== Ending command '" + commandJoined.trim() + "' ===\n");
-    latch.countDown();
-    return Boolean.TRUE;
-  }
-
-
-  @Override
-  protected void process(List<String> chunks) {
-    String message = "";
-    String text    = logArea.getText();
-    for (String chunk: chunks) {
-      text += chunk;
-    }
-    for (String lineSeparatedByCarriageReturn: text.split("\n")) {
-      for (String line: lineSeparatedByCarriageReturn.split("\r")) {
-        boolean progressLine = false;
-        if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*")) {
-          String[] ratioElements = line.split("\\]")[1].trim().split("/");
-          int      current       = Integer.parseInt(ratioElements[0].trim());
-          int      aim           = Integer.parseInt(ratioElements[1].trim());
-          messageField.setText(line.split("\\[")[0].trim());
-          progressBar.setValue(current * 100 / aim);
-          etaField.setText("");
-          progressLine = true;
-        }
-        else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*ETA:\\s*.*")) {
-          String[] ratioElements = line.split("\\]")[1].split("E")[0].trim().split("/");
-          int      current       = Integer.parseInt(ratioElements[0].trim());
-          int      aim           = Integer.parseInt(ratioElements[1].trim());
-          String   eta           = line.split("ETA:")[1].trim();
-          messageField.setText(line.split("\\[")[0].trim());
-          progressBar.setValue(current * 100 / aim);
-          etaField.setText("ETA: " + eta);
-          progressLine = true;
-        }
-        else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*\\s*completed in.*")) {
-          String nbElements = line.split("\\]")[1].split("completed")[0].trim();
-          String timeSpent  = line.split("completed in")[1].trim();
-          message          += line.split("\\[")[0].trim() + ": " + nbElements + " elements completed in " + timeSpent + "\n";
-          messageField.setText(line.split("\\[")[0].trim());
-          progressLine = true;
-        }
-        if (! progressLine) {
-          message += line + "\n";
-        }
-      }
-    }
-    String lines[]     = message.split("\n");
-    String toBeWritten = "";
-    for (int i = Math.max(0, lines.length - Global.logAreaSize); i < lines.length; i++) {
-      toBeWritten += lines[i] + "\n";
-    }
-    logArea.setText(toBeWritten);
-  }
-
-  public int getExitValue() {
-    try {
-      latch.await();
-    }
-    catch (InterruptedException e) {
-      logArea.append("Cannot wait for the end of the process!\n");
-      e.printStackTrace();
-      return -1;
-    }
-    return exitValue;
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/ProgramOption.java
--- a/SMART/Java/ProgramOption.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,358 +0,0 @@\n-/**\n- *\n- * Copyright INRA-URGI 2009-2010\n- * \n- * This software is governed by the CeCILL license under French law and\n- * abiding by the rules of distribution of free software. You can use,\n- * modify and/ or redistribute the software under the terms of the CeCILL\n- * license as circulated by CEA, CNRS and INRIA at the following URL\n- * "http://www.cecill.info".\n- * \n- * As a counterpart to the access to the source code and rights to copy,\n- * modify and redistribute granted by the license, users are provided only\n- * with a limited warranty and the software\'s author, the holder of the\n- * economic rights, and the successive licensors have only limited\n- * liability.\n- * \n- * In this respect, the user\'s attention is drawn to the risks associated\n- * with loading, using, modifying and/or developing or reproducing the\n- * software by the user in light of its specific status of free software,\n- * that may mean that it is complicated to manipulate, and that also\n- * therefore means that it is reserved for developers and experienced\n- * professionals having in-depth computer knowledge. 
Users are therefore\n- * encouraged to load and test the software\'s suitability as regards their\n- * requirements in conditions enabling the security of their systems and/or\n- * data to be ensured and, more generally, to use and operate it in the\n- * same conditions as regards security.\n- * \n- * The fact that you are presently reading this means that you have had\n- * knowledge of the CeCILL license and that you accept its terms.\n- *\n- */\n-import java.util.*;\n-import java.awt.*;\n-import java.awt.event.ActionEvent;\n-import java.awt.event.ActionListener;\n-import java.io.*;\n-import javax.swing.*;\n-import javax.swing.filechooser.*;\n-import javax.swing.border.*;\n-import javax.swing.SwingUtilities;\n-\n-\n-public class ProgramOption {\n-  boolean       input;\n-  String        identifier;\n-  String        type;\n-  String        comment;\n-  boolean       compulsory;\n-  String[]      format;\n-  String        formatIdentifier;\n-  ProgramOption associatedOption;\n-  String        defaultValue;\n-  String[]      choices;\n-  JComponent    component;\n-  JPanel        panel;\n-\n-\n-  public ProgramOption() {\n-    this.input            = true;\n-    this.identifier       = null;\n-    this.type             = null;\n-    this.comment          = null;\n-    this.compulsory       = false;\n-    this.format           = null;\n-    this.formatIdentifier = null;\n-    this.associatedOption = null;\n-    this.defaultValue     = "";\n-    this.choices          = null;\n-    this.component        = null;\n-    this.panel            = null;\n-  }\n-\n-\n-  public void setInput(boolean input) {\n-    this.input = input;\n-  }\n-\n-\n-  public void setIdentifier(String identifier) {\n-    this.identifier = identifier;\n-  }\n-\n-\n-  public void setType(String type) {\n-    this.type = type;\n-  }\n-\n-\n-  public void setComment(String comment) {\n-    this.comment = comment;\n-  }\n-\n-\n-  public void setCompulsory(boolean compulsory) {\n-    this.compulsory = 
compulsory;\n-  }\n-\n-\n-  public void setFormat(String[] format) {\n-    this.format = format;\n-  }\n-\n-\n-  public void setFormat(String format) {\n-    this.format    = new String[1];\n-    this.format[0] = format;\n-  }\n-\n-\n-  public void setFormatIdentifier(String formatIdentifier) {\n-    this.formatIdentifier = formatIdentifier;\n-  }\n-\n-\n-  public void setAssociatedOption(ProgramOption option) {\n-    this.associatedOption = option;\n-  }\n-\n-\n-  public void setChoices(String[] choices) {\n-    this.choices = new String[choices.length+1];\n-    this.choices[0] = "---";\n-    for (int i = 0; i < choices.length; i++) {\n-      this.choices[i+1] = choices[i];\n-    }\n-  }\n-\n-\n-  public void setDefault(String defaultValue) {\n-    this.defaultValue = defaultValue;\n-  }\n-\n-\n-  public boolean isInput() {\n-    return this.input;\n-  }\n-\n-\n-  public boolean checkSettings() {\n-    if (this.identifier == null) {\n-      return false;\n-    }\n-    if (this.type == nul'..b'{\n-      this.component = new JComboBox(this.choices);\n-      label.setLabelFor(this.component);\n-      this.panel.add(label);\n-      this.panel.add(this.component);\n-    }\n-    else {\n-      System.out.println("Do not know how to read type " + this.type);\n-    }\n-\n-    return this.panel;\n-  }\n-\n-\n-  public JComponent getComponent() {\n-    if (component == null) {\n-      this.getPanel();\n-    }\n-    return this.component;\n-  }\n-\n-\n-  private String getValue() {\n-    if (("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! 
this.input)) || ("directory".equals(this.type)) || ("files".equals(this.type)))  {\n-      String s = ((JTextField) this.component).getText();\n-      if ("None".equals(s)) {\n-        return "";\n-      }\n-      return s;\n-    }\n-    if ("file".equals(this.type)) {\n-      return (String) ((JComboBox) this.component).getSelectedItem();\n-    }\n-    if ("boolean".equals(this.type)) {\n-      return ((JCheckBox) this.component).isSelected()? "true": "false";\n-    }\n-    if ("format".equals(this.type)) {\n-      return (String) ((JComboBox) this.component).getSelectedItem();\n-    }\n-    if ("choice".equals(this.type)) {\n-      String s = (String) ((JComboBox) this.component).getSelectedItem();\n-      if ("---".equals(s)) {\n-        return "";\n-      }\n-      return s;\n-    }\n-    System.out.println("Do not know how to get value of \'" + this.type + "\' (" + this.identifier + ").");\n-    return null;\n-  }\n-\n-\n-  public String checkValue() {\n-    String value = this.getValue();\n-    if ((this.compulsory) && ((value == null) || ("".equals(value)))) {\n-      return "Option \'" + this.comment + "\' has no value... Please specify it.\\n";\n-    }\n-    if ("int".equals(this.type)) {\n-      if ((value != null) && (! "".equals(value)) && (! "None".equals(value))) {\n-        try {\n-          int i = Integer.parseInt(value);\n-        }\n-        catch (NumberFormatException e) {\n-          return "Option \'" + this.comment + "\' should be an integer... Please correct it.\\n";\n-        }\n-      }\n-    }\n-    else if ("float".equals(this.type)) {\n-      if ((value != null) && (! "".equals(value))) {\n-        try {\n-          float i = Float.parseFloat(value);\n-        }\n-        catch (NumberFormatException e) {\n-          return "Option \'" + this.comment + "\' should be a float... 
Please correct it.\\n";\n-        }\n-      }\n-    }\n-    return null;\n-  }\n-\n-\n-  public LinkedList <String> getCommand() {\n-    LinkedList <String> list = new LinkedList <String> ();\n-\n-    if (("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! this.input)) || ("format".equals(this.type)) || ("directory".equals(this.type)) || ("files".equals(this.type)) || ("choice".equals(this.type))) {\n-      String value = this.getValue();\n-      if (value.length() == 0) {\n-        return list;\n-      }\n-      list.add(this.identifier);\n-      list.add(value);\n-      return list;\n-    }\n-    if ("file".equals(this.type)) {\n-      String fileName = (String) ((JComboBox) this.component).getSelectedItem();\n-      if (fileName == null) {\n-        return list;\n-      }\n-      list.add(this.identifier);\n-      list.add(this.getValue());\n-      return list;\n-    }\n-    if (("boolean".equals(this.type)) || ("bool".equals(this.type))) {\n-      if ("true".equals(this.getValue())) {\n-        list.add(this.identifier);\n-      }\n-      return list;\n-    }\n-    System.out.println("Cannot get type of option " + this.type + " (" + this.identifier + "): " + this.getValue());\n-    return null;\n-  }\n-\n-\n-  public File getOutputFile() {\n-    if (this.input) return null;\n-    String format = "";\n-    if (this.format != null) {\n-      format = this.format[0];\n-    }\n-    if (this.associatedOption != null) {\n-      format = this.associatedOption.getValue();\n-    }\n-    return new File(this.getValue(), Global.formats.getFormatType(format), format);\n-  }\n-}\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/.RData
b
Binary file SMART/Java/Python/.RData has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/.gitignore
--- a/SMART/Java/Python/.gitignore Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-/CleanTranscriptFile.py
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/100%
--- a/SMART/Java/Python/100% Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,152 +0,0 @@
-
-R version 2.15.1 (2012-06-22) -- "Roasted Marshmallows"
-Copyright (C) 2012 The R Foundation for Statistical Computing
-ISBN 3-900051-07-0
-Platform: x86_64-pc-linux-gnu (64-bit)
-
-R is free software and comes with ABSOLUTELY NO WARRANTY.
-You are welcome to redistribute it under certain conditions.
-Type 'license()' or 'licence()' for distribution details.
-
-  Natural language support but running in an English locale
-
-R is a collaborative project with many contributors.
-Type 'contributors()' for more information and
-'citation()' on how to cite R or R packages in publications.
-
-Type 'demo()' for some demos, 'help()' for on-line help, or
-'help.start()' for an HTML browser interface to help.
-Type 'q()' to quit R.
-
-[Previously saved workspace restored]
-
-> library(extrafont)
-Registering fonts with R
-There were 50 or more warnings (use warnings() to see the first 50)
-> loadfonts()
-Padauk already registered with pdfFonts().
-Padauk Book already registered with pdfFonts().
-Abyssinica SIL already registered with pdfFonts().
-cmex10 already registered with pdfFonts().
-cmmi10 already registered with pdfFonts().
-cmr10 already registered with pdfFonts().
-cmsy10 already registered with pdfFonts().
-DejaVu Sans already registered with pdfFonts().
-DejaVu Sans Condensed already registered with pdfFonts().
-DejaVu Sans Light already registered with pdfFonts().
-DejaVu Sans Mono already registered with pdfFonts().
-DejaVu Serif already registered with pdfFonts().
-DejaVu Serif Condensed already registered with pdfFonts().
-Droid Arabic Naskh already registered with pdfFonts().
-Droid Sans already registered with pdfFonts().
-Droid Sans Armenian already registered with pdfFonts().
-Droid Sans Ethiopic already registered with pdfFonts().
-Droid Sans Fallback already registered with pdfFonts().
-Droid Sans Georgian already registered with pdfFonts().
-Droid Sans Hebrew already registered with pdfFonts().
-Droid Sans Mono already registered with pdfFonts().
-Droid Sans Thai already registered with pdfFonts().
-Droid Serif already registered with pdfFonts().
-esint10 already registered with pdfFonts().
-eufm10 already registered with pdfFonts().
-FreeMono already registered with pdfFonts().
-FreeSans already registered with pdfFonts().
-FreeSerif already registered with pdfFonts().
-gargi already registered with pdfFonts().
-Garuda already registered with pdfFonts().
-Gentium already registered with pdfFonts().
-GentiumAlt already registered with pdfFonts().
-Gentium Basic already registered with pdfFonts().
-Gentium Book Basic already registered with pdfFonts().
-Junicode already registered with pdfFonts().
-KacstArt already registered with pdfFonts().
-KacstBook already registered with pdfFonts().
-KacstDecorative already registered with pdfFonts().
-KacstDigital already registered with pdfFonts().
-KacstFarsi already registered with pdfFonts().
-KacstLetter already registered with pdfFonts().
-KacstNaskh already registered with pdfFonts().
-KacstOffice already registered with pdfFonts().
-KacstOne already registered with pdfFonts().
-KacstPen already registered with pdfFonts().
-KacstPoster already registered with pdfFonts().
-KacstQurn already registered with pdfFonts().
-KacstScreen already registered with pdfFonts().
-KacstTitle already registered with pdfFonts().
-KacstTitleL already registered with pdfFonts().
-Kedage already registered with pdfFonts().
-Khmer OS already registered with pdfFonts().
-Khmer OS System already registered with pdfFonts().
-LKLUG already registered with pdfFonts().
-Lohit Bengali already registered with pdfFonts().
-Lohit Gujarati already registered with pdfFonts().
-Lohit Hindi already registered with pdfFonts().
-Lohit Punjabi already registered with pdfFonts().
-Lohit Tamil already registered with pdfFonts().
-Loma already registered with pdfFonts().
-Mallige already registered with pdfFonts().
-MarVoSym already registered with pdfFonts().
-Meera already registered with pdfFonts().
-mry_KacstQurn already registered with pdfFonts().
-msam10 already registered with pdfFonts().
-msbm10 already registered with pdfFonts().
-Mukti Narrow already registered with pdfFonts().
-NanumGothic already registered with pdfFonts().
-NanumMyeongjo already registered with pdfFonts().
-OpenSymbol already registered with pdfFonts().
-Phetsarath OT already registered with pdfFonts().
-Pothana2000 already registered with pdfFonts().
-Purisa already registered with pdfFonts().
-Rachana already registered with pdfFonts().
-Rekha already registered with pdfFonts().
-rsfs10 already registered with pdfFonts().
-Saab already registered with pdfFonts().
-Sawasdee already registered with pdfFonts().
-Tibetan Machine Uni already registered with pdfFonts().
-TlwgMono already registered with pdfFonts().
-TlwgTypewriter already registered with pdfFonts().
-Tlwg Typist already registered with pdfFonts().
-Tlwg Typo already registered with pdfFonts().
-Ubuntu already registered with pdfFonts().
-Ubuntu Condensed already registered with pdfFonts().
-Ubuntu Light already registered with pdfFonts().
-Ubuntu Mono already registered with pdfFonts().
-Ume Gothic already registered with pdfFonts().
-Ume Gothic C4 already registered with pdfFonts().
-Ume Gothic C5 already registered with pdfFonts().
-Ume Gothic O5 already registered with pdfFonts().
-Ume Gothic S4 already registered with pdfFonts().
-Ume Gothic S5 already registered with pdfFonts().
-Ume P Gothic already registered with pdfFonts().
-Ume P Gothic C4 already registered with pdfFonts().
-Ume P Gothic C5 already registered with pdfFonts().
-Ume P Gothic O5 already registered with pdfFonts().
-Ume P Gothic S4 already registered with pdfFonts().
-Ume P Gothic S5 already registered with pdfFonts().
-Ume UI Gothic already registered with pdfFonts().
-Ume UI Gothic O5 already registered with pdfFonts().
-UnBatang already registered with pdfFonts().
-UnDotum already registered with pdfFonts().
-UnGraphic already registered with pdfFonts().
-UnGungseo already registered with pdfFonts().
-UnPilgi already registered with pdfFonts().
-utkal already registered with pdfFonts().
-Vemana2000 already registered with pdfFonts().
-VL Gothic already registered with pdfFonts().
-VL PGothic already registered with pdfFonts().
-Waree already registered with pdfFonts().
-wasy10 already registered with pdfFonts().
-There were 26 warnings (use warnings() to see them)
-> library(ggplot2)
-> data <- read.table("tmpFile36240.dat", header = T)
-> data$Sample <- factor(data$Sample, levels=c("Col", "rtl2", "35S::RTL2#1", "35S::RTL2#2", "35S::RTL1#1", "dcl2dcl3dcl4"))
-> data$Size <- factor(data$Size, levels=c(21, 22, 23, 24))
-> png("/home/mzytnick/Desktop/Projects/Vaucheret/RTrna/MiRna/genomicSizes.png", width = 1000, height = 200)
-> ggplot(data, aes(x = Size, y = Count, fill = Size))  + geom_bar(stat = "identity") + facet_grid(. ~ Sample, space="free_x") + xlab("Sizes") + ylab("Count") + scale_fill_manual(values = c("blue", "green", "pink", "red")) + theme(legend.position = "none", panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())
-> dev.off()
-null device 
-          1 
-> 
-> proc.time()
-   user  system elapsed 
-  1.884   0.088   3.163 
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/CleanTranscriptFile.py
--- a/SMART/Java/Python/CleanTranscriptFile.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,74 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2011
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from optparse import OptionParser
-from SMART.Java.Python.cleaning.CleanerChooser import CleanerChooser
-
-
-class CleanTranscriptFile(object):
-
- def __init__(self, verbosity):
- self.verbosity = verbosity
- self.chooser   = CleanerChooser(self.verbosity)
-
- def setInputFile(self, fileName, format):
- self.chooser.findFormat(format)
- self.cleaner = self.chooser.getCleaner()
- self.cleaner.setInputFileName(fileName)
-
- def setOutputFile(self, fileName):
- self.cleaner.setOutputFileName(fileName)
-
- def setAcceptedTypes(self, types):
- if types != None:
- self.cleaner.setAcceptedTypes(types)
-
- def run(self):
- self.cleaner.clean()
-
-
-if __name__ == "__main__":
-
- description = "Clean Transcript File v1.0.1: Clean a transcript file so that it is useable for S-MART. [Category: Other]"
-
- parser = OptionParser(description = description)
- parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
- parser.add_option("-f", "--format",      dest="format",         action="store",                     type="string", help="format of previous file [compulsory] [format: transcript file format]")
- parser.add_option("-t", "--types",       dest="acceptedTypes",  action="store",      default=None,  type="string", help="name of the types you want to keep in GFF/GTF (list separated by commas) [format: string] [default: None]")
- parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [format: output file in GFF3 format]")
- parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
- (options, args) = parser.parse_args()
-
- ctf = CleanTranscriptFile(options.verbosity)
- ctf.setInputFile(options.inputFileName, options.format)
- ctf.setOutputFile(options.outputFileName)
- ctf.setAcceptedTypes(None if options.acceptedTypes == None else options.acceptedTypes.split(","))
- ctf.run()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ClusterizeByTags.py
--- a/SMART/Java/Python/ClusterizeByTags.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,157 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2011\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import random\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.TranscriptWriter import TranscriptWriter\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection\n-from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter\n-\n-\n-OPERATIONS = ("diff", "div")\n-BOOLTOSTRANDS = {True: [0], False: [-1, 1]}\n-\n-class ClusterizeByTags(object):\n-\n-    def __init__(self, verbosity):\n-        self.verbosity   = verbosity\n-        self.connection  = MySqlConnection(self.verbosity-1)\n-        self.defautValue = None\n-        self.maxDistance = None\n-        self.oneStrand   = False\n-\n-    def setInputFile(self, fileName, format):\n-        chooser = ParserChooser(self.verbosity)\n-        chooser.findFormat(format)\n-        parser = chooser.getParser(fileName)\n-        writer = MySqlTranscriptWriter(self.connection, None, self.verbosity)\n-        writer.addTranscriptList(parser)\n-        writer.write()\n-        self.transcriptTables = writer.getTables()\n-\n-    def setOutputFile(self, fileName):\n-        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n-\n-    def setTag(self, tagName, defaultValue):\n-        self.tagName      = tagName\n-        self.defaultValue = defaultValue\n-\n-    def setThreshold(self, threshold):\n-      
  self.threshold = threshold\n-\n-    def setOperation(self, operation):\n-        self.operation = operation\n-        if self.operation not in OPERATIONS:\n-            raise Exception("Operation \'%s\' unsupported: choose among %s" % (self.operation, ", ".join(OPERATIONS)))\n-\n-    def setMaxDistance(self, distance):\n-        self.maxDistance = distance\n-\n-    def setOneStrand(self, oneStrand):\n-        self.oneStrand = oneStrand\n-\n-    def run(self):\n-        for chromosome in sorted(self.transcriptTables.keys()):\n-            progress = Progress(self.transcriptTables[chromosome].getNbElements(), "Analyzing %s" % (chromosome), self.verbosity)\n-            for strand in BOOLTOSTRANDS[self.oneStrand]:\n-                previousValue      = None\n-                previousTrend      = None\n-                previousTranscript = None\n-                sumValue           = 0\n-                command = "SELECT * FROM %s" % (self.tran'..b'       trend = value / previousValue\n-                    if previousTranscript == None:\n-                        sumValue = value\n-                    elif (previousTrend == None or abs(trend - previousTrend) <= self.threshold) and (self.maxDistance == None or previousTranscript.getDistance(transcript) <= self.maxDistance) and (previousTranscript.getDirection() == transcript.getDirection() or not self.oneStrand):\n-                        if previousTranscript.getDirection() != transcript.getDirection():\n-                            transcript.reverse()\n-                        previousTranscript.merge(transcript)\n-                        transcript = previousTranscript\n-                        sumValue += value\n-                        previousTrend = trend\n-                    else:\n-                        previousTranscript.setTagValue(self.tagName, sumValue)\n-                        self.writer.addTranscript(previousTranscript)\n-                        sumValue = value\n-                        
previousTrend = None\n-                    previousValue      = value\n-                    previousTranscript = transcript\n-                    progress.inc()\n-                if previousTranscript != None:\n-                    previousTranscript.setTagValue(self.tagName, sumValue)\n-                    self.writer.addTranscript(previousTranscript)\n-            progress.done()\n-        self.writer.close()\n-\n-\n-if __name__ == "__main__":\n-    \n-    description = "Clusterize By Tags v1.0.1: Clusterize a set of element using their tag values. [Category: Merge]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--format",      dest="format",         action="store",                     type="string", help="format of previous file [compulsory] [format: transcript file format]")\n-    parser.add_option("-t", "--tag",         dest="tagName",        action="store",                     type="string", help="name of the tag [format: string] [compulsory]")\n-    parser.add_option("-e", "--default",     dest="defaultValue",   action="store",      default=None,  type="int",    help="default value for the tag [format: string]")\n-    parser.add_option("-r", "--threshold",   dest="threshold",      action="store",                     type="int",    help="threshold between two consecutive tags [format: int] [compulsory]")\n-    parser.add_option("-p", "--operation",   dest="operation",      action="store",                     type="string", help="operation to apply between 2 different clusters to compare them [format: choice (diff, div)] [compulsory]")\n-    parser.add_option("-d", "--distance",    dest="maxDistance",    action="store",      default=None,  type="int",    help="maximum distance for 2 clusters to be merged [format: 
int] [default: None]")\n-    parser.add_option("-1", "--oneStrand",   dest="oneStrand",      action="store_true", default=False,                help="also cluster the elements which are on different strands [format: bool] [default: False]")\n-    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [format: output file in GFF3 format]")\n-    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")\n-    (options, args) = parser.parse_args()\n-\n-    cbt = ClusterizeByTags(options.verbosity)\n-    cbt.setInputFile(options.inputFileName, options.format)\n-    cbt.setOutputFile(options.outputFileName)\n-    cbt.setTag(option.tagName, option.defaultValue)\n-    cbt.setThreshold(option.threshold)\n-    cbt.setOperation(option.operation)\n-    cbt.setMaxDistance(operation.maxDistance)\n-    cbt.setOneStrand(operation.oneStrand)\n-    cbt.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/CollapseReads.py
--- a/SMART/Java/Python/CollapseReads.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,174 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import os\n-from optparse import OptionParser, OptionGroup\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle\n-from SMART.Java.Python.ncList.FileSorter import FileSorter\n-from SMART.Java.Python.misc.Progress import Progress\n-\n-\n-class CollapseReads(object):\n-    """\n-    Merge two reads if they have exactly the same genomic coordinates\n-    """\n-\n-    def __init__(self, verbosity = 0):\n-        self.verbosity         = verbosity\n-        self.inputReader       = None\n-        self.outputWriter      = None\n-        self.strands           = True\n-        self.nbRead            = 0\n-        self.nbWritten         = 0\n-        self.nbMerges          = 0\n-        self.splittedFileNames = {}\n-\n-    def __del__(self):\n-        for fileName in self.splittedFileNames.values():\n-            os.remove(fileName)\n-            \n-    def close(self):\n-        self.outputWriter.close()\n-        \n-    def setInputFile(self, fileName, format):\n-        parserChooser = ParserChooser(self.verbosity)\n-        parserChooser.findFormat(format)\n-        self.parser = parserChooser.getParser(fileName)\n-        self.sortedFileName = "%s_sorted.pkl" % (os.path.splitext(fileName)[0])\n-\n-    def setOutputFile(self, fileName):\n-        self.outputWriter = Gff3Writer(fileName, self.verbosity)\n-\n-    def getNbElements(self):\n- 
       return self.parser.getNbTranscripts()\n-\n-    def _sortFile(self):\n-        fs = FileSorter(self.parser, self.verbosity-4)\n-        fs.perChromosome(True)\n-        fs.setOutputFileName(self.sortedFileName)\n-        fs.sort()\n-        self.splittedFileNames       = fs.getOutputFileNames()\n-        self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()\n-        self.nbRead                  = fs.getNbElements()\n-        \n-    def _iterate(self, chromosome):\n-        progress    = Progress(self.nbElementsPerChromosome[chromosome], "Checking chromosome %s" % (chromosome), self.verbosity)\n-        transcripts = []\n-        parser      = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)\n-        for newTranscript in parser.getIterator():\n-            newTranscripts = []\n-            for oldTranscript in transcripts:\n-                if self._checkOverlap(newTranscript, oldTranscript):\n-       '..b'pt2):\n-        self.nbMerges += 1\n-        transcript2.setDirection(transcript1.getDirection())\n-        transcript1.merge(transcript2)\n-\n-    def _write(self, transcript):\n-        self.nbWritten += 1\n-        self.outputWriter.addTranscript(transcript)\n-\n-    def _checkOverlap(self, transcript1, transcript2):\n-        if transcript1.getStart() != transcript2.getStart() or transcript1.getEnd() != transcript2.getEnd():\n-            return False\n-        return (not self.strands or transcript1.getDirection() == transcript2.getDirection())\n-\n-    def _checkPassed(self, transcript1, transcript2):\n-        return (transcript2.getStart() < transcript1.getStart())\n-\n-    def collapseChromosome(self, chromosome):\n-        progress            = Progress(table.getNbElements(), "Analysing chromosome %s" % (chromosome), self.verbosity)\n-        command             = "SELECT * FROM %s ORDER BY start ASC, end DESC" % (table.name)\n-        transcriptStart     = None\n-        transcriptEnd       = None\n-        
transcriptDirection = None\n-        currentTranscript   = None\n-        if self.strands:\n-            command += ", direction"\n-        for index, transcript in table.selectTranscripts(command, True):\n-            self.nbRead += 1\n-            if not self.strands:\n-                transcript.setDirection("+")\n-            if transcriptStart != transcript.getStart() or transcriptEnd != transcript.getEnd() or transcriptDirection != transcript.getDirection():\n-                self.writeTranscript(currentTranscript)\n-                transcriptStart     = transcript.getStart()\n-                transcriptEnd       = transcript.getEnd()\n-                transcriptDirection = transcript.getDirection()\n-                currentTranscript   = transcript\n-            else:\n-                currentTranscript.setTagValue("nbElements", (currentTranscript.getTagValue("nbElements") + 1) if "nbElements" in currentTranscript.getTagNames() else 1)\n-            progress.inc()\n-        self.writeTranscript(currentTranscript)\n-        progress.done()\n-\n-    def collapse(self):\n-        self._sortFile()\n-        for chromosome in sorted(self.nbElementsPerChromosome.keys()):\n-            self._iterate(chromosome)\n-        self.outputWriter.close()\n-        if self.verbosity > 1:\n-            print "# reads read: %d" % (self.nbRead)\n-            print "# reads written: %d (%.2f%%)" % (self.nbWritten, float(self.nbWritten) / self.nbRead * 100)\n-            print "# reads merges: %d" % (self.nbMerges)\n-\n-if __name__ == "__main__":\n-    \n-    # parse command line\n-    description = "Collapse Reads v1.0.3: Merge two reads if they have exactly the same genomic coordinates. 
[Category: Merge]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in mapping format given by -f]")\n-    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of the file [compulsory] [format: mapping file format]")\n-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n-    parser.add_option("-s", "--strands",   dest="strands",        action="store_true", default=False,                help="merge elements on 2 different strands [format: bool] [default: false]")\n-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")\n-    (options, args) = parser.parse_args()\n-\n-    collapser = CollapseReads(options.verbosity)\n-    collapser.setInputFile(options.inputFileName, options.format)\n-    collapser.setOutputFile(options.outputFileName)\n-    collapser.strands = not options.strands\n-    collapser.collapse()\n-    collapser.close()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/CombineTags.py
--- a/SMART/Java/Python/CombineTags.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,115 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2011
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import os
-import random
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.misc.Progress import Progress
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.writer.Gff3Writer import Gff3Writer
-
-OPERATIONS = ("plus", "minus", "times", "div")
-
-class CombineTags(object):
-
-    def __init__(self, verbosity = 0):
-        self.verbosity       = verbosity
-
-    def setInputFile(self, fileName, format):
-        self.inputFileName = fileName
-        parserChooser = ParserChooser(self.verbosity)
-        parserChooser.findFormat(format, "transcript")
-        self.parser = parserChooser.getParser(fileName)
-
-    def setOutputFile(self, fileName):
-        self.outputWriter = Gff3Writer(fileName, self.verbosity)
-
-    def setTags(self, tag1, tag2, outputTag, defaultValue = None):
-        self.tag1         = tag1
-        self.tag2         = tag2
-        self.outputTag    = outputTag
-        self.defaultValue = defaultValue
-
-    def setOperation(self, operation):
-        self.operation = operation
-        if self.operation not in OPERATIONS:
-            raise Exception("Do no handle operation %s, only: %s" % (self.operation, ", ".join(OPERATIONS)))
-
-    def run(self):
-        progress = Progress(self.parser.getNbTranscripts(), "Printing transcripts %s" % (self.inputFileName), self.verbosity)
-        for transcript in self.parser.getIterator():
-            tag1 = transcript.getTagValue(self.tag1)
-            tag2 = transcript.getTagValue(self.tag2)
-            if tag1 == None or tag2 == None:
-                if self.defaultValue == None:
-                    raise Exception("Transcript %s misses one of the tags %s and %s, and has no default value !" % (transcript, self.tag1, self.tag2))
-                newTag = self.defaultValue
-            else:
-                tag1, tag2 = float(tag1), float(tag2)
-                if self.operation == "plus":
-                    newTag = tag1 + tag2
-                elif self.operation == "minus":
-                    newTag = tag1 - tag2
-                elif self.operation == "times":
-                    newTag = tag1 * tag2
-                elif self.operation == "div":
-                    newTag = tag1 / tag2
-            transcript.setTagValue(self.outputTag, newTag)
-            self.outputWriter.addTranscript(transcript)
-            progress.inc()
-        progress.done()
-        self.parser.close()
-        self.outputWriter.close()
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Change Tag Name v1.0.1: Change the name of tag of a list of transcripts. [Category: Data Modification]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",               type="string", help="format of the input file [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",      dest="outputFileName", action="store",               type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-t", "--tag1",        dest="tag1",           action="store",               type="string", help="name of the first tag [compulsory] [format: string]")
-    parser.add_option("-T", "--tag2",        dest="tag2",           action="store",               type="string", help="name of the second tag [compulsory] [format: string]")
-    parser.add_option("-d", "--default",     dest="defaultValue",   action="store", default=None, type="string", help="default value when one of the tag is absent [compulsory] [format: float]")
-    parser.add_option("-n", "--new",         dest="newTag",         action="store",               type="string", help="name of the new tag [compulsory] [format: string]")
-    parser.add_option("-p", "--operation",   dest="operation",      action="store",               type="string", help="operation combining the tags [compulsory] [format: choice (plus, minus, times, div)]")
-    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int] [default: 1]")
-    (options, args) = parser.parse_args()
-
-    combiner = CombineTags(options.verbosity)
-    combiner.setInputFile(options.inputFileName, options.inputFormat)
-    combiner.setOutputFile("%s.gff3" % (options.outputFileName))
-    combiner.setTags(options.tag1, options.tag2, options.newTag, options.defaultValue)
-    combiner.setOperation(options.operation)
-    combiner.run()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/CompareOverlapping.py
--- a/SMART/Java/Python/CompareOverlapping.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,491 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import os, struct, time, random\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.ncList.NCList import NCList\n-from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n-from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n-from SMART.Java.Python.ncList.NCListHandler import NCListHandler\n-from SMART.Java.Python.ncList.ConvertToNCList import ConvertToNCList\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-from SMART.Java.Python.misc import Utils\n-try:\n-\timport cPickle as pickle\n-except:\n-\timport pickle\n-\n-REFERENCE = 0\n-QUERY = 1\n-TYPES = (REFERENCE, QUERY)\n-TYPETOSTRING = {0: "reference", 1: "query"}\n-\n-class CompareOverlapping(object):\n-\n-\tdef __init__(self, verbosity = 1):\n-\t\tself._outputFileName\t\t   = "outputOverlaps.gff3"\n-\t\tself._iWriter\t\t\t\t   = None\n-\t\tself._nbOverlappingQueries\t   = 0\n-\t\tself._nbOverlaps\t\t\t   = 0\n-\t\tself._nbLines\t\t\t\t   = {REFERENCE: 0, QUERY: 0}\n-\t\tself._verbosity\t\t\t\t   = verbosity\n-\t\tself._ncLists\t\t\t\t   = {}\n-\t\tself._cursors\t\t\t\t   = {}\n-\t\tself._splittedFileNames\t\t   = {}\n-\t\tself._nbElements\t\t\t   = {}\n-\t\tself._nbElementsPerChromosome  = 
{}\n-\t\tself._inputFileNames\t\t   = {REFERENCE: None,  QUERY: None}\n-\t\tself._inputFileFormats\t\t   = {REFERENCE: None,  QUERY: None}\n-\t\tself._starts\t\t\t\t   = {REFERENCE: None, QUERY: None}\n-\t\tself._ends\t\t\t\t\t   = {REFERENCE: None, QUERY: None}\n-\t\tself._fivePrimes\t\t\t   = {REFERENCE: None, QUERY: None}\n-\t\tself._threePrimes\t\t\t   = {REFERENCE: None, QUERY: None}\n-\t\tself._ncListHandlers\t\t   = {REFERENCE: None,  QUERY: None}\n-\t\tself._convertedFileNames\t   = {REFERENCE: False, QUERY: False}\n-\t\tself._sorted                   = False\n-\t\tself._index                    = False\n-\t\tself._introns\t\t\t\t   = False\n-\t\tself._antisense\t\t\t\t   = False\n-\t\tself._colinear\t\t\t\t   = False\n-\t\tself._invert\t\t\t\t   = False\n-\t\tself._distance\t\t\t\t   = 0\n-\t\tself._minOverlap\t\t\t   = 1\n-\t\tself._pcOverlap\t\t\t\t   = None\n-\t\tself._included\t\t\t\t   = False\n-\t\tself._including\t\t\t\t   = False\n-\t\tself._outputNotOverlapping\t   = False\n-\t\tself._tmpRefFileName\t\t   = None\n-\t\tself._currentQueryTranscript   = None\n-\t\tself._currentOrQueryTranscript = None\n-\t'..b'in file 1 (do not use it with -S) [format: int]")\n-\tparser.add_option("-u", "--end2",\t\t\t  dest="end2",\t\t     action="store",\t  default=None,  type="int",\thelp="only consider the n last nucleotides of the transcripts in file 2 (do not use it with -s) [format: int]")\n-\tparser.add_option("-t", "--intron",\t\t      dest="introns",\t\t action="store_true", default=False,\t\t\t\thelp="also report introns [format: bool] [default: false]")\n-\tparser.add_option("-E", "--5primeExtension1", dest="fivePrime1",\t action="store",\t  default=None,  type="int",\thelp="extension towards 5\' in file 1 [format: int]")\n-\tparser.add_option("-e", "--5primeExtension2", dest="fivePrime2",\t action="store",\t  default=None,  type="int",\thelp="extension towards 5\' in file 2 [format: int]")\n-\tparser.add_option("-N", "--3primeExtension1", 
dest="threePrime1",\t action="store",\t  default=None,  type="int",\thelp="extension towards 3\' in file 1 [format: int]")\n-\tparser.add_option("-n", "--3primeExtension2", dest="threePrime2",\t action="store",\t  default=None,  type="int",\thelp="extension towards 3\' in file 2 [format: int]")\n-\tparser.add_option("-c", "--colinear",\t\t  dest="colinear",\t\t action="store_true", default=False,\t\t\t\thelp="colinear only [format: bool] [default: false]")\n-\tparser.add_option("-a", "--antisense",\t\t  dest="antisense",\t\t action="store_true", default=False,\t\t\t\thelp="antisense only [format: bool] [default: false]")\n-\tparser.add_option("-d", "--distance",\t\t  dest="distance",\t     action="store",\t  default=0,\t type="int",\thelp="accept some distance between query and reference [format: int]")\n-\tparser.add_option("-k", "--included",\t\t  dest="included",\t     action="store_true", default=False,\t\t\t\thelp="keep only elements from file 1 which are included in an element of file 2 [format: bool] [default: false]")\n-\tparser.add_option("-K", "--including",\t\t  dest="including",\t     action="store_true", default=False,\t\t\t\thelp="keep only elements from file 2 which are included in an element of file 1 [format: bool] [default: false]")\n-\tparser.add_option("-m", "--minOverlap",\t\t  dest="minOverlap",\t action="store",\t  default=1,\t type="int",\thelp="minimum number of nucleotides overlapping to declare an overlap [format: int] [default: 1]")\n-\tparser.add_option("-p", "--pcOverlap",\t\t  dest="pcOverlap",\t     action="store",\t  default=None,  type="int",\thelp="minimum percentage of nucleotides to overlap to declare an overlap [format: int]")\n-\tparser.add_option("-O", "--notOverlapping",   dest="notOverlapping", action="store_true", default=False,\t\t\t\thelp="also output not overlapping data [format: bool] [default: false]")\n-\tparser.add_option("-x", "--exclude",\t\t  dest="exclude",\t\t action="store_true", 
default=False,\t\t\t\thelp="invert the match [format: bool] [default: false]")\n-\tparser.add_option("-v", "--verbosity",\t\t  dest="verbosity",\t\t action="store",\t  default=1,\t type="int",\thelp="trace level [format: int]")\n-\t(options, args) = parser.parse_args()\n-\n-\tco = CompareOverlapping(options.verbosity)\n-\tco.setInput(options.inputFileName1, options.format1, QUERY)\n-\tco.setInput(options.inputFileName2, options.format2, REFERENCE)\n-\tco.setOutput(options.output)\n-\tco.setSorted(options.sorted)\n-\tco.setIndex(options.index)\n-\tco.restrictToStart(options.start1, QUERY)\n-\tco.restrictToStart(options.start2, REFERENCE)\n-\tco.restrictToEnd(options.end1, QUERY)\n-\tco.restrictToEnd(options.end2, REFERENCE)\n-\tco.extendFivePrime(options.fivePrime1, QUERY)\n-\tco.extendFivePrime(options.fivePrime2, REFERENCE)\n-\tco.extendThreePrime(options.threePrime1, QUERY)\n-\tco.extendThreePrime(options.threePrime2, REFERENCE)\n-\tco.acceptIntrons(options.introns)\n-\tco.getAntisenseOnly(options.antisense)\n-\tco.getColinearOnly(options.colinear)\n-\tco.getInvert(options.exclude)\n-\tco.setMaxDistance(options.distance)\n-\tco.setMinOverlap(options.minOverlap)\n-\tco.setPcOverlap(options.pcOverlap)\n-\tco.setIncludedOnly(options.included)\n-\tco.setIncludingOnly(options.including)\n-\tco.includeNotOverlapping(options.notOverlapping)\n-\tco.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/CompareOverlapping.pyc
b
Binary file SMART/Java/Python/CompareOverlapping.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/CompareOverlappingSmallQuery.py
--- a/SMART/Java/Python/CompareOverlappingSmallQuery.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,261 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2011\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.TranscriptWriter import TranscriptWriter\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Mapping import Mapping\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-\n-MINBIN = 3\n-MAXBIN = 7\n-REFERENCE = 0\n-QUERY = 1\n-\n-def getBin(start, end):\n-\tfor i in range(MINBIN, MAXBIN + 1):\n-\t\tbinLevel = 10 ** i\n-\t\tif int(start / binLevel) == int(end / binLevel):\n-\t\t\treturn int(i * 10 ** (MAXBIN + 1) + int(start / binLevel))\n-\treturn int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n-\n-def getOverlappingBins(start, end):\n-\tarray\t= []\n-\tbigBin = int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n-\tfor i in range(MINBIN, MAXBIN + 1):\n-\t\tbinLevel = 10 ** i\n-\t\tarray.append((int(i * 10 ** (MAXBIN + 1) + int(start / binLevel)), int(i * 10 ** (MAXBIN + 1) + int(end / binLevel))))\n-\tarray.append((bigBin, bigBin))\n-\treturn array\n-\n-\n-class CompareOverlappingSmallQuery(object):\n-\n-\tdef __init__(self, verbosity):\n-\t\tself.verbosity      = verbosity\n-\t\tself.tableNames     = {}\n-\t\tself.nbQueries      = 0\n-\t\tself.nbRefs\t        = 0\n-\t\tself.nbWritten      = 0\n-\t\tself.nbOverlaps     = 0\n-\t\tself.distance       = None\n-\t\tself.invert         = False\n-\t\tself.antisense      = False\n-\t\tself.collinear      = 
False\n-\t\tself.pcOverlapQuery = False\n-\t\tself.pcOverlapRef   = False\n-\t\tself.minOverlap     = False\n-\t\tself.included       = False\n-\t\tself.including      = False\n-\t\tself.bins\t        = {}\n-\t\tself.overlaps       = {}\n-\t\tself.notOverlapping = False\n-\n-\tdef setReferenceFile(self, fileName, format):\n-\t\tchooser = ParserChooser(self.verbosity)\n-\t\tchooser.findFormat(format)\n-\t\tself.refParser = chooser.getParser(fileName)\n-\n-\tdef setQueryFile(self, fileName, format):\n-\t\tchooser = ParserChooser(self.verbosity)\n-\t\tchooser.findFormat(format)\n-\t\tself.queryParser = chooser.getParser(fileName)\n-\n-\tdef setOutputFile(self, fileName):\n-\t\tself.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n-\n-\tdef setDistance(self, distance):\n-\t\tself.distance = distance\n-\n-\tdef setInvert(self, boolean):\n-\t\tself.invert = boolean\n-\n-\tdef setCollinear(self, boolean):\n-\t\tself.collinear = boolean\n-\n-\tdef setAntisense(self, boolean):\n-\t\tself.antisense = boolean\n-\n-\tdef setMinPercentOverlap(sel'..b'\tprint "# written:  %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps)\n-\n-\tdef run(self):\n-\t\tself.loadQuery()\n-\t\tself.compare()\n-\t\tself.printResults()\n-\t\tself.displayResults()\n-\n-if __name__ == "__main__":\n-\t\n-\tdescription = "Compare Overlapping Small Query v1.0.1: Provide the queries that overlap with a reference, when the query is small. 
[Category: Data Comparison]"\n-\n-\tparser = OptionParser(description = description)\n-\tparser.add_option("-i", "--input1",\t        dest="inputFileName1", action="store",\t\t\t           type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n-\tparser.add_option("-f", "--format1",        dest="format1",\t\t  action="store",\t\t\t           type="string", help="format of previous file [compulsory] [format: transcript file format]")\n-\tparser.add_option("-j", "--input2",\t        dest="inputFileName2", action="store",\t\t\t           type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")\n-\tparser.add_option("-g", "--format2",        dest="format2",\t\t  action="store",\t\t\t           type="string", help="format of previous file [compulsory] [format: transcript file format]")\n-\tparser.add_option("-o", "--output",\t        dest="outputFileName", action="store",\t\t\t           type="string", help="output file [format: output file in GFF3 format]")\n-\tparser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False,\t\t\t\t help="also output not overlapping data [format: bool] [default: false]")\n-\tparser.add_option("-d", "--distance",\t\tdest="distance",\t   action="store",\t    default=0,\t   type="int",\t help="accept some distance between query and reference [format: int]")\n-\tparser.add_option("-c", "--collinear",\t\tdest="collinear",\t   action="store_true", default=False,\t\t\t \t help="provide collinear features [format: bool] [default: false]")\n-\tparser.add_option("-a", "--antisense",\t\tdest="antisense",\t   action="store_true", default=False,\t\t\t \t help="provide antisense features [format: bool] [default: false]")\n-\tparser.add_option("-m", "--minOverlap",\t    dest="minOverlap",     action="store",      default=False, type="int",\t help="min. 
#nt overlap [format: bool] [default: false]")\n-\tparser.add_option("-p", "--pcOverlapQuery",\tdest="pcOverlapQuery", action="store",      default=False, type="int",\t help="min. % overlap of the query [format: bool] [default: false]")\n-\tparser.add_option("-P", "--pcOverlapRef",\tdest="pcOverlapRef",   action="store",      default=False, type="int",   help="min. % overlap of the reference [format: bool] [default: false]")\n-\tparser.add_option("-k", "--included",\t\tdest="included",\t   action="store_true", default=False,\t\t\t \t help="provide query elements which are nested in reference elements [format: bool] [default: false]")\n-\tparser.add_option("-K", "--including",\t\tdest="including",\t   action="store_true", default=False,\t\t\t \t help="provide query elements in which reference elements are nested [format: bool] [default: false]")\n-\tparser.add_option("-x", "--exclude",\t\tdest="exclude",\t\t   action="store_true", default=False,\t\t\t \t help="invert the match [format: bool] [default: false]")\n-\tparser.add_option("-v", "--verbosity",      dest="verbosity",\t   action="store",      default=1,     type="int",\t help="trace level [format: int]")\n-\t(options, args) = parser.parse_args()\n-\n-\tcosq = CompareOverlappingSmallQuery(options.verbosity)\n-\tcosq.setQueryFile(options.inputFileName1, options.format1)\n-\tcosq.setReferenceFile(options.inputFileName2, options.format2)\n-\tcosq.setOutputFile(options.outputFileName)\n-\tcosq.includeNotOverlapping(options.notOverlapping)\n-\tcosq.setDistance(options.distance)\n-\tcosq.setCollinear(options.collinear)\n-\tcosq.setAntisense(options.antisense)\n-\tcosq.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef)\n-\tcosq.setMinOverlap(options.minOverlap)\n-\tcosq.setInclude(options.included, options.including)\n-\tcosq.setInvert(options.exclude)\n-\tcosq.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/CompareOverlappingSmallRef.py
--- a/SMART/Java/Python/CompareOverlappingSmallRef.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,250 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2011\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.TranscriptWriter import TranscriptWriter\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Mapping import Mapping\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-\n-MINBIN = 3\n-MAXBIN = 7\n-REFERENCE = 0\n-QUERY = 1\n-\n-def getBin(start, end):\n-\tfor i in range(MINBIN, MAXBIN + 1):\n-\t\tbinLevel = 10 ** i\n-\t\tif int(start / binLevel) == int(end / binLevel):\n-\t\t\treturn int(i * 10 ** (MAXBIN + 1) + int(start / binLevel))\n-\treturn int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n-\n-def getOverlappingBins(start, end):\n-\tarray\t= []\n-\tbigBin = int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n-\tfor i in range(MINBIN, MAXBIN + 1):\n-\t\tbinLevel = 10 ** i\n-\t\tarray.append((int(i * 10 ** (MAXBIN + 1) + int(start / binLevel)), int(i * 10 ** (MAXBIN + 1) + int(end / binLevel))))\n-\tarray.append((bigBin, bigBin))\n-\treturn array\n-\n-\n-class CompareOverlappingSmallRef(object):\n-\n-\tdef __init__(self, verbosity):\n-\t\tself.verbosity      = verbosity\n-\t\tself.tableNames     = {}\n-\t\tself.nbQueries      = 0\n-\t\tself.nbRefs\t        = 0\n-\t\tself.nbWritten      = 0\n-\t\tself.nbOverlaps     = 0\n-\t\tself.invert         = False\n-\t\tself.antisense      = False\n-\t\tself.collinear      = False\n-\t\tself.distance       = 
None\n-\t\tself.minOverlap     = False\n-\t\tself.pcOverlapQuery = False\n-\t\tself.pcOverlapRef   = False\n-\t\tself.included       = False\n-\t\tself.including      = False\n-\t\tself.bins\t        = {}\n-\t\tself.notOverlapping = False\n-\n-\tdef setReferenceFile(self, fileName, format):\n-\t\tchooser = ParserChooser(self.verbosity)\n-\t\tchooser.findFormat(format)\n-\t\tself.refParser = chooser.getParser(fileName)\n-\n-\tdef setQueryFile(self, fileName, format):\n-\t\tchooser = ParserChooser(self.verbosity)\n-\t\tchooser.findFormat(format)\n-\t\tself.queryParser = chooser.getParser(fileName)\n-\n-\tdef setOutputFile(self, fileName):\n-\t\tself.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n-\n-\tdef setDistance(self, distance):\n-\t\tself.distance = distance\n-\n-\tdef setCollinear(self, boolean):\n-\t\tself.collinear = boolean\n-\n-\tdef setAntisense(self, boolean):\n-\t\tself.antisense = boolean\n-\n-\tdef setInvert(self, boolean):\n-\t\tself.invert = boolean\n-\n-\tdef setMinPercentOverlap(self, pcOverlapQuery, pcOverlapRe'..b'\t\t\tprint "# refs:     %d" % (self.nbRefs)\n-\t\t\tprint "# written:  %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps)\n-\n-\tdef run(self):\n-\t\tself.loadRef()\n-\t\tself.compare()\n-\t\tself.displayResults()\n-\n-if __name__ == "__main__":\n-\t\n-\tdescription = "Compare Overlapping Small Reference v1.0.1: Provide the queries that overlap with a reference, when the reference is small. 
[Category: Data Comparison]"\n-\n-\tparser = OptionParser(description = description)\n-\tparser.add_option("-i", "--input1",\t        dest="inputFileName1", action="store",\t\t\t           type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n-\tparser.add_option("-f", "--format1",        dest="format1",\t\t  action="store",\t\t\t           type="string", help="format of previous file [compulsory] [format: transcript file format]")\n-\tparser.add_option("-j", "--input2",\t        dest="inputFileName2", action="store",\t\t\t           type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")\n-\tparser.add_option("-g", "--format2",        dest="format2",\t\t  action="store",\t\t\t           type="string", help="format of previous file [compulsory] [format: transcript file format]")\n-\tparser.add_option("-o", "--output",\t        dest="outputFileName", action="store",\t\t\t           type="string", help="output file [format: output file in GFF3 format]")\n-\tparser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False,\t\t\t\t  help="also output not overlapping data [format: bool] [default: false]")\n-\tparser.add_option("-d", "--distance",\t\tdest="distance",\t   action="store",\t    default=0,\t   type="int",\t  help="accept some distance between query and reference [format: int]")\n-\tparser.add_option("-c", "--collinear",\t\tdest="collinear",\t   action="store_true", default=False,\t\t\t \t  help="provide collinear features [format: bool] [default: false]")\n-\tparser.add_option("-a", "--antisense",\t\tdest="antisense",\t   action="store_true", default=False,\t\t\t \t  help="provide antisense features [format: bool] [default: false]")\n-\tparser.add_option("-m", "--minOverlap",\t    dest="minOverlap",     action="store",      default=False, type="int",\t  help="min. 
#nt overlap [format: bool] [default: false]")\n-\tparser.add_option("-p", "--pcOverlapQuery",\tdest="pcOverlapQuery", action="store",      default=False, type="int",\t  help="min. % overlap of the query [format: bool] [default: false]")\n-\tparser.add_option("-P", "--pcOverlapRef",\tdest="pcOverlapRef",   action="store",      default=False, type="int",    help="min. % overlap of the reference [format: bool] [default: false]")\n-\tparser.add_option("-k", "--included",\t\tdest="included",\t   action="store_true", default=False,\t\t\t \t  help="provide query elements which are nested in reference elements [format: bool] [default: false]")\n-\tparser.add_option("-K", "--including",\t\tdest="including",\t   action="store_true", default=False,\t\t\t \t  help="provide query elements in which reference elements are nested [format: bool] [default: false]")\n-\tparser.add_option("-x", "--exclude",\t\tdest="exclude",\t\t   action="store_true", default=False,\t\t\t \t  help="invert the match [format: bool] [default: false]")\n-\tparser.add_option("-v", "--verbosity",      dest="verbosity",\t   action="store",      default=1,     type="int",\t  help="trace level [format: int]")\n-\t(options, args) = parser.parse_args()\n-\n-\tcosr = CompareOverlappingSmallRef(options.verbosity)\n-\tcosr.setQueryFile(options.inputFileName1, options.format1)\n-\tcosr.setReferenceFile(options.inputFileName2, options.format2)\n-\tcosr.setOutputFile(options.outputFileName)\n-\tcosr.includeNotOverlapping(options.notOverlapping)\n-\tcosr.setDistance(options.distance)\n-\tcosr.setAntisense(options.antisense)\n-\tcosr.setInclude(options.included, options.including)\n-\tcosr.setInvert(options.exclude)\n-\tcosr.setMinOverlap(options.minOverlap)\n-\tcosr.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef)\n-\tcosr.run()\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ComputeCoverage.py
--- a/SMART/Java/Python/ComputeCoverage.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,142 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2011
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import os, random
-from optparse import OptionParser, OptionGroup
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.misc.Progress import Progress
-from commons.core.writer.Gff3Writer import Gff3Writer
-
-
-class CoverageComputer(object):
-
- def __init__(self, verbosity = 0):
- self.verbosity      = verbosity
- self.queryReader  = None
- self.referenceReader = None
- self.outputWriter  = None
- self.introns  = False
- self.nbNucleotides   = 0
- self.nbCovered      = 0
-
- def setInputQueryFile(self, fileName, format):
- self.queryReader = TranscriptContainer(fileName, format, self.verbosity-1)
-
- def setInputReferenceFile(self, fileName, format):
- self.referenceReader = TranscriptContainer(fileName, format, self.verbosity-1)
-
- def includeIntrons(self, boolean):
- self.introns = boolean
-
- def setOutputFileName(self, fileName, title="S-MART", feature="transcript", featurePart="exon"):
- self.outputWriter = Gff3Writer(fileName, self.verbosity-1)
- self.outputWriter.setTitle(title)
- self.outputWriter.setFeature(feature)
- self.outputWriter.setFeaturePart(featurePart)
-
- def readReference(self):
- self.coveredRegions = {}
- progress = Progress(self.referenceReader.getNbTranscripts(), "Reading reference file", self.verbosity-1)
- for transcript in self.referenceReader.getIterator():
- chromosome = transcript.getChromosome()
- if chromosome not in self.coveredRegions:
- self.coveredRegions[chromosome] = {}
- if self.introns:
- transcript.removeExons()
- for exon in transcript.getExons():
- for position in range(exon.getStart(), exon.getEnd()+1):
- self.coveredRegions[chromosome][position] = 1
- progress.inc()
- progress.done()
-
- def readQuery(self):
- progress = Progress(self.queryReader.getNbTranscripts(), "Reading query file", self.verbosity-1)
- for transcript in self.queryReader.getIterator():
- progress.inc()
- chromosome = transcript.getChromosome()
- if chromosome not in self.coveredRegions:
- continue
- if self.introns:
- transcript.removeExons()
- for exon in transcript.getExons():
- for position in range(exon.getStart(), exon.getEnd()+1):
- self.nbNucleotides += 1
- self.nbCovered     += self.coveredRegions[chromosome].get(position, 0)
- progress.done()
-
- def write(self):
- progress = Progress(self.queryReader.getNbTranscripts(), "Writing output file", self.verbosity-1)
- for transcript in self.queryReader.getIterator():
- chromosome = transcript.getChromosome()
- if self.introns:
- transcript.removeExons()
- size  = transcript.getSize()
- coverage = 0
- for exon in transcript.getExons():
- for position in range(exon.getStart(), exon.getEnd()+1):
- coverage += self.coveredRegions[chromosome].get(position, 0)
- transcript.setTagValue("coverage", 0 if size == 0 else float(coverage) / size * 100)
- self.outputWriter.addTranscript(transcript)
- progress.inc()
- progress.done()
-
- def sumUp(self):
- print "%d nucleotides in query, %d (%.f%%) covered" % (self.nbNucleotides, self.nbCovered, 0 if self.nbNucleotides == 0 else float(self.nbCovered) / self.nbNucleotides * 100)
-
- def run(self):
- self.readReference()
- self.readQuery()
- if self.outputWriter != None:
- self.write()
- self.sumUp()
-
-
-if __name__ == "__main__":
-
- # parse command line
- description = "Compute Coverage v1.0.1: Compute the coverage of a set with respect to another set. [Category: Personal]"
-
- parser = OptionParser(description = description)
- parser.add_option("-i", "--input1",    dest="inputFileName1", action="store",                     type="string", help="input query file [compulsory] [format: file in transcript format given by -f]")
- parser.add_option("-f", "--format1",   dest="format1",        action="store",                     type="string", help="format of the first file [compulsory] [format: transcript file format]")
- parser.add_option("-j", "--input2",    dest="inputFileName2", action="store",                     type="string", help="input reference file [compulsory] [format: file in transcript format given by -f]")
- parser.add_option("-g", "--format2",   dest="format2",        action="store",                     type="string", help="format of the second file [compulsory] [format: transcript file format]")
- parser.add_option("-t", "--introns",   dest="introns",        action="store_true", default=False,                help="also include introns [format: boolean] [default: false]")
- parser.add_option("-o", "--output",    dest="outputFileName", action="store",    default=None,  type="string", help="output file [format: output file in GFF3 format]")
- parser.add_option("-v", "--verbosity", dest="verbosity",   action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
- (options, args) = parser.parse_args()
-
- computer = CoverageComputer(options.verbosity)
- computer.setInputQueryFile(options.inputFileName1, options.format1)
- computer.setInputReferenceFile(options.inputFileName2, options.format2)
- computer.includeIntrons(options.introns)
- computer.setOutputFileName(options.outputFileName)
- computer.run()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/CountLoci.py
--- a/SMART/Java/Python/CountLoci.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,230 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2012\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import os, os.path, random\n-from optparse import OptionParser\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from commons.core.parsing.GffParser import GffParser\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from commons.core.writer.TranscriptWriter import TranscriptWriter\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from SMART.Java.Python.cleanGff import CleanGff\n-from SMART.Java.Python.CompareOverlapping import CompareOverlapping\n-from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator\n-from SMART.Java.Python.GetUpDownStream import GetUpDownStream\n-\n-REFERENCE = 0\n-QUERY = 1\n-\n-class CountLoci(object):\n-    \n-    def __init__(self, verbosity = 1):\n-        self.verbosity = verbosity\n-        self.tmpFileNames = []\n-\n-    def __del__(self):\n-        for fileName in self.tmpFileNames:\n-            if os.path.exists(fileName):\n-                os.remove(fileName)\n-    \n-    def setInputFile(self, fileName, format):\n-        self.inputFileName = fileName\n-        self.inputFormat = format\n-        self.parser = TranscriptContainer(fileName, format, self.verbosity-1)\n-        if self.verbosity > 0:\n-            print "%d elements in input" % (self.parser.getNbTranscripts())\n-\n-    def setReference(self, fileName):\n-        self.referenceFileName = fileName\n-\n-    def setDistance(self, distance):\n-        self.distance = distance\n-\n-    def 
setOutputFileName(self, fileName):\n-        self.outputFileName = fileName\n-        self.writer         = Gff3Writer(fileName, self.verbosity-1)\n-        self.outputBase     = "%s_%d_" % (os.path.splitext(fileName)[0], random.randint(0, 10000))\n-\n-    def _writeTmpRef(self, tags, outputFileName):\n-        cleanGff = CleanGff(self.verbosity-1)\n-        cleanGff.setInputFileName(self.referenceFileName)\n-        cleanGff.setOutputFileName(outputFileName)\n-        cleanGff.setAcceptedTypes(tags)\n-        cleanGff.run()\n-\n-    def _getReferenceFiles(self):\n-        self.referenceFiles = {"CDS":                       "%scds.gff3"      % (self.outputBase), \\\n-                               "five_prime_UTR":            "%sfive.gff3"     % (self.outputBase), \\\n-                               "three_prime_UTR":           "%sthree.gff3"    % (self.outputBase), \\\n-                               "mRNA":                      "%smrna.gff3"     % (sel'..b', "gff3", self.referenceFiles["transposable_element_gene"], "gff3", outputNoFileName, True)\n-        self._copy(outputFileName, "TE")\n-        if self.verbosity > 0:\n-            print "%d overlaps in TE" % (nbOverlaps)\n-        return outputNoFileName\n-    \n-    def _getIntron(self, inputFileName):\n-        outputFileName   = "%sin_intron.gff3" % (self.outputBase)\n-        outputNoFileName = "%sin_nointron.gff3" % (self.outputBase)\n-        self.tmpFileNames.extend([outputFileName, outputNoFileName])\n-        nbOverlaps = self._compare(inputFileName, "gff3", self.referenceFiles["mRNA"], "gff3", outputFileName)\n-        self._compare(inputFileName, "gff3", self.referenceFiles["mRNA"], "gff3", outputNoFileName, True)\n-        self._copy(outputFileName, "intron")\n-        if self.verbosity > 0:\n-            print "%d overlaps in introns" % (nbOverlaps)\n-        return outputNoFileName\n-    \n-    def _getVicinity(self, inputFileName):\n-        guds = GetUpDownStream(self.verbosity-1)\n-        
guds.setInputFile(self.referenceFiles["mRNA"], "gff3")\n-        guds.setOutputFile(self.referenceFiles["vic"])\n-        guds.setDistances(self.distance, self.distance)\n-        guds.run()\n-        outputFileName = "%sout_vicinity.gff3" % (self.outputBase)\n-        outputNoFileName = "%sout_novicinity.gff3" % (self.outputBase)\n-        self.tmpFileNames.extend([outputFileName, outputNoFileName])\n-        nbOverlaps = self._compare(inputFileName, "gff3", self.referenceFiles["vic"], "gff3", outputFileName)\n-        nbNoOverlaps = self._compare(inputFileName, "gff3", self.referenceFiles["vic"], "gff3", outputNoFileName, True)\n-        self._copy(outputFileName, "vicinity")\n-        self._copy(outputNoFileName, "intergenic")\n-        if self.verbosity > 0:\n-            print "%d overlaps in vicinity" % (nbOverlaps)\n-            print "%d elsewhere" % (nbNoOverlaps)\n-    \n-    def run(self):\n-        self._getReferenceFiles()\n-        outputFileName = self._getCds()\n-        outputFileName = self._getFivePrime(outputFileName)\n-        outputFileName = self._getThreePrime(outputFileName)\n-        outputFileName = self._getNcRna(outputFileName)\n-        outputFileName = self._getTe(outputFileName)\n-        outputFileName = self._getIntron(outputFileName)\n-        self._getVicinity(outputFileName)\n-\n-\n-\n-if __name__ == "__main__":\n-    \n-    # parse command line\n-    description = "Count Loci v1.0.0: Count input elements with respect to CDS, 5\' UTR, 3\' UTR, intron, downstream, upstream. 
[Category: Personal]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",            type="string", help="input file              [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--format",    dest="format",         action="store",            type="string", help="format of the input     [compulsory] [format: transcript file format]")\n-    parser.add_option("-r", "--reference", dest="reference",      action="store",            type="string", help="reference file          [compulsory] [format: file in GFF format]")     \n-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",            type="string", help="output file             [compulsory] [format: output file in GFF3 format]")\n-    parser.add_option("-d", "--distance",  dest="distance",       action="store",            type="int",    help="distance up/down stream [compulsory] [format: output file in GFF3 format]")\n-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1, type="int",    help="trace level                          [format: int]")\n-    (options, args) = parser.parse_args()\n-\n-    cl = CountLoci(options.verbosity)\n-    cl.setInputFile(options.inputFileName, options.format)\n-    cl.setDistance(options.distance)\n-    cl.setReference(options.reference)\n-    cl.setOutputFileName(options.outputFileName)\n-    cl.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/CountReadGCPercent.py
--- a/SMART/Java/Python/CountReadGCPercent.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import FastaParser
-from commons.core.writer.Gff3Writer import Gff3Writer
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.misc.Progress import Progress
-from commons.core.utils.RepetOptionParser import RepetOptionParser
-from Gnome_tools.CountGCPercentBySlidingWindow import CountGCPercentBySlidingWindow
-
-
-class CountReadGCPercent(object):
-    
-    def __init__(self):
-        self.referenceReader = None
-        self.gffReader = None
-        self.outputWriter = None
-        self.verbose = 0
-        
-    def setInputReferenceFile(self, fileName):
-        self.referenceReader = fileName

-    def setInputGffFile(self, fileName):
-        self.gffReader = TranscriptContainer(fileName, 'gff3', self.verbose)
-        
-    def setOutputFileName(self, fileName):
-        self.outputWriter = Gff3Writer(fileName, self.verbose)
-
-    def readGffAnnotation(self):
-        self.coveredRegions = {}
-        progress = Progress(self.gffReader.getNbTranscripts(), "Reading gff3 annotation file", self.verbose)
-        for transcript in self.gffReader.getIterator():
-            chromosome = transcript.getChromosome()
-            if chromosome not in self.coveredRegions:
-                self.coveredRegions[chromosome] = {}
-            for exon in transcript.getExons():
-                for position in range(exon.getStart(), exon.getEnd()+1):
-                    self.coveredRegions[chromosome][position] = 1
-            progress.inc()
-        progress.done()
-        
-    def write(self):
-        iParser = FastaParser(self.referenceReader)
-        iParser.setTags()
-        iGetGCPercentBySW = CountGCPercentBySlidingWindow()
-        progress = Progress(self.gffReader.getNbTranscripts(), "Writing output file", self.verbose)
-        for transcript in self.gffReader.getIterator():
-            chromosome = transcript.getChromosome()
-            GCpercent = 0
-            nPercent = 0
-            for exon in transcript.getExons():
-                    for sequenceName in iParser.getTags().keys():
-                        if sequenceName != chromosome:
-                            continue
-                        else:
-                            subSequence = iParser.getSubSequence(sequenceName, exon.getStart() , exon.getEnd(), 1)
-                            GCpercent, nPercent = iGetGCPercentBySW.getGCPercentAccordingToNAndNPercent(subSequence)
-                            print "GCpercent = %f, nPercent = %f" % (GCpercent, nPercent)
-            transcript.setTagValue("GCpercent", GCpercent)
-            transcript.setTagValue("NPercent", nPercent)
-            self.outputWriter.addTranscript(transcript)
-            progress.inc()
-        progress.done()

-    def run(self):
-        self.readGffAnnotation()
-        if self.outputWriter != None:
-            self.write()
-            
-if __name__ == "__main__":
-        description = "Count GC percent for each read against a genome."
-        usage = "CountReadGCPercent.py -i <fasta file> -j <gff3 file> -o <output gff3 file> -v <verbose> -h]"
-        examples = "\nExample: \n"
-        examples += "\t$ python CountReadGCPercent.py -i file.fasta -j annotation.gff -o output.gff3"
-        examples += "\n\n"
-        parser = RepetOptionParser(description = description, usage = usage, version = "v1.0", epilog = examples)
-        parser.add_option( '-i', '--inputGenome', dest='fastaFile', help='fasta file [compulsory]', default= None )
-        parser.add_option( '-j', '--inputAnnotation', dest='gffFile', help='gff3 file [compulsory]', default= None)
-        parser.add_option( '-o', '--output', dest='outputFile', help='output gff3 file [compulsory]', default= None )
-        parser.add_option( '-v', '--verbose', dest='verbose', help='verbosity level (default=0/1)',type="int", default= 0 )
-        (options, args) = parser.parse_args()
-    
-        readGCPercent = CountReadGCPercent()
-        readGCPercent.setInputReferenceFile(options.fastaFile)
-        readGCPercent.setInputGffFile(options.gffFile)
-        readGCPercent.setOutputFileName(options.outputFile)
-        readGCPercent.run()
-        
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/FindOverlapsOptim.py
--- a/SMART/Java/Python/FindOverlapsOptim.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,343 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2012\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-\n-import os, struct, time, shutil\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.ncList.NCList import NCList\n-from SMART.Java.Python.ncList.ConvertToNCList import ConvertToNCList\n-from SMART.Java.Python.ncList.NCListParser import NCListParser\n-from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n-from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n-from SMART.Java.Python.ncList.NCListHandler import NCListHandler\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-try:\n-   import cPickle as pickle\n-except:\n-   import pickle\n-\n-REFERENCE = 0\n-QUERY = 1\n-TYPES = (REFERENCE, QUERY)\n-TYPETOSTRING = {0: "reference", 1: "query"}\n-\n-class FindOverlapsOptim(object):\n-\t\n-\tdef __init__(self, verbosity = 1):\n-\t\tself._parsers\t\t\t\t  = {}\n-\t\tself._sortedFileNames\t\t  = {}\n-\t\tself._outputFileName\t\t  = "outputOverlaps.gff3"\n-\t\tself._iWriter\t\t\t\t  = None\n-\t\tself._inputFileNames\t\t  = {REFERENCE: None,  QUERY: None}\n-\t\tself._convertedFileNames      = {REFERENCE: False, QUERY: False}\n-\t\tself._inputFileFormats\t\t  = {REFERENCE: None,  QUERY: None}\n-\t\tself._converted\t\t\t      = {REFERENCE: False, QUERY: 
False}\n-\t\tself._ncListHandlers          = {REFERENCE: None,  QUERY: None}\n-\t\tself._splittedFileNames\t      = {REFERENCE: {},\tQUERY: {}}\n-\t\tself._nbOverlappingQueries\t  = 0\n-\t\tself._nbOverlaps\t\t\t  = 0\n-\t\tself._nbLines\t\t\t\t  = {REFERENCE: 0, QUERY: 0}\n-\t\tself._sorted                  = False\n-\t\tself._index                   = False\n-\t\tself._verbosity\t\t\t      = verbosity\n-\t\tself._ncLists\t\t\t\t  = {}\n-\t\tself._cursors\t\t\t\t  = {}\n-\t\tself._nbElementsPerChromosome = {}\n-\t\tself._tmpDirectories\t\t  = {REFERENCE: False, QUERY: False}\n-\t\t\n-\tdef close(self):\n-\t\tself._iWriter.close()\n-\t\tfor fileName in (self._sortedFileNames.values()):\n-\t\t\tif os.path.exists(fileName):\n-\t\t\t\tos.remove(fileName)\n-\t\tfor fileName in self._convertedFileNames.values():\n-\t\t\tif fileName:\n-\t\t\t\tos.remove(fileName)\n-\t\t\n-\tdef setRefFileName(self, fileName, format):\n-\t\tself.setFileName(fileName, format, REFERENCE)\n-\t\t\n-\tdef setQueryFileName(self, fileName, format):\n-\t\tself.setFileName(fileName, format, QUERY)\n-\n-\tdef se'..b'def isOverlapping(self, queryTranscript, refTranscript):\n-\t\tif (queryTranscript.getStart() <= refTranscript.getEnd() and queryTranscript.getEnd() >= refTranscript.getStart()):\n-\t\t\treturn 0   \n-\t\tif queryTranscript.getEnd() < refTranscript.getStart():\n-\t\t\treturn 1\n-\t\treturn -1\n-\n-\tdef checkIndex(self, transcript, cursor):\n-\t\tif not self._index:\n-\t\t\treturn None\n-\t\tchromosome = transcript.getChromosome()\n-\t\tnextLIndex = self._indices[REFERENCE][chromosome].getIndex(transcript)\n-\t\tif nextLIndex == None:\n-\t\t\treturn None\n-\t\tncList\t\t = self._ncLists[REFERENCE][chromosome]\n-\t\tnextGffAddress = ncList.getRefGffAddr(nextLIndex)\n-\t\tthisGffAddress = cursor.getGffAddress()\n-\t\tif nextGffAddress > thisGffAddress:\n-\t\t\treturn nextLIndex\n-\t\treturn None\n-\t\t\n-\tdef _writeIntervalInNewGFF3(self, transcript, names):\n-\t\tnbOverlaps = 
0\n-\t\tfor cpt in names.values():\n-\t\t\tnbOverlaps += cpt\n-\t\tif not names:\n-\t\t\treturn\n-\t\ttranscript.setTagValue("overlapsWith", "--".join(sorted(names.keys())))\n-\t\ttranscript.setTagValue("nbOverlaps", nbOverlaps)\n-\t\tself._iWriter.addTranscript(transcript)\n-\t\tself._iWriter.write()\n-\t\tself._nbOverlappingQueries += 1\n-\t\tself._nbOverlaps\t\t   += nbOverlaps\n-\t\t\n-\tdef _extractID(self, transcript):\n-\t\tnbElements = float(transcript.getTagValue("nbElements")) if "nbElements" in transcript.getTagNames() else 1\n-\t\tid\t\t   = transcript.getTagValue("ID")\t\t\t\t if "ID"\t\t in transcript.getTagNames() else transcript.getUniqueName()\n-\t\treturn {id: nbElements}\n-\t\t\n-\tdef run(self):\n-\t\tself.createNCLists()\n-\t\tself.compare()\n-\t\tself.close()\n-\t\tif self._verbosity > 0:\n-\t\t\tprint "# queries: %d" % (self._nbLines[QUERY])\n-\t\t\tprint "# refs:    %d" % (self._nbLines[REFERENCE])\n-\t\t\tprint "# written: %d (%d overlaps)" % (self._nbOverlappingQueries, self._nbOverlaps)\n-\t\t\tprint "time:      %.2gs" % (self._timeSpent)\n-\n-\n-if __name__ == "__main__":\n-\tdescription = "Find Overlaps Optim v1.0.0: Finds overlaps with several query intervals. 
[Category: Data Comparison]"\n-\n-\tparser = OptionParser(description = description)\n-\tparser.add_option("-i", "--query",\t     dest="inputQueryFileName", action="store",\t\t\t            type="string", help="query input file [compulsory] [format: file in transcript or other format given by -f]")\n-\tparser.add_option("-f", "--queryFormat", dest="queryFormat",\t\taction="store",\t\t\t            type="string", help="format of previous file (possibly in NCL format) [compulsory] [format: transcript or other file format]")\n-\tparser.add_option("-j", "--ref",\t\t dest="inputRefFileName",   action="store",\t\t\t            type="string", help="reference input file [compulsory] [format: file in transcript or other format given by -g]")\n-\tparser.add_option("-g", "--refFormat",   dest="refFormat",\t\t    action="store",\t\t\t            type="string", help="format of previous file (possibly in NCL format) [compulsory] [format: transcript or other file format]")\n-\tparser.add_option("-o", "--output",\t     dest="outputFileName",\t    action="store",\t\t\t            type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n-\tparser.add_option("-d", "--index",\t     dest="index",\t            action="store_true", default=False,\t               help="add an index to the reference file (faster but more memory) [format: boolean] [default: False]")\n-\tparser.add_option("-s", "--sorted",\t     dest="sorted",\t            action="store_true", default=False,\t               help="input files are already sorted [format: boolean] [default: False]")\n-\tparser.add_option("-v", "--verbosity",   dest="verbosity",\t\t    action="store",      default=1,     type="int",\t   help="Trace level [format: int] [default: 1]")\n-\t(options, args) = parser.parse_args()\n-\t\n-\tiFOO = FindOverlapsOptim(options.verbosity)\n-\tiFOO.setRefFileName(options.inputRefFileName, options.refFormat)\n-\tiFOO.setQueryFileName(options.inputQueryFileName, 
options.queryFormat)\n-\tiFOO.setOutputFileName(options.outputFileName)\n-\tiFOO.setIndex(options.index)\n-\tiFOO.setSorted(options.sorted)\n-\tiFOO.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/GetDifferentialExpression.py
--- a/SMART/Java/Python/GetDifferentialExpression.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,441 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""Get the differential expression between 2 conditions (2 files), on regions defined by a third file"""\n-\n-import os, re\n-from optparse import OptionParser\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from SMART.Java.Python.misc import Utils\n-from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection\n-from SMART.Java.Python.structure.Transcript import Transcript\n-\n-class GetDifferentialExpression(object):\n-    \n-    def __init__(self, verbosity = 1):\n-        self.verbosity              = verbosity\n-        self.mySqlConnection        = MySqlConnection(verbosity)\n-        self.inputs                 = (0, 1)\n-        self.transcriptContainers   = [None, None]\n-        self.transcriptContainerRef = None\n-        self.outputFileName         = None\n-        self.writer                 = None\n-        self.tables                 = [None, None]\n-        self.nbElements             = [0, 0]\n-\n-        self.regionsToValues = {}\n-        self.regionsToNames  = {}\n-        self.valuesToPvalues = {}\n-\n-        self.oriented                      = True\n-        self.simpleNormalization           = False\n-        self.simpleNormalizationParameters = None\n-        self.adjustedNormalization         = False\n-        self.fixedSizeFactor               = None\n-        self.normalizationSize             = 
None\n-        self.normalizationFactors          = [1, 1]\n-        self.fdr                           = None \n-        self.fdrPvalue                     = None \n-\n-        self.plot    = False\n-        self.plotter = None\n-        self.plotterName = None\n-        self.points  = {}\n-\n-\n-    def setInputFile(self, i, fileName, fileFormat):\n-        self.transcriptContainers[i] = TranscriptContainer(fileName, fileFormat, self.verbosity)\n-        self.transcriptContainers[i].mySqlConnection = self.mySqlConnection\n-\n-\n-    def setReferenceFile(self, fileName, fileFormat):\n-        self.transcriptContainerRef = TranscriptContainer(fileName, fileFormat, self.verbosity)\n-        self.transcriptContainerRef.mySqlConnection = self.mySqlConnection\n-\n-\n-    def setOutputFile(self, fileName):\n-        self.outputFileName = fileName\n-        self.writer         = Gff3Writer(fileName, self.verbosity)\n-\n-    \n-    def setOriented(self'..b' file in transcript format given by -f]")\n-    parser.add_option("-f", "--format1",          dest="format1",           action="store",                     type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n-    parser.add_option("-j", "--input2",           dest="inputFileName2",    action="store",                     type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n-    parser.add_option("-g", "--format2",          dest="format2",           action="store",                     type="string", help="format of file 2 [compulsory] [format: transcript file format]")\n-    parser.add_option("-k", "--reference",        dest="referenceFileName", action="store",                     type="string", help="reference file [compulsory] [format: file in transcript format given by -l]")\n-    parser.add_option("-l", "--referenceFormat",  dest="referenceFormat",   action="store",                     type="string", help="format of reference file 
[compulsory] [format: transcript file format]")\n-    parser.add_option("-o", "--output",           dest="outputFileName",    action="store",                     type="string", help="output file [format: output file in gff3 format]")\n-    parser.add_option("-n", "--notOriented",      dest="notOriented",       action="store_true", default=False,                help="if the reads are not oriented [default: False] [format: bool]")\n-    parser.add_option("-s", "--simple",           dest="simple",            action="store_true", default=False,                help="normalize using the number of reads in each condition [format: bool]")\n-    parser.add_option("-S", "--simpleParameters", dest="simpleParameters",  action="store",      default=None,  type="string", help="provide the number of reads [format: bool]")\n-    parser.add_option("-a", "--adjusted",         dest="adjusted",          action="store_true", default=False,                help="normalize using the number of reads of \'mean\' regions [format: bool]")\n-    parser.add_option("-x", "--fixedSizeFactor",  dest="fixedSizeFactor",   action="store",      default=None,  type="int",    help="give the magnification factor for the normalization using fixed size sliding windows in reference regions (leave empty for no such normalization) [format: int]")\n-    parser.add_option("-d", "--fdr",              dest="fdr",               action="store",      default=None,  type="float",  help="use FDR [format: float]")\n-    parser.add_option("-p", "--plot",             dest="plotName",          action="store",      default=None,  type="string", help="plot cloud plot [format: output file in PNG format]")\n-    parser.add_option("-v", "--verbosity",        dest="verbosity",         action="store",      default=1,     type="int",    help="trace level [format: int]")\n-    (options, args) = parser.parse_args()\n-\n-\n-        \n-    differentialExpression = GetDifferentialExpression(options.verbosity)\n-    
differentialExpression.setInputFile(0, options.inputFileName1, options.format1)\n-    differentialExpression.setInputFile(1, options.inputFileName2, options.format2)\n-    differentialExpression.setReferenceFile(options.referenceFileName, options.referenceFormat)\n-    differentialExpression.setOutputFile(options.outputFileName)\n-    if options.plotName != None :\n-        differentialExpression.setPlotterName(options.plotName)\n-        differentialExpression.setPlotter()\n-    differentialExpression.setOriented(not options.notOriented)\n-    differentialExpression.setSimpleNormalization(options.simple)\n-    differentialExpression.setSimpleNormalizationParameters(options.simpleParameters)\n-    differentialExpression.setAdjustedNormalization(options.adjusted)\n-    differentialExpression.setFixedSizeNormalization(options.fixedSizeFactor)\n-    differentialExpression.setFdr(options.fdr)\n-    differentialExpression.getDifferentialExpression()\n-    differentialExpression.mySqlConnection.deleteDatabase()\n-    \n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/GetDistribution.py
--- a/SMART/Java/Python/GetDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,362 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2012\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import os\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.parsing.FastaParser import FastaParser\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from SMART.Java.Python.misc.MultipleRPlotter import MultipleRPlotter\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-from SMART.Java.Python.misc.Progress import Progress\n-\n-TWOSTRANDS = {True: [1, -1], False: [0]}\n-STRANDTOSTR = {1: "(+)", -1: "(-)", 0: ""}\n-\n-class GetDistribution(object):\n-\n-\tdef __init__(self, verbosity):\n-\t\tself.verbosity     = verbosity\n-\t\tself.sizes         = None\n-\t\tself.twoStrands    = False\n-\t\tself.start         = 1\n-\t\tself.names         = ["nbElements"]\n-\t\tself.average       = False\n-\t\tself.nbValues      = {}\n-\t\tself.height        = 300\n-\t\tself.width         = 600\n-\t\tself.colors        = None\n-\t\tself.gffFileName   = None\n-\t\tself.csvFileName   = None\n-\t\tself.yMin          = None\n-\t\tself.yMax          = None\n-\t\tself.chromosome    = None\n-\t\tself.merge         = False\n-\t\tself.nbTranscripts = None\n-\n-\tdef setInputFile(self, fileName, format):\n-\t\tchooser = ParserChooser(self.verbosity)\n-\t\tchooser.findFormat(format)\n-\t\tself.parser = chooser.getParser(fileName)\n-\n-\tdef setReferenceFile(self, fileName):\n-\t\tif fileName == None:\n-\t\t\treturn\n-\t\tfastaParser = 
FastaParser(fileName, self.verbosity)\n-\t\tself.chromosomes = fastaParser.getRegions()\n-\t\tself.sizes       = dict([region, fastaParser.getSizeOfRegion(region)] for region in self.chromosomes)\n-\t\tself.maxSize     = max(self.sizes.values())\n-\n-\tdef setRegion(self, chromosome, start, end):\n-\t\tif chromosome == None:\n-\t\t\treturn\n-\t\tself.maxSize     = options.end\n-\t\tself.sizes       = {chromosome: end}\n-\t\tself.chromosomes = [chromosome]\n-\t\tself.chromosome  = chromosome\n-\t\tself.start       = start\n-\t\tself.end         = end\n-\n-\tdef setOutputFile(self, fileName):\n-\t\tself.outputFileName = fileName\n-\n-\tdef setNbBins(self, nbBins):\n-\t\tself.nbBins = nbBins\n-\n-\tdef set2Strands(self, twoStrands):\n-\t\tself.twoStrands = twoStrands\n-\n-\tdef setNames(self, names):\n-\t\tself.names = names\n-\n-\tdef setAverage(self, average):\n-\t\tself.average = average\n-\n-\tdef setNormalization(self, normalization):\n-\t\tself.normalization = normalization\n-\t\n-\tdef setImageSize(self, height, width):\n-\t\tself.height = height\n-\t\tself.width '..b'        action="store",      default=1000,         type="int",    help="number of bins [default: 1000] [format: int]")\n-\tparser.add_option("-2", "--bothStrands", dest="bothStrands",       action="store_true", default=False,                       help="plot one curve per strand [format: bool] [default: false]")\n-\tparser.add_option("-c", "--chromosome",  dest="chromosome",        action="store",      default=None,         type="string", help="plot only a chromosome [format: string]")\n-\tparser.add_option("-s", "--start",       dest="start",             action="store",      default=None,         type="int",    help="start from a given region [format: int]")\n-\tparser.add_option("-e", "--end",         dest="end",               action="store",      default=None,         type="int",    help="end from a given region [format: int]")\n-\tparser.add_option("-y", "--yMin",        dest="yMin",           
   action="store",      default=None,         type="int",    help="minimum value on the y-axis to plot [format: int]")\n-\tparser.add_option("-Y", "--yMax",        dest="yMax",              action="store",      default=None,         type="int",    help="maximum value on the y-axis to plot [format: int]")\n-\tparser.add_option("-x", "--csv",         dest="csv",               action="store",      default=None,                        help="write a .csv file [format: output file in CSV format] [default: None]")\n-\tparser.add_option("-g", "--gff",         dest="gff",               action="store",      default=None,                        help="also write GFF3 file [format: output file in GFF format] [default: None]")\n-\tparser.add_option("-H", "--height",      dest="height",            action="store",      default=300,          type="int",    help="height of the graphics [format: int] [default: 300]")\n-\tparser.add_option("-W", "--width",       dest="width",             action="store",      default=600,          type="int",    help="width of the graphics [format: int] [default: 1000]")\n-\tparser.add_option("-a", "--average",     dest="average",           action="store_true", default=False,                       help="plot average (instead of sum) [default: false] [format: boolean]")\n-\tparser.add_option("-n", "--names",       dest="names",             action="store",      default="nbElements", type="string", help="name for the tags (separated by commas and no space) [default: None] [format: string]")\n-\tparser.add_option("-l", "--color",       dest="colors",            action="store",      default=None,         type="string", help="color of the lines (separated by commas and no space) [format: string]")\n-\tparser.add_option("-z", "--normalize",   dest="normalize",         action="store_true", default=False,                       help="normalize data (when panels are different) [format: bool] [default: false]")\n-\tparser.add_option("-m", "--merge",       
dest="mergePlots",        action="store_true", default=False,                       help="merge all plots in one figure [format: bool] [default: false]")\n-\tparser.add_option("-v", "--verbosity",   dest="verbosity",         action="store",      default=1,            type="int",    help="trace level [default: 1] [format: int]")\n-\t(options, args) = parser.parse_args()\n-\n-\tgt = GetDistribution(options.verbosity)\n-\tgt.setInputFile(options.inputFileName, options.format)\n-\tgt.setOutputFile(options.outputFileName)\n-\tgt.setReferenceFile(options.referenceFileName)\n-\tgt.setNbBins(int(options.nbBins))\n-\tgt.set2Strands(options.bothStrands)\n-\tgt.setRegion(options.chromosome, options.start, options.end)\n-\tgt.setNormalization(options.normalize)\n-\tgt.setAverage(options.average)\n-\tgt.setYLimits(options.yMin, options.yMax)\n-\tgt.writeCsv(options.csv)\n-\tgt.writeGff(options.gff)\n-\tgt.setImageSize(options.height, options.width)\n-\tgt.setNames(options.names.split(","))\n-\tgt.setColors(None if options.colors == None else options.colors.split(","))\n-\tgt.setNormalization(options.normalize)\n-\tgt.mergePlots(options.mergePlots)\n-\tgt.run()\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/GetFlanking.py
--- a/SMART/Java/Python/GetFlanking.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,233 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2011\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.TranscriptWriter import TranscriptWriter\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.misc.Progress import Progress\n-\n-QUERY        = 0\n-REFERENCE    = 1\n-INPUTS       = (QUERY, REFERENCE)\n-STRANDS      = (-1, 1)\n-TAG_DISTANCE = "distance_"\n-TAG_SENSE    = "_sense"\n-TAG_REGION   = "_region"\n-TAGS_REGION  = {-1: "_upstream", 0: "", 1: "_downstream"}\n-TAGS_RREGION = {-1: "upstream", 0: "overlapping", 1: "downstream"}\n-TAGS_SENSE   = {-1: "antisense", 0: "", 1: "collinear"}\n-STRANDSTOSTR = {-1: "(-)", 0: "", 1: "(+)"}\n-\n-\n-def getOrderKey(transcript, direction, input):\n-\tif direction == 1:\n-\t\tif input == QUERY:\n-\t\t\treturn (transcript.getEnd(), -transcript.getStart())\n-\t\treturn (transcript.getStart(), -transcript.getEnd())\n-\tif input == QUERY:\n-\t\treturn (-transcript.getStart(), transcript.getEnd())\n-\treturn (-transcript.getEnd(), transcript.getStart())\n-\n-\n-class GetFlanking(object):\n-\n-\tdef __init__(self, verbosity):\n-\t\tself.verbosity   = verbosity\n-\t\tself.transcripts = dict([id, {}] for id in INPUTS)\n-\t\tself.directions  = []\n-\t\tself.noOverlap   = False\n-\t\tself.colinear    = False\n-\t\tself.antisense   = False\n-\t\tself.distance    = None\n-\t\tself.minDistance = None\n-\t\tself.maxDistance = None\n-\t\tself.tagName     = "flanking"\n-\n-\tdef 
setInputFile(self, fileName, format, id):\n-\t\tchooser = ParserChooser(self.verbosity)\n-\t\tchooser.findFormat(format)\n-\t\tparser = chooser.getParser(fileName)\n-\t\tfor transcript in parser.getIterator():\n-\t\t\tchromosome = transcript.getChromosome()\n-\t\t\tif chromosome not in self.transcripts[id]:\n-\t\t\t\tself.transcripts[id][chromosome] = []\n-\t\t\tself.transcripts[id][chromosome].append(transcript)\n-\n-\tdef setOutputFile(self, fileName):\n-\t\tself.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n-\n-\tdef addUpstreamDirection(self, upstream):\n-\t\tif upstream:\n-\t\t\tself.directions.append(-1)\n-\n-\tdef addDownstreamDirection(self, downstream):\n-\t\tif downstream:\n-\t\t\tself.directions.append(1)\n-\n-\tdef setColinear(self, colinear):\n-\t\tself.colinear = colinear\n-\n-\tdef setAntisense(self, antisense):\n-\t\tself.antisense = antisense\n-\n-\tdef setNoOverlap(self, noOverlap):\n-\t\tself.noOverlap = noOverlap\n-\n-\tdef setMinDistance(self, distance):\n-\t\tself.minDistance = distance\n-\n-\tdef setMaxDistance(se'..b'scriptRef: transcriptQuery.getDistance(transcriptRef))[0]\n-\t\t\t\tself.writer.addTranscript(self.setTags(transcriptQuery, transcriptRef, 0))\n-\t\t\tprogress.inc()\n-\t\tprogress.done()\n-\n-\tdef run(self):\n-\t\tfor chromosome in sorted(self.transcripts[QUERY].keys()):\n-\t\t\tself.flankings = dict([query, {}] for query in self.transcripts[QUERY][chromosome])\n-\t\t\tfor direction in STRANDS:\n-\t\t\t\t#print "comparison", chromosome, direction\n-\t\t\t\tself.getFlanking(chromosome, direction)\n-\t\t\tself.write()\n-\t\tself.writer.close()\n-\n-if __name__ == "__main__":\n-\t\n-\tdescription = "Get Flanking v1.0.1: Get the flanking regions of a set of reference. 
[Category: Data Selection]"\n-\n-\tparser = OptionParser(description = description)\n-\tparser.add_option("-i", "--input1",      dest="inputFileName1", action="store",                          type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n-\tparser.add_option("-f", "--format1",     dest="format1",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")\n-\tparser.add_option("-j", "--input2",      dest="inputFileName2", action="store",                          type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")\n-\tparser.add_option("-g", "--format2",     dest="format2",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")\n-\tparser.add_option("-5", "--upstream",    dest="upstream",       action="store_true", default=False,                     help="output upstream elements [format: boolean] [default: False]")\n-\tparser.add_option("-3", "--downstream",  dest="downstream",     action="store_true", default=False,                     help="output downstream elements [format: boolean] [default: False]")\n-\tparser.add_option("-c", "--colinear",    dest="colinear",       action="store_true", default=False,                     help="find first colinear element [format: boolean] [default: False]")\n-\tparser.add_option("-a", "--antisense",   dest="antisense",      action="store_true", default=False,                     help="find first anti-sense element [format: boolean] [default: False]")\n-\tparser.add_option("-e", "--noOverlap",   dest="noOverlap",      action="store_true", default=False,                     help="do not consider elements which are overlapping reference elements [format: boolean] [default: False]")\n-\tparser.add_option("-d", "--minDistance", dest="minDistance",    
action="store",      default=None,       type="int",    help="minimum distance between 2 elements [format: int]")\n-\tparser.add_option("-D", "--maxDistance", dest="maxDistance",    action="store",      default=None,       type="int",    help="maximum distance between 2 elements [format: int]")\n-\tparser.add_option("-t", "--tag",         dest="tagName",        action="store",      default="flanking", type="string", help="name of the new tag [format: string] [default: flanking]")\n-\tparser.add_option("-o", "--output",      dest="outputFileName", action="store",                          type="string", help="output file [format: output file in GFF3 format]")\n-\tparser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,          type="int",    help="trace level [format: int]")\n-\t(options, args) = parser.parse_args()\n-\n-\tgf = GetFlanking(options.verbosity)\n-\tgf.setInputFile(options.inputFileName1, options.format1, QUERY)\n-\tgf.setInputFile(options.inputFileName2, options.format2, REFERENCE)\n-\tgf.setOutputFile(options.outputFileName)\n-\tgf.addUpstreamDirection(options.upstream)\n-\tgf.addDownstreamDirection(options.downstream)\n-\tgf.setColinear(options.colinear)\n-\tgf.setAntisense(options.antisense)\n-\tgf.setNoOverlap(options.noOverlap)\n-\tgf.setMinDistance(options.minDistance)\n-\tgf.setMaxDistance(options.maxDistance)\n-\tgf.setNewTagName(options.tagName)\n-\tgf.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/GetFlanking.pyc
b
Binary file SMART/Java/Python/GetFlanking.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/GetRandomSubset.py
--- a/SMART/Java/Python/GetRandomSubset.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,96 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2011
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import random
-from optparse import OptionParser
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.writer.TranscriptWriter import TranscriptWriter
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.misc.Progress import Progress
-
-class GetRandomSubset(object):
-
-    def __init__(self, verbosity):
-        self.verbosity = verbosity
-
-    def setInputFile(self, fileName, format):
-        chooser = ParserChooser(self.verbosity)
-        chooser.findFormat(format)
-        self.parser = chooser.getParser(fileName)
-
-    def setNumber(self, number, percent):
-        if number != None:
-            self.number = number
-        elif percent != None:
-            self.number = int(float(percent) / 100 * self.parser.getNbTranscripts())
-        else:
-            raise Exception("Error! Number of elements to output is not given!")
-
-    def setOutputFile(self, fileName):
-        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
-
-    def chooseElements(self):
-        self.randomIndices = random.sample(range(self.parser.getNbTranscripts()), self.number)
-        
-    def run(self):
-        self.chooseElements()
-        progress  = Progress(self.parser.getNbTranscripts(), "Reading input file", self.verbosity)
-        nbWritten = 0
-        for cpt1, transcript in enumerate(self.parser.getIterator()):
-            if cpt1 in self.randomIndices:
-                self.writer.addTranscript(transcript)
-                nbWritten += 1
-            progress.inc()
-        self.writer.write()
-        self.writer.close()
-        progress.done()
-        if self.verbosity > 1:
-            print "%d transcripts read" % (self.parser.getNbTranscripts())
-            print "%d transcripts written" % (nbWritten)
-
-
-if __name__ == "__main__":
-    
-    description = "Get Random Subset v1.0.1: Get a random sub-set of a list of genomic coordinates. [Category: Personal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of file [compulsory] [format: transcript file format]")
-    parser.add_option("-n", "--number",    dest="number",         action="store", default=None, type="string", help="number of elements to output [format: int]")
-    parser.add_option("-p", "--percent",   dest="percent",        action="store", default=None, type="string", help="percentage of elements to output (between 0 and 100) [format: int]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",               type="string", help="output file [format: output file in GFF3 format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int", help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    grs = GetRandomSubset(options.verbosity)
-    grs.setInputFile(options.inputFileName, options.format)
-    grs.setNumber(options.number, options.percent)
-    grs.setOutputFile(options.outputFileName)
-    grs.run()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/GetReadDistribution.py
--- a/SMART/Java/Python/GetReadDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,303 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import random, os, glob, subprocess\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.parsing.GffParser import GffParser\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc import Utils\n-from commons.core.LoggerFactory import LoggerFactory\n-from commons.core.utils.RepetOptionParser import RepetOptionParser\n-\n-LOG_DEPTH      = "smart"\n-DEFAULT_REGION = "_all_"\n-MULTIPLE_STR   = {1: "", 1000: " (in kbp)", 1000000: " (in Gbp)"}\n-\n-class GetReadDistribution(object):\n-\n-\tdef __init__(self, verbosity = 0):\n-\t\tself.xLab         = ""\n-\t\tself.yLab         = "# reads"\n-\t\tself.verbosity    = verbosity\n-\t\tself.number       = random.randint(0, 100000)\n-\t\tself.log          = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)\n-\t\tself.parsers      = {}\n-\t\tself.distribution = {}\n-\t\tself.factors      = {}\n-\t\tself.regions      = None\n-\t\tself.tmpDatName   = None\n-\t\tself.tmpRName     = None\n-\t\tself.quorum       = 1\n-\t\tself.strands      = False\n-\t\tself.width        = 800\n-\t\tself.height       = 300\n-\t\tself.arial        = False\n-\n-\tdef setNames(self, names):\n-\t\tself.names = names\n-\n-\tdef setInputFiles(self, fileNames, format):\n-\t\tchooser = ParserChooser(self.verbosity)\n-\t\tchooser.findFormat(format)\n-\t\tfor cpt, fileName in enumerate(fileNames):\n-\t\t\tself.parsers[self.names[cpt]] = 
chooser.getParser(fileName)\n-\n-\tdef setOutputFileName(self, fileName):\n-\t\tself.outputFileName = fileName\n-\n-\tdef setLabs(self, xLab, yLab):\n-\t\tself.xLab = xLab\n-\t\tself.yLab = yLab\n-\n-\tdef setBinSize(self, binSize):\n-\t\tself.binSize = binSize\n-\n-\tdef setColors(self, colors):\n-\t\tself.colors = colors\n-\n-\tdef setFactors(self, factors):\n-\t\tif factors == None:\n-\t\t\tself.factors = dict([name, 1.0] for name in self.names)\n-\t\telse:\n-\t\t\tself.factors = dict(zip(self.names, factors))\n-\n-\tdef setMultiple(self, boolean):\n-\t\tself.multiple = boolean\n-\t\n-\tdef setImageSize(self, width, height):\n-\t\tif width != None:\n-\t\t\tself.width = width\n-\t\tif height != None:\n-\t\t\tself.height = height\n-\n-\tdef setQuorum(self, quorum):\n-\t\tself.quorum = quorum\n-\n-\tdef setRegionsFile(self, fileName):\n-\t\tif fileName != None:\n-\t\t\tself._loadRegions(fileName)\n-\n-\tdef setBothStrands(self, strands):\n-\t\tself.strands = strands\n-\n-\tdef setArial(self, arial):\n-\t\tself.arial = arial\n-\n-\tdef _checkOptions(self):\n-\t\tif not self.parsers:\n-\t\t\tse'..b' separated by commas [compulsory] [format: string]")\n-\tparser.add_option("-f", "--format",    dest="format",          action="store",      default=None,      type="string", help="format of the input [compulsory] [format: transcript or sequence file format]")\n-\tparser.add_option("-n", "--names",     dest="names",           action="store",      default=None,      type="string", help="name of the input data, separated by commas [compulsory] [format: string]")\n-\tparser.add_option("-o", "--output",    dest="outputFileName",  action="store",      default=None,      type="string", help="output file [format: output file in PNG format]")\n-\tparser.add_option("-s", "--binSize",   dest="binSize",         action="store",      default=10000,     type="int",    help="bin size [format: int] [default: 10000]")\n-\tparser.add_option("-l", "--xLabel",    dest="xLab",            
action="store",      default="",        type="string", help="x-axis label name [format: string]")\n-\tparser.add_option("-L", "--yLabel",    dest="yLab",            action="store",      default="# reads", type="string", help="y-axis label name [format: string] [default: Reads]")\n-\tparser.add_option("-c", "--colors",    dest="colors",          action="store",      default=None,      type="string", help="colors of the bars, separated by commas  [format: string]")\n-\tparser.add_option("-a", "--factors",   dest="factors",         action="store",      default=None,      type="string", help="normalization factors, separated by commas  [format: string]")\n-\tparser.add_option("-r", "--regions",   dest="regionsFileName", action="store",      default=None,      type="string", help="regions to plot [format: transcript file in GFF format]")\n-\tparser.add_option("-2", "--strands",   dest="strands",         action="store_true", default=False,                    help="plot negative strands on the negative x-axis [format: boolean] [default: False]")\n-\tparser.add_option("-m", "--multiple",  dest="multiple",        action="store_true", default=False,                    help="use human readable genomic positions (k, G) [format: boolean] [default: False]")\n-\tparser.add_option("-q", "--quorum",    dest="quorum",          action="store",      default=1,         type="int",    help="minimum number of intervals to plot a region [format: int] [default: 1]")\n-\tparser.add_option("-z", "--width",     dest="width",           action="store",      default=800,       type="int",    help="width of the image [format: int] [default: 800]")\n-\tparser.add_option("-Z", "--height",    dest="height",          action="store",      default=300,       type="int",    help="height of the image [format: int] [default: 300]")\n-\tparser.add_option("-A", "--arial",     dest="arial",           action="store_true", default=False,                    help="use Arial font [format: boolean] [default: 
false]")\n-\tparser.add_option("-v", "--verbosity", dest="verbosity",       action="store",      default=1,         type="int",    help="trace level [format: int]")\n-\toptions = parser.parse_args()[0]\n-\tiGetReadDistribution = GetReadDistribution(options.verbosity)\n-\tiGetReadDistribution.setNames(options.names.split(","))\n-\tiGetReadDistribution.setInputFiles(options.inputFileNames.split(","), options.format)\n-\tiGetReadDistribution.setOutputFileName(options.outputFileName)\n-\tiGetReadDistribution.setLabs(options.xLab, options.yLab)\n-\tiGetReadDistribution.setBinSize(options.binSize)\n-\tiGetReadDistribution.setColors(None if options.colors == None else options.colors.split(","))\n-\tiGetReadDistribution.setFactors(None if options.factors == None else map(float, options.factors.split(",")))\n-\tiGetReadDistribution.setRegionsFile(options.regionsFileName)\n-\tiGetReadDistribution.setMultiple(options.multiple)\n-\tiGetReadDistribution.setQuorum(options.quorum)\n-\tiGetReadDistribution.setImageSize(options.width, options.height)\n-\tiGetReadDistribution.setBothStrands(options.strands)\n-\tiGetReadDistribution.setArial(options.arial)\n-\tiGetReadDistribution.run()\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/GetReadSizes.py
--- a/SMART/Java/Python/GetReadSizes.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,262 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import random, os, glob, subprocess\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.parsing.GffParser import GffParser\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc import Utils\n-from commons.core.LoggerFactory import LoggerFactory\n-from commons.core.utils.RepetOptionParser import RepetOptionParser\n-\n-LOG_DEPTH      = "smart"\n-DEFAULT_REGION = "_all_"\n-\n-class GetReadSizes(object):\n-\n-\tdef __init__(self, verbosity = 0):\n-\t\tself.xLab       = "Size"\n-\t\tself.yLab       = "# reads"\n-\t\tself.verbosity  = verbosity\n-\t\tself.number     = random.randint(0, 100000)\n-\t\tself.log        = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)\n-\t\tself.parsers    = {}\n-\t\tself.sizes      = {}\n-\t\tself.factors    = {}\n-\t\tself.regions    = None\n-\t\tself.tmpDatName = None\n-\t\tself.tmpRName   = None\n-\t\tself.width      = 800\n-\t\tself.height     = 300\n-\t\tself.arial      = False\n-\n-\tdef setNames(self, names):\n-\t\tself.names = names\n-\n-\tdef setInputFiles(self, fileNames, format):\n-\t\tchooser = ParserChooser(self.verbosity)\n-\t\tchooser.findFormat(format)\n-\t\tfor cpt, fileName in enumerate(fileNames):\n-\t\t\tself.parsers[self.names[cpt]] = chooser.getParser(fileName)\n-\n-\tdef setOutputFileName(self, fileName):\n-\t\tself.outputFileName = fileName\n-\n-\tdef setLabs(self, xLab, yLab):\n-\t\tself.xLab = 
xLab\n-\t\tself.yLab = yLab\n-\n-\tdef setSizes(self, minSize, maxSize):\n-\t\tself.minSize = minSize\n-\t\tself.maxSize = maxSize\n-\n-\tdef setColors(self, colors):\n-\t\tself.colors = colors\n-\n-\tdef setFactors(self, factors):\n-\t\tself.factors = dict(zip(self.names, factors))\n-\n-\tdef setRegionsFile(self, fileName):\n-\t\tif fileName != None:\n-\t\t\tself._loadRegions(fileName)\n-\n-\tdef setImageSize(self, width, height):\n-\t\tif width != None:\n-\t\t\tself.width = width\n-\t\tif height != None:\n-\t\t\tself.height = height\n-\n-\tdef setArial(self, arial):\n-\t\tself.arial = arial\n-\n-\tdef _checkOptions(self):\n-\t\tif not self.parsers:\n-\t\t\tself.logAndRaise("ERROR: Missing input file names")\n-\n-\tdef _logAndRaise(self, errorMsg):\n-\t\tself.log.error(errorMsg)\n-\t\traise Exception(errorMsg)\n-\n-\tdef _loadRegions(self, fileName):\n-\t\tself.regions = {}\n-\t\tparser       = GffParser(fileName, self.verbosity)\n-\t\tfor transcript in parser.getIterator():\n-\t\t\tchromosome = transcript.getChromosome()\n-\t\t\tstart      = transcript.getStart()\n-\t\t\tend       '..b'fileName)):\n-\t\t\t\t\tos.remove(otherFileName)\n-\n-\tdef run(self):\n-\t\tLoggerFactory.setLevel(self.log, self.verbosity)\n-\t\tself._checkOptions()\n-\t\tself.log.info("START Get Read Sizes")\n-\t\tfor name in self.names:\n-\t\t\tself._parse(name)\n-\t\tself._plot()\n-\t\tself._cleanFiles()\n-\t\tself.log.info("END Get Read Sizes")\n-\n-\n-if __name__ == "__main__":\n-\tdescription = "Usage: GetReadSizes.py [options]\\n\\nGet Read Sizes v1.0.1: Get the sizes of a set of reads. 
[Category: Personal]\\n"\n-\tepilog = ""\n-\tparser = RepetOptionParser(description = description, epilog = epilog)\n-\tparser.add_option("-i", "--input",     dest="inputFileNames",  action="store",      default=None,     type="string", help="input files, separated by commas [compulsory] [format: string]")\n-\tparser.add_option("-f", "--format",    dest="format",          action="store",      default=None,     type="string", help="format of the input [compulsory] [format: transcript or sequence file format]")\n-\tparser.add_option("-n", "--names",     dest="names",           action="store",      default=None,     type="string", help="name of the input data, separated by commas [compulsory] [format: string]")\n-\tparser.add_option("-o", "--output",    dest="outputFileName",  action="store",      default=None,      type="string", help="output file [format: output file in PNG format]")\n-\tparser.add_option("-s", "--minSize",   dest="minSize",         action="store",      default=None,      type="int",    help="minimum size [format: int]")\n-\tparser.add_option("-S", "--maxSize",   dest="maxSize",         action="store",      default=None,      type="int",    help="maximum size [format: int]")\n-\tparser.add_option("-l", "--xLabel",    dest="xLab",            action="store",      default="Size",    type="string", help="x-axis label name [format: string] [default: Size]")\n-\tparser.add_option("-L", "--yLabel",    dest="yLab",            action="store",      default="# reads", type="string", help="y-axis label name [format: string] [default: Reads]")\n-\tparser.add_option("-c", "--colors",    dest="colors",          action="store",      default=None,      type="string", help="colors of the bars, separated by commas  [format: string]")\n-\tparser.add_option("-a", "--factors",   dest="factors",         action="store",      default=None,      type="string", help="normalization factors, separated by commas  [format: string]")\n-\tparser.add_option("-r", "--regions",   
dest="regionsFileName", action="store",      default=None,      type="string", help="regions to plot [format: transcript file in GFF format]")\n-\tparser.add_option("-z", "--width",     dest="width",           action="store",      default=800,       type="int",    help="width of the image [format: int] [default: 800]")\n-\tparser.add_option("-Z", "--height",    dest="height",          action="store",      default=300,       type="int",    help="height of the image [format: int] [default: 300]")\n-\tparser.add_option("-A", "--arial",     dest="arial",           action="store_true", default=False,                    help="use Arial font [format: boolean] [default: false]")\n-\tparser.add_option("-v", "--verbosity", dest="verbosity",       action="store",      default=1,         type="int",    help="trace level [format: int]")\n-\toptions = parser.parse_args()[0]\n-\tiGetReadSizes = GetReadSizes(options.verbosity)\n-\tiGetReadSizes.setNames(options.names.split(","))\n-\tiGetReadSizes.setInputFiles(options.inputFileNames.split(","), options.format)\n-\tiGetReadSizes.setOutputFileName(options.outputFileName)\n-\tiGetReadSizes.setLabs(options.xLab, options.yLab)\n-\tiGetReadSizes.setSizes(options.minSize, options.maxSize)\n-\tiGetReadSizes.setColors(None if options.colors == None else options.colors.split(","))\n-\tiGetReadSizes.setFactors(None if options.factors == None else map(float, options.factors.split(",")))\n-\tiGetReadSizes.setRegionsFile(options.regionsFileName)\n-\tiGetReadSizes.setImageSize(options.width, options.height)\n-\tiGetReadSizes.setArial(options.arial)\n-\tiGetReadSizes.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/GetUpDownStream.py
--- a/SMART/Java/Python/GetUpDownStream.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,152 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2012
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import os
-from optparse import OptionParser, OptionGroup
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.writer.Gff3Writer import Gff3Writer
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
-from SMART.Java.Python.ncList.FileSorter import FileSorter
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc import Utils
-
-
-class GetUpDownStream(object):
-
-    def __init__(self, verbosity = 0):
-        self.verbosity         = verbosity
-        self.inputReader       = None
-        self.outputWriter      = None
-        self.nbRead            = 0
-        self.nbWritten         = 0
-        self.nbMerges          = 0
-        self.splittedFileNames = {}
-
-    def __del__(self):
-        for fileName in self.splittedFileNames.values():
-            os.remove(fileName)
-            
-    def setInputFile(self, fileName, format):
-        parserChooser = ParserChooser(self.verbosity)
-        parserChooser.findFormat(format, "transcript")
-        self.parser = parserChooser.getParser(fileName)
-        self.sortedFileName = "%s_sorted.pkl" % (os.path.splitext(fileName)[0])
-
-    def setOutputFile(self, fileName):
-        self.outputWriter = Gff3Writer(fileName, self.verbosity)
-
-    def setDistances(self, up, down):
-        self.upDistance   = up
-        self.downDistance = down
-
-    def _sortFile(self):
-        fs = FileSorter(self.parser, self.verbosity-4)
-        fs.perChromosome(True)
-        fs.setOutputFileName(self.sortedFileName)
-        fs.sort()
-        self.splittedFileNames       = fs.getOutputFileNames()
-        self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()
-        self.nbRead                  = fs.getNbElements()
-
-    def _write(self, start, end, reference, after):
-        if start > end:
-            return
-        transcript = Transcript()
-        transcript.setChromosome(reference.getChromosome())
-        transcript.setStart(start)
-        transcript.setEnd(end)
-        transcript.setDirection("+")
-        transcript.setName("%s_%s" % ("up" if Utils.xor(reference.getDirection() == 1, after) else "down", reference.getName()))
-        self.outputWriter.addTranscript(transcript)
-        
-    def _getFlanking(self, chromosome):
-        progress    = Progress(self.nbElementsPerChromosome[chromosome], "Analyzing chromosome %s" % (chromosome), self.verbosity)
-        parser      = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)
-        previous    = None
-        for transcript in parser.getIterator():
-            progress.inc()
-            transcript.removeExons()
-            if previous == None:
-                distance = self.upDistance if transcript.getDirection() == 1 else self.downDistance
-                start    = max(1, transcript.getStart() - distance)
-                self._write(start, transcript.getStart()-1, transcript, False)
-                previous = transcript
-                continue
-            if previous.include(transcript):
-                continue
-            if transcript.overlapWith(previous):
-                previous = transcript
-                continue
-            distancePrevious = self.downDistance if previous.getDirection()   == 1 else self.upDistance
-            distanceCurrent  = self.upDistance   if transcript.getDirection() == 1 else self.downDistance
-            distance = transcript.getDistance(previous)
-            if distancePrevious + distanceCurrent == 0:
-                previous = transcript
-                continue
-            if distance >= distancePrevious + distanceCurrent:
-                endPrevious  = previous.getEnd() + distancePrevious
-                startCurrent = transcript.getStart() - distanceCurrent
-            else:
-                middle       = previous.getEnd() + int((distance-1) * float(distancePrevious) / (distancePrevious + distanceCurrent))
-                endPrevious  = middle
-                startCurrent = middle+1
-            self._write(previous.getEnd() + 1, endPrevious, previous, True)
-            self._write(startCurrent, transcript.getStart() - 1, transcript, False)
-            previous = transcript
-        distance = self.downDistance if previous.getDirection() == 1 else self.upDistance
-        self._write(previous.getEnd() + 1, previous.getEnd() + distance, previous, True)
-        progress.done()
-
-    def run(self):
-        self._sortFile()
-        for chromosome in sorted(self.nbElementsPerChromosome.keys()):
-            self._getFlanking(chromosome)
-        self.outputWriter.close()
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Get Up and Down Stream v1.0.0: Get the flanking regions of an annotation. [Category: Data Modification]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in mapping format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of the file [compulsory] [format: mapping file format]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-u", "--up",        dest="up",             action="store",      default=0,     type="int",    help="the upstream distance  [format: int]")
-    parser.add_option("-d", "--down",      dest="down",           action="store",      default=0,     type="int",    help="the downstream distance  [format: int]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
-    (options, args) = parser.parse_args()
-
-    guds = GetUpDownStream(options.verbosity)
-    guds.setInputFile(options.inputFileName, options.format)
-    guds.setOutputFile(options.outputFileName)
-    guds.setDistances(options.up, options.down)
-    guds.run()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/GetUpDownStream.pyc
b
Binary file SMART/Java/Python/GetUpDownStream.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/Helitrons.fasta
--- a/SMART/Java/Python/Helitrons.fasta Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,2378 +0,0 @@\n->HELITRON1\n-TCTACATATACATTTTTGGGAACGATTTTGAGTTGAAAAATCATTTATCAATTTCTGATA\n-TTGTATGTTTAGTCCCTACAAAATCAATCAGACAAGATAGTCTTCTGCGTTAGGACAATG\n-ACCAAATTTTTGATGAACTATTTACGTTTTGCACTAATTACTTACCTAAACATATAAATT\n-GTTTTGCACTATCCTCATAAATTGCACACATTGACTCCCTTAAAACTCTACGAATTCAAT\n-ATTACACGAAATAATATTTCCTAATTAGCAAAAATCCGATACAACCAATCACTTAACTAA\n-CCATATCTATAAAATTACTTGATTCGAAAATCTCCGATATTTTTGCTACAATCTTACCAA\n-ATATTCATATTCTCTATATCACATTTCTATAAGTTAAATCAGTTATTAAAGTTGCTTTAA\n-TTCCGTTTACCTCAAACAAACACTTAATATATCATATGATCTTACATTTGGTATAGTAAA\n-TATATTTTGAAATATACTACAAATCATCATCATACTCAAAATTCTTCCTATATATCGGTA\n-TTGTGTTATCAATCTTTACCAAGTACTATATTCTCCATCTAATTTAAAATGTAAAGTAAT\n-CTTGCATTCCAAAGATATTTTCCGATTTGAATCGCATCATCAATTCCCTCCATTCCTCAT\n-TTATAGCAATGCAGTTTCCTGTTTCCAAACATAGCACATCACATTAGCTAGAATATCTCC\n-AAAATTTTGCTTGAGAATCAAAGCAAAATCTAAATACTCATGCAATATTCCATCTTCTAA\n-ACCATCCAAATCATTCTGTTTTGAATGTACAAGCGTAAAAATAAGGGCAAATGCATCTAC\n-CATATCTCGACAGTTTGATTCAGAATCAAATCAAAATCTAAATACGGCATTTCGATAATT\n-ATTTTGCACTTATGATACTCAAGCATATTCCTATTTTAAAACCATTCCAAATTGTCTGTT\n-TCTATTGAGCTGCGCTTAGAATCTTTTATACGGTGTGAAAATTTAGGTTTACTAACTTAG\n-TGCACCCACTATAAATAAGGTTGCTCATATCATTCCATTTCATCATCACCTACACAGCAA\n-ACTATCTACACATTTCACTTCTACTTCTACAACTTTGTTATTTGAGAAATGGCTGCTACA\n-TTCGCATACCTTAAAGACGTGAGGCCTTACAAAAACGCATGGAGAGTCCAGGTCAAGATT\n-CTTCACTCTTGGAAGCAGTACACAAGCAACACCCGTGAAACCATTGAGTTGGTCATCTCT\n-GATGAATATGAAAAAAGTAAACTTACCATTTTAATTCTTTGCAATCGTAAATTATTATGT\n-CTTGCTAATGCCCAAATGAAAACGTCTATTCTATACTAATATGCTATGACTTCTCTTGGT\n-GGTAAGGGGAAAAAAATGCATGCTACCGTGAAGAAGGAGTTGGTTTCTAAGTTTGTTCAC\n-AAGCTTATTGTTGGCGAATGGGTCTTCATTGAGATTTTTGGTCTTACCTATGCGTCTGGC\n-CAGTTTCGCCCAACCAACCATCTATACAAGATGGCGTTCCAGGTTAGAACTGAAGTCATG\n-GGTTGTGCTTCTGTCTCTGACTCCAACTTCTTGACTTTGGCGCCATTTTCAAAAATCCAG\n-AGTGGTGAGCTTAACCCTCACATGTTGGTTGGTGAGGGCTATATCTCTCAAAGATCCGTG\n-TATTTTAATTGTCATGTTATTCAAGAAACTTTATAGCCATTACGTTTTTTTGAATCTGTG\n-ATTCGTTTTTGTATTAGATGCTATTGGCCAAATCATTACTGTTGGTGAGTTGGAGGAACT\n-CGAGGCTAACAATAAGCCTACAACAAAGATTGACTTTGAGATCAGAGATCAAATGTAAGT\n-TGCTTATTCTC
TGATTTTTGGTCCTCAAGAATGCAAACTATATCCAACAATTTATCTTAT\n-AGTAATTGTTTTAGTAAATTTTAAAACTAACATTAAAGTTAACTATTATAAGGATGAGAG\n-AATGCAAGTTACTTTGTGGGGAACATACGCTCAGCAGGTTTACAGAGCATGTCAGGAATC\n-TGAGGGAAAGAATGTGATTTTTCTCATTCGTTTTGCCAAAATTAAGAGTTACAAGGGTAT\n-ATTCAGTTTCTTTGTACCTATTCCTTTTCTTTCCATACTGCATGATCTATGGTCACTTAA\n-ATGTTATGAGACTGCAGGTGTGAAGAGTTTATCCAACTCATTTGATGCATCGCAAGTACA\n-TGTCAATCCGGACTTCCCTGAAGCTCACCATTTCAGTCAAACGTACGAAGATGTCATATT\n-TTCTATAGATTGTAAAGTGCTTACTTTTATAATATGTGATCTATGGTAACTTAACCGTTT\n-ATAAAATTGCTGGTGTGAAAAGTTTATCCAACTCTTTTGATGCATCTCAAGTACATGTCA\n-ATCCTAACTTCCTCGAAGTTGTGGCTTTCAGTCAATCGTAAATTTAGTAATATGATTCTC\n-AAAAAGGATATGCTCATATTGAATTTTGCTTTTTTTGTAATGTATATGAATACCGGTTTA\n-ACTCACTTTCCGTTTATATGTTATTTAACAGACTTCCAAATGATGGTGCTATTTGTGTGT\n-TCCGTGCAAGAGTCCCACGTTTTGAGATGGTTGCAGTTAAAAGGATTGACTACAGTGAGT\n-ACACAAGGAATACCATTGAAGATCTGCTTAGCTCGACTGAGGATTGTTTTTAATCAGATA\n-CTTTGTAATATGCACTTAAAGACATTAAGACTATATACTCATTTATGCTAATACATTGTA\n-TTTACTATATGTTTGTATAATTTCATTTAGGTTGGTAAAGTCAGAGTTTTGTGCACAATC\n-TATGCAATTGATACGGATTGGGCTTGGTATTACATCAGCTGCAAGACATGTAATAAGAAA\n-GTGAATCATATTCATGCTGGTGTTAATGGAGTAAACAACAAGGGTAAGAAGCCTAGATTC\n-TGGTGTGATACATGCAAGTCTGTTGTAACCAATGTGGTCTCTAGGTGCATATGCACTCCC\n-TATGCGTCATTAGTAGTTGCAGAGTATTTAATACATTCAAAAATGTTTATGGATTTTCTC\n-AAACGGTCTTACTTATAATTTATAATCTAAGTGGTTTTGCAAAAAAATGTGACCTATACA\n-ACTCAGTACATGATCTATGCAAAGGTTATGGATAGCACTGGTGAAGCCAAATTGCTTCTG\n-TTTGATTCAATTTGCTCTGAGATCATTGGCGAGTCTGCAACCTCTGTTCTTAATGGATCT\n-GTTGATGAGGTTTGTTTCTTAAAGTTTTCCCGTGTCTACTTTATGTCTTATTCTGATATA\n-TATTAATCTAGATTTTAAATACTATATTATATTACCCTGTTGCAGATTGAGGATCCAGAA\n-GATCTTCCTGATTCTGTAAAGAATCTGATTGGTAAGACATTTCTGTTTCTGGTGTGGGTT\n-GAGAAAGACAACATCTCGGATGGAAAAGAAATCTATAAGGTTTCAAAGGTGCTTCTGAAG\n-GATGGACTACTAGAGGAACAATTACTAGAGGATTCTGCTGAACATGTGAACCCTGCATCC\n-ATTGTGTCTGGTGATCAGGTAATATATACTATAAACATATATCATTAATCATAACACTTA\n-TAATAATTTGTATTTATTGTGCTTCCATTCATATGTCTAATCTTTAATCGTGTATTATAA\n-TACAATAGGTTCTACTTATGCTGGAGAATGGTAATGGATCGCCAGACTCTACGACTCCAT\n-CTTCAAAGCGTGTTTACGCTAGAGAAACGAGTGGCTCTGAAGGTTCTTCAAGTTCAAA
GA\n-AGGTGTGTGTTGTACCATTAGACTTGGAAAAGTCTTTATCTGAGAATGCTGAACATGGAG\n'..b'\n-TATAAGATTTTATATGACTTATTTTTTTTTTTGAACCGACATATAAGATCTTTAGTCAAG\n-TTAGAGGGTTTTTCATTGAAAGTTATCTTAAATCGTAGGCTTTAAATTTTTAAATGTGAA\n-GAAGGTTCATCTATTGTTTTGACTTTCAAGAAAATTGGTAATACATGCATTTGCATTTTT\n-ACAAAAAAAAAAACTGAATAATTTATGTTTATTTTTTAATATATAGTAAAACATACTGTT\n-TTTACAAAAAAAAATAAAAAAAAATTGAATGCAAACTTTTAATCAACTAAAATGTCTTTG\n-TAAAAAAATCATAATTTTTAAACATCAAAATTACTATTATTAATTATTTTGATCGATATA\n-TTAACAATTATAAATAAGTTTTTTAAATAATTTAATTTAAATATTATTTTATATTCAAAA\n-CTAAAACCGAATATAAAATCCACGCATCGCGTGGACAACTTCTAG\n->HELITRONY3A\n-TCTACTTAACAATTTTTAAGTACATTTTAAGGAATTTAATCGGATTGGTTTTTTTTTATG\n-GGTTTAACCCTCTTTTTTTTCTGTTTTTTTCGGCCCATTTGTTGAATCTCTTTACTATTT\n-GGTCCACATCCTATAATTTTAAATGTTAATCAAATCTTACCAAAATTTACAAATCGCTTT\n-AAATACTAATTAGTTACAATTAATTATTTCCGTTCAGCAAAGTAAATCCGACTTATATGG\n-AATCGAATATGTAACATTGCAAAACAAATCCGACTCATATGGTAATTTAGTATTTATCCG\n-TTCAGCAAAGTAATTACTTGAGTAAATATTCTATTAACTACAAAATCTTCAAGTTAAACG\n-AAATCAAAATCTGCATTCCACTACATAATTTTCGGAGATCTTAAGAACTAAATTAAAGCA\n-TATTCCTTAAACATCCTCTATTCAATTCCTATAACTTTTGAAAAATATACTTTTCAAAGA\n-ATTCTAACAATCATTATGAAACAACCTAACGAGATACTTTTTCCATAACAACTAATCATT\n-CAATTCATTTTCTAATTCTAGCCAATAAGAAAATAGAAAAAAATATTGCTTGCACACAAA\n-GATATTTTTTTATTCGAAATCAAAACTAACCCTAATTGTGTATGAGCTACTATATATACT\n-CTCTCATTAGCAAGCCAAATATCACATCTTCATTGTTTTTTTCATTACCTGCACCTATAT\n-ATAACATCTCTCATCTTCATCTTCATCGTTTTGTTTTTGTTTTCTGTTATAAGAATTTTC\n-ATATTTTATGTTTTTAGTTAGAAAATATACTAAACATTTAATCTGTTTTGCAGCTAATCT\n-TACAAACATATCAACAAATACACTCTCAAATCTGGTGAGTTATTGAAATGGTCCATAGTC\n-TTTTATTATCTTCATAAATATGATTCTAAATTCTTTTATTATTTACTAACTAAATATGTT\n-CTATATCTTTATAGTTAAAGATTACTTTTTCAATTTCAATCTTCATCTCTATGGTATGGA\n-TGTATCTCTTCAAACTTTGATAGCTCAATCTGCATCAAAACTCCTAGATAATTCATTTGA\n-GCAGTGTTCAAATGTACAAAATTTTATTTTTTATGATGATATAAGAGTTGTATTTCTTAT\n-ACAATTTCTGCTCATCTTTTCTTTGTTAATAACTACAAAATATTAAATATATAGGAATAA\n-ATTTTGCAATATATTGAAAAATATAGCCTTGAATATCAAATCCTACTAATAAGGAAAGTT\n-AAAATTTATTCCTTTAACAACTTATATACTCGCTTACATCTTCTCTAATAAGGCAAATTA\n-CGAAATATTCTCTAAACATTTAATACACGATAGCT
AAACGTTATCTATTCCTATTAAAAA\n-AAAAAAAATCTAACAAGAGAAAGTTGCAATTTCGGTAAGATTGAGTACTATATTATGTAT\n-AACTAATATTGTAAACAATCTAACCTTATTTGTATTTTTAAATCTCATAGAATATGCTTC\n-ATATAAACCCTAACATAAAGACCTAACCATAAACACTATAAATATAACCTTCACTATTTT\n-ACCGTAAGTCATAACCAAAAACTAACTATATTTATTTGCCTACACAATTACATTAGATTT\n-TATGTTTAATGACTAGGAAAACTTTGCTTGCTAACCGGATATGGTTGCTCTAAAATATTC\n-ATTGATCCTAATTTCAGATGTTTGGATAAAAAGAACTACATGTAAGTCACTCTCTTTACT\n-TAATTTTCCTAAATGTATAAAAAAAATTAAGGTTTACTTAATGATTTTTTTTGTTGTTTT\n-ATAGGAATGCATTTAAAGGTGATGATGATGACGACTATGAGCTTTATGAGGAGATTGTTG\n-AAACAAACTTCCAAATGTTTTTTAGTATCAAAAAATAGCAAACATTCATGTTTGATTGTC\n-CTTTCGTTTTTTTTTTTGTTTTGTACTTATGACTTATCAACATCGTACATTTTGTTATAT\n-TTCTACTAATCAAACTATCAAGTTATTAAAACAAGTTATAAGATTTTATGTTGGTGGTTT\n-ATTAATTTATTATTACTCAATTACGAGACATACATTCTAACAATATATTTTACAGTTCTA\n-ATACTTACTACTATAATTATATTTGTACTTTATGACAATTAAGTATAAGTATTGATCCCA\n-ACTAACCTTATAAATAGCGATTTATCCTTCACGTGCTCTTCAATCATAGTAATCAACTTC\n-CATCTACCAAATAATTATTTTGGAAATAAATTATGAAGGATGACATTATAACAATGATAT\n-AGTAAAAGGAATAAAATATTACAAAACTCTAATTTAGGTAAATATAATCATTAAGATCTT\n-TTCAAATAAACATAAATCAAAACAGAAGATTTTGGTAATATTAAATATAGGCCTCGATTA\n-CATTAGAAAACTTAACTAAGCAGTGATTATAGGGATTCTGTAAAGAAAATAACACGCATG\n-TTACTTTTTCTTTTTGGTTTTGTCATTACTCTCAACCATTTATCTTTCGACACATATTCA\n-CCTCTTAACTCTAAAATAACATCTTAGCTAAGATACATTTCTTAGATTAAGATTGTTAGT\n-TTGATTTAATTTAATCATTTTTAATCCTAAAATTTTGTTAAGATACTAAGCTAAGATATG\n-GGCTAAGATGCACCAATGGAGATGCTCTTAGAAACTAGAGGTAAATGTTTTGTTTCAAAA\n-GTATATATGAATAAATCATATAAAAACTTATAAAACTGAATCGTATATACTCCCGCGGGA\n-CGAATCTAGCATGGCTAGGCGGATTTATTAAAACACTATAACTATTAACTTATTTCAAAT\n-ATTATAGGGTGATCATATTTTAGAACTAATTAACTTACAAATTATATCCATATTTATTAT\n-ATGAACTACAAAATTTAAACCTATTAACACCTTCTTACTTTCAAAACAATTAATTTAAAT\n-TGGTACATTTTCAAATAAATATTTAATTTGTATTTACATCAAACAAACAACTGAGTGTAC\n-TAATTTATAGTATCGTTACATTGCATAATTAAAATAAATGAGTGTACTAATTTATAAACT\n-CGATCGCTTAATAAAATGTCATATATAATATACACACAACAAAAGATATAGTTTTCATAT\n-AAGAAAATGAAATATCAACAATAATTTGAAATCATATGCTTACAGAGATAGACACATTGT\n-ATAGAATAATTTTTATAAATCCGTAGAATAACCAATATTATCGTTACCTTAAATACGTGT\n-CATTTAATTCTACATACAA
CGGAACATATAATTTGCCAGTAATAAAAAATACAACAATCA\n-TACTATACATATACTACATTGTCAAAACCCAAAAAACCAAAACTATAAACAAACAAAAAT\n-CCTGCGGTGTACCGCGGGTCATATCCTAG\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/RestrictFromCoverage.py
--- a/SMART/Java/Python/RestrictFromCoverage.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,224 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2012\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import os, struct, time, random\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.ncList.NCList import NCList\n-from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n-from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n-from SMART.Java.Python.ncList.FileSorter import FileSorter\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-from SMART.Java.Python.misc import Utils\n-try:\n-    import cPickle as pickle\n-except:\n-    import pickle\n-\n-REFERENCE = 0\n-QUERY = 1\n-TYPES = (REFERENCE, QUERY)\n-TYPETOSTRING = {0: "reference", 1: "query"}\n-\n-class RestrictFromCoverage(object):\n-\n-    def __init__(self, verbosity = 1):\n-        self._verbosity               = verbosity\n-        self._randomNumber            = random.randint(0, 100000)\n-        self._nbWritten               = 0\n-        self._nbLines                 = dict([type, 0]  for type in TYPES)\n-        self._splittedFileNames       = dict([type, {}] for type in TYPES)\n-        self._nbElementsPerChromosome = dict([type, {}] for type in TYPES)\n-        self._nbElements              = dict([type, 0]  for type in TYPES)\n-        \n-    def __del__(self):\n-        pass\n-\n-    def 
_close(self):\n-        self._writer.close()\n-        \n-    def setInputFileName(self, fileName, format, type):\n-        chooser = ParserChooser(self._verbosity)\n-        chooser.findFormat(format)\n-        parser = chooser.getParser(fileName)\n-        sortedFileName = "%s_%d_%d_sorted.pkl" % (os.path.splitext(fileName)[0], self._randomNumber, type)\n-        if self._verbosity > 2:\n-            print "Preparing %s file..." % (TYPETOSTRING[type])\n-        startTime = time.time()\n-        fs = FileSorter(parser, self._verbosity-1)\n-        fs.perChromosome(True)\n-        fs.setOutputFileName(sortedFileName)\n-        fs.sort()\n-        self._nbLines[type]                 = fs.getNbElements()\n-        self._splittedFileNames[type]       = fs.getOutputFileNames()\n-        self._nbElementsPerChromosome[type] = fs.getNbElementsPerChromosome()\n-        self._nbElements[type]              = fs.getNbElements()\n-        endTime = time.time'..b'ranscript):\n-        self._writer.addTranscript(transcript)\n-        self._nbWritten += 1\n-\n-    def run(self):\n-        for chromosome in sorted(self._splittedFileNames[QUERY].keys()):\n-            self._compareChromosome(chromosome)\n-        self._close()\n-        if self._verbosity > 0:\n-            print "# queries: %d" % (self._nbElements[QUERY])\n-            print "# refs:    %d" % (self._nbElements[REFERENCE])\n-            print "# written: %d (%d%%)" % (self._nbWritten, 0 if self._nbElements[QUERY] == 0 else round(float(self._nbWritten) / self._nbElements[QUERY] * 100))\n-        \n-\n-if __name__ == "__main__":\n-    description = "Restrict From Coverage v1.0.0: Select the elements from the first set which have a given coverage. 
[Category: Data Comparison]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input1",           dest="inputFileName1", action="store",                     type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--format1",          dest="format1",        action="store",                     type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n-    parser.add_option("-j", "--input2",           dest="inputFileName2", action="store",                     type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n-    parser.add_option("-g", "--format2",          dest="format2",        action="store",                     type="string", help="format of file 2 [compulsory] [format: transcript file format]")\n-    parser.add_option("-o", "--output",           dest="output",         action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n-    parser.add_option("-n", "--minNucleotides",   dest="minNucleotides", action="store",      default=None,  type="int",    help="minimum number of nucleotides overlapping to declare an overlap [format: int]")\n-    parser.add_option("-N", "--maxNucleotides",   dest="maxNucleotides", action="store",      default=None,  type="int",    help="maximum number of nucleotides overlapping to declare an overlap [format: int]")\n-    parser.add_option("-p", "--minPercent",       dest="minPercent",     action="store",      default=None,  type="int",    help="minimum percentage of nucleotides overlapping to declare an overlap [format: int]")\n-    parser.add_option("-P", "--maxPercent",       dest="maxPercent",     action="store",      default=None,  type="int",    help="maximum percentage of nucleotides overlapping to declare an overlap [format: int]")\n-    parser.add_option("-e", "--minOverlap",      
 dest="minOverlap",     action="store",      default=None,  type="int",    help="minimum number of elements from 2nd file to declare an overlap [format: int]")\n-    parser.add_option("-E", "--maxOverlap",       dest="maxOverlap",     action="store",      default=None,  type="int",    help="maximum number of elements from 2nd file to declare an overlap [format: int]")\n-    parser.add_option("-s", "--strands",          dest="strands",        action="store_true", default=False,                help="consider the two strands separately [format: bool] [default: false]")\n-    parser.add_option("-v", "--verbosity",        dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")\n-    (options, args) = parser.parse_args()\n-\n-    rfc = RestrictFromCoverage(options.verbosity)\n-    rfc.setInputFileName(options.inputFileName1, options.format1, QUERY)\n-    rfc.setInputFileName(options.inputFileName2, options.format2, REFERENCE)\n-    rfc.setOutputFileName(options.output)\n-    rfc.setNbNucleotides(options.minNucleotides, options.maxNucleotides)\n-    rfc.setPercent(options.minPercent, options.maxPercent)\n-    rfc.setOverlap(options.minOverlap, options.maxOverlap)\n-    rfc.setStrands(options.strands)\n-    rfc.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/Rplots.pdf
b
Binary file SMART/Java/Python/Rplots.pdf has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/S1_S3_blast.blast
--- a/SMART/Java/Python/S1_S3_blast.blast Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,5405 +0,0 @@\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t83.33\t72\t12\t0\t20\t91\t1\t72\t3e-07\t48.1\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t96.15\t26\t1\t0\t256\t281\t356\t381\t5e-06\t44.1\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t96.00\t25\t1\t0\t251\t275\t431\t455\t2e-05\t42.1\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t91.67\t24\t2\t0\t588\t611\t13064\t13087\t0.019\t32.2\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t13\t0\t0\t529\t541\t7340\t7328\t1.2\t26.3\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t81\t92\t785\t796\t4.6\t24.3\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t222\t233\t3878\t3889\t4.6\t24.3\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t31\t42\t4243\t4232\t4.6\t24.3\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.0
0\t12\t0\t0\t535\t546\t6279\t6290\t4.6\t24.3\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t135\t146\t9265\t9254\t4.6\t24.3\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t164\t175\t12561\t12550\t4.6\t24.3\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t93.75\t16\t1\t0\t225\t240\t12716\t12701\t4.6\t24.3\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t376\t387\t12884\t12873\t4.6\t24.3\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRON5\t100.00\t12\t0\t0\t217\t228\t13045\t13056\t4.6\t24.3\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRONY1E\t95.83\t24\t1\t0\t588\t611\t1649\t1672\t8e-05\t40.1\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRONY1E\t90.91\t22\t2\t0\t550\t571\t1170\t1149\t0.30\t28.2\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_DHX-incomp_At_Col-B-G1463-Map16\tHELITRONY1E\t100.00\t14\t0\t0\t430\t443\t1650\t1663\t0.30\t28.2\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_Ath_Chr3_
DHX-incomp_At_Col-B-G1463-Map16\tHELITRONY1E\t100.00\t12\t0\t0\t130\t141\t199\t188\t4.6\t24.3\n-chr1_5531278_5531960_ms1762_Ath_TAIR10_Ath_Chr1_ms1606_Ath_TAIR10_Ath_Chr1_ms1401_Ath_TAIR10_Ath_Chr1_ms20168_Ath_TAIR10_A'..b'882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1E\t100.00\t12\t0\t0\t10\t21\t58\t47\t1.2\t24.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1E\t100.00\t12\t0\t0\t10\t21\t291\t280\t1.2\t24.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON4\t100.00\t12\t0\t0\t66\t77\t4914\t4903\t1.2\t24.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON4\t100.00\t11\t0\t0\t5\t15\t7896\t7886\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON4\t100.00\t11\t0\t0\t121\t131\t9920\t9910\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON4\t100.00\t11\t0\t0\t123\t133\t9977\t9967\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON4\t100.00\t11\t0\t0\t20\t30\t20090\t20100\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON3\t100.00\t1
2\t0\t0\t5\t16\t7568\t7557\t1.2\t24.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON3\t100.00\t11\t0\t0\t65\t75\t1998\t2008\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY3\t100.00\t11\t0\t0\t141\t151\t3832\t3822\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1D\t100.00\t11\t0\t0\t16\t26\t217\t207\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1D\t100.00\t11\t0\t0\t127\t137\t1692\t1702\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1D\t100.00\t11\t0\t0\t124\t134\t2354\t2344\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRONY1D\t100.00\t11\t0\t0\t124\t134\t2502\t2492\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON5\t100.00\t11\t0\t0\t108\t118\t11817\t11807\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON1\t100.00\t11\t0\t0\t146\t156\t9667\t9677\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath
_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON1\t100.00\t11\t0\t0\t136\t146\t13674\t13664\t4.8\t22.3\n-chr5_9819093_9819279_ms68665_Ath_TAIR10_Ath_Chr5_ms58761_Ath_TAIR10_Ath_Chr5_ms52882_Ath_TAIR10_Ath_Chr5_ms43998_Ath_TAIR10_Ath_Chr5_DHX-incomp_At_Col-B-P65.35-Map20_reversed\tHELITRON1\t100.00\t11\t0\t0\t123\t133\t18664\t18654\t4.8\t22.3\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/SelectByTag.py
--- a/SMART/Java/Python/SelectByTag.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,148 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Select the transcript such that a tag value is not less than a given threshold"""
-import os
-import sys
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from commons.core.writer import MySqlTranscriptWriter
-from commons.core.writer.Gff3Writer import Gff3Writer
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.RPlotter import RPlotter
-
-class SelectByTag(object):
-    
-    def __init__(self, verbosity = 1):
-        self.input     = None
-        self.format    = None
-        self.tag       = None
-        self.value     = None
-        self.min       = None
-        self.max       = None
-        self.default   = None
-        self.output    = None
-        self.mysql     = None
-        self.verbosity = verbosity
-
-        self.parser      = None
-        self.writer      = None
-        self.mysqlWriter = None
-        self.nbElements  = None
-        self.nbWritten   = 0
-
-    
-    def setParser(self):
-        self.parser     = TranscriptContainer(self.input, self.format, self.verbosity)
-        self.nbElements = self.parser.getNbTranscripts()
-
-
-    def setWriter(self):
-        self.writer = Gff3Writer(self.output, self.verbosity)
-        if self.mysql:
-            self.mysqlWriter = MySqlTranscriptWriter(self.output, self.verbosity)
-
-
-    def isAccepted(self, transcript):
-        value = transcript.getTagValue(self.tag)
-        if value == None:
-            if self.default != None:
-                value = self.default
-            else:
-                raise Exception("Error! Transcript %s no tag called '%s'" % (transcript, self.tag))
-        if self.value != None:
-            if self.value == str(value):
-                return True
-            return self.value.isdigit() and value == float(self.value)
-        value = float(value)
-        return (self.min == None or self.min <= value) and (self.max == None or self.max >= value)
-
-
-    def readInputFile(self):
-        progress = Progress(self.parser.getNbTranscripts(), "Writing transcripts", self.verbosity)
-        for transcript in self.parser.getIterator():
-            if self.isAccepted(transcript):
-                self.writer.addTranscript(transcript)
-                if self.mysql:
-                    self.mysqlWriter.addTranscript(transcript)
-                self.nbWritten += 1
-            progress.inc()
-        progress.done()
-
-
-    def writeFile(self):
-        self.writer.write()
-        if self.mysql:
-            self.mysqlWriter.write()
-
-    
-    def run(self):
-        self.setParser()
-        self.setWriter()
-        self.readInputFile()
-        self.writeFile()
-        if self.verbosity > 0:
-            print "%d input" % (self.nbElements)
-            if self.nbElements != 0:
-                print "%d output (%.2f%%)" % (self.nbWritten, float(self.nbWritten) / self.nbElements * 100)
-
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Select by Tag v1.0.2: Keep the genomic coordinates such that a the value of a given tag is between two limits. [Category: Data Selection]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of the input [compulsory] [format: transcript file format]")
-    parser.add_option("-g", "--tag", dest="tag", action="store", default=None, type="string", help="the tag [compulsory] [format: string]")     
-    parser.add_option("-a", "--value", dest="value", action="store", default=None, type="string", help="the value to be found [format: string]")     
-    parser.add_option("-m", "--min", dest="min", action="store", default=None, type="float", help="the minimum threshold [format: float]")     
-    parser.add_option("-M", "--max", dest="max", action="store", default=None, type="float", help="the maximum threshold [format: float]")     
-    parser.add_option("-d", "--default", dest="default", action="store", default=None, type="float", help="value if tag is not present [format: float]")     
-    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
-    parser.add_option("-y", "--mysql", dest="mysql", action="store_true", default=False, help="write output into MySQL tables [format: boolean] [default: False]")
-    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    selectByTag         = SelectByTag(options.verbosity)
-    selectByTag.input   = options.inputFileName
-    selectByTag.format  = options.format
-    selectByTag.tag     = options.tag
-    selectByTag.value   = options.value
-    selectByTag.min     = options.min
-    selectByTag.max     = options.max
-    selectByTag.default = options.default
-    selectByTag.output  = options.outputFileName
-    selectByTag.mysql   = options.mysql
-    selectByTag.run()
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/SR1.fastq
--- a/SMART/Java/Python/TestFiles/SR1.fastq Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,5000 +0,0 @@\n-@HWI-EAS337_3:7:1:415:1217/1\n-GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n-+HWI-EAS337_3:7:1:415:1217/1\n-WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQ\n-@HWI-EAS337_3:7:1:208:1489/1\n-GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n-+HWI-EAS337_3:7:1:208:1489/1\n-WWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU\n-@HWI-EAS337_3:7:1:278:1153/1\n-GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n-+HWI-EAS337_3:7:1:278:1153/1\n-WWWWWWWWWWWWWWWWWWWWWWVWVVVWWVUUUUUR\n-@HWI-EAS337_3:7:1:1178:755/1\n-GGTGAGAGTGGTTGGTTGATGGTAAAACCATTGAAT\n-+HWI-EAS337_3:7:1:1178:755/1\n-WWWWWWWWWVWWWVVWWVVWVVVVWVWVVVUUUUUU\n-@HWI-EAS337_3:7:1:277:1259/1\n-GGGTGACAAAGAAAACAAAAGGGACATGGTACTTGG\n-+HWI-EAS337_3:7:1:277:1259/1\n-WWWWWWWWWWWWWWWWWWWWWWVWWWWWWVUUUUUU\n-@HWI-EAS337_3:7:1:447:1231/1\n-GACTTGTGGAAGAGTTGGAATGGAAAGCTGGAGCCT\n-+HWI-EAS337_3:7:1:447:1231/1\n-WWWWWWWWWWWVWVWWWVWWWVVVVVVVVVURUSUU\n-@HWI-EAS337_3:7:1:300:1199/1\n-GTTTTTGCATATAGATCTCTTTGTAAAGATATCCAT\n-+HWI-EAS337_3:7:1:300:1199/1\n-WVWWWWWWWWWWWVWWWWWWWWQWVVVTWWUUUURU\n-@HWI-EAS337_3:7:1:247:1210/1\n-GATAGCTTTGACTATAGGACTTTTATGTATGTGTTG\n-+HWI-EAS337_3:7:1:247:1210/1\n-WWWWWWWWWWWWVWWWVVWWWWWWWWTVVWRULUUR\n-@HWI-EAS337_3:7:1:1154:1517/1\n-GAATGTTGCAGACCTTACTCCTACCTATGAAGCACA\n-+HWI-EAS337_3:7:1:1154:1517/1\n-WWWWWWWVWWVWWWWWWWWWWWWWVWVWVWUSUUUU\n-@HWI-EAS337_3:7:1:164:1869/1\n-GTTTGATAGGAATTTATTTCTTCTTCGACATCCACC\n-+HWI-EAS337_3:7:1:164:1869/1\n-WWWWWWWWVVWWWWWWWWWWWWWWWWQWWWUUUUUU\n-@HWI-EAS337_3:7:1:415:1194/1\n-GATGGTTGACACATTAAGAACATTCTCACCGGTCTC\n-+HWI-EAS337_3:7:1:415:1194/1\n-WWWWWWWWWWWWWWWWWVWVWVWWWWWWWVSUUUUU\n-@HWI-EAS337_3:7:1:645:1892/1\n-GATAGTAAGCACCCCTCACTTCCAACCCAAAGATTG\n-+HWI-EAS337_3:7:1:645:1892/1\n-WWWWWWWWWWWWWWWWWVWWWWWVVWVWWVUUUUUU\n-@HWI-EAS337_3:7:1:33:1446/1\n-GTTATTCTTTCTTTCTCAAATGGATGCAGTAATGCA\n-+HWI-EAS337_3:7:1:33:1446/1\n-WWWWWWWWWWWWWWWWWWWWWUQWWVVWQWUUUSUU\n-@HWI-EAS337_3:7:1:1194:1427/1\n-GAAAAATCACATTTTTTTGTTTGATAAAAACCCAGA\n-+HWI-EAS337_3:7:1:1194:1427/1\n-WWWWWWWWVWVWWWWWWWVWWWUWWWWWWWUUUUSU\n-@HWI-EAS337_3:7
:1:624:1913/1\n-GACATCTTCAACTCCGGAGTTTTGAGTAACATTATA\n-+HWI-EAS337_3:7:1:624:1913/1\n-WWWWWWWWWWVWWWWVVVVWWWWVVVWWVWUUUUUU\n-@HWI-EAS337_3:7:1:437:1202/1\n-GTACTTATGATGAAACTGAGATCAACTACCACCTCC\n-+HWI-EAS337_3:7:1:437:1202/1\n-WWWWWVWWWVWVWWWWWWWWVWWWWVWVVVUUUUUU\n-@HWI-EAS337_3:7:1:1386:1787/1\n-GTTTAGCTAGTATTAAGGCTAGAAATGGATATGATG\n-+HWI-EAS337_3:7:1:1386:1787/1\n-WWWWWWWWWWWWWWWWVVWWWVWVVWVVVWUUSUUO\n-@HWI-EAS337_3:7:1:227:1155/1\n-GATAGCAGCAAGGTTATTGGAATCTAAGCAATCTAC\n-+HWI-EAS337_3:7:1:227:1155/1\n-WWVWWVWWVVWVVIWVWVVUWVVVVWVTVVUUUUSU\n-@HWI-EAS337_3:7:1:472:1025/1\n-GAAGTGATACTCATAAAACTATTTAGAAAGTTAATT\n-+HWI-EAS337_3:7:1:472:1025/1\n-WWWWWWWWWWWWWWWVWVWWWWWWVVWWWVUUUUUU\n-@HWI-EAS337_3:7:1:220:1482/1\n-GCTATATGAGAATTCAGGCCACTTGTAGTTCGATAA\n-+HWI-EAS337_3:7:1:220:1482/1\n-WWWWWWWWWVWWWWWWWVWWWWWWWWVVWWURUUUU\n-@HWI-EAS337_3:7:1:1699:1966/1\n-GATGAAGGATACTACAAAAAAAAGGGTTATTTTGTG\n-+HWI-EAS337_3:7:1:1699:1966/1\n-WWWWWWWWWWWWWWWWVWVWWWWWWWVWWWUUUSUR\n-@HWI-EAS337_3:7:1:547:1084/1\n-GTGGTCAGGTCCTCTTCAAGTGATACAATGTTCCCC\n-+HWI-EAS337_3:7:1:547:1084/1\n-WWWWWWWWWWWWWWWWWWVVWVWWWWWWWVUUUUSU\n-@HWI-EAS337_3:7:1:464:1097/1\n-GAAATTGAAGCTAGTTATTGACAGTTTACCAAGTTA\n-+HWI-EAS337_3:7:1:464:1097/1\n-WWWWWWWWWWWVWWVWWWWWWWWVVWWWWVUUUUUR\n-@HWI-EAS337_3:7:1:171:1480/1\n-GATAATACTATTAGCACATCAGCCTCTAGATGAGAC\n-+HWI-EAS337_3:7:1:171:1480/1\n-WWWWWWWWWWWWWVWWWWWWWVWWWWWWTVUUUUUU\n-@HWI-EAS337_3:7:1:293:1251/1\n-GTGGTAGTGAGCTCCGTGGTGAACAAGATGACGGAA\n-+HWI-EAS337_3:7:1:293:1251/1\n-WWWWWWWVWVWWWWVVWWVVVVVVWVVVVVRPUURR\n-@HWI-EAS337_3:7:1:647:1863/1\n-GGGTTTCAGATTAGTAAGTTATAGTGAAAAAATATA\n-+HWI-EAS337_3:7:1:647:1863/1\n-WWVWWWWWVWWWWVWWVVWWWWWWWVWVVWUUUUUU\n-@HWI-EAS337_3:7:1:263:1275/1\n-GCTACGTCTGCTCTAACTCCTAATATGATCCTGTAT\n-+HWI-EAS337_3:7:1:263:1275/1\n-WWWWWWWWWWWWWWWWWWWWWVWWWWQVWWUUOUUU\n-@HWI-EAS337_3:7:1:1112:215/1\n-GGTGTTGATTTCACAAGGAGGAATACTCATCTAAAA\n-+HWI-EAS337_3:7:1:1112:215/1\n-WWVWWVVWVVWWWVWWVUWVVVWWWVWTVWUUUUUU\n-@HWI-EAS337_3:7:1:319:1275/1\n-GTTATAGTTCTTGACAACAA
AGTACAGAGGTGGTCC\n-+HWI-EAS337_3:7:1:319:1275/1\n-WWWWWWWWWWWWVWWWWWWWWWWWWWVWVWUUSUUU\n-@HWI-EAS337_3:7:1:1310:1480/1'..b'A\n-+HWI-EAS337_3:7:1:986:591/1\n-WWWWWWWWWVWWWWWWWWWWWWWVWVVWVVUUUUUR\n-@HWI-EAS337_3:7:1:181:1099/1\n-GGTCGACGTAAGAGATCTGCAGGGCTATTACTCATT\n-+HWI-EAS337_3:7:1:181:1099/1\n-WWWWWWWWWWWWWVWWWWWWWVVWWWVWWVUUUUUU\n-@HWI-EAS337_3:7:1:509:832/1\n-GTGAAGTATGGGTGGAAATGCTTGCGTATGCTGCTA\n-+HWI-EAS337_3:7:1:509:832/1\n-WWWWVWWWWVVWWWWWWVWVWWWVVVVVWVSUUUUR\n-@HWI-EAS337_3:7:1:510:597/1\n-GGGTCTGGAATAGTAATGCGCTGATTCTAGTAAAGT\n-+HWI-EAS337_3:7:1:510:597/1\n-VWWWVWWWWWWWWWWWWVWWWWWVWWWWVVUUUUUU\n-@HWI-EAS337_3:7:1:1765:1489/1\n-GTCAATTTTTTCTTTGTTTAAATCCGGGGAGGCTAG\n-+HWI-EAS337_3:7:1:1765:1489/1\n-WVWWVWWWWWWWWWWVWWWWWWWWWQQVTVUSUUUR\n-@HWI-EAS337_3:7:1:417:1560/1\n-GTAACCTTCCCAGTGTCTCCTTAAGAAAGACTTGGA\n-+HWI-EAS337_3:7:1:417:1560/1\n-WWWWVWWSWSVWVVVVWWWWWWWWVSWWWWUQUUQU\n-@HWI-EAS337_3:7:1:1047:854/1\n-GTTGAAATTCCTGATTTTCCATGTGCATCATAAGCC\n-+HWI-EAS337_3:7:1:1047:854/1\n-WWWWWVWWWWWWWWUWWWWWVWVWVVVWVVUUUUUU\n-@HWI-EAS337_3:7:1:1296:202/1\n-GGTGTTGGAGTTGGATTTGTTTCTGCTTTGATATCC\n-+HWI-EAS337_3:7:1:1296:202/1\n-WWWWWWWVWVWWWVVWWWWWWWWWVVWWWTUUUUUF\n-@HWI-EAS337_3:7:1:502:642/1\n-GATGATTCTTGCTGGTTAAGTTGAGATGGGTTATAA\n-+HWI-EAS337_3:7:1:502:642/1\n-WWWWWVPVVVWWWVVVWWWVWWSVVVWVVVUUUUUR\n-@HWI-EAS337_3:7:1:82:1651/1\n-GGCATCCTGTTCATCCTTCTCTTCATTTTTAGGCGT\n-+HWI-EAS337_3:7:1:82:1651/1\n-WWWWWWWWVWWWWWWWWVWWWWWWVWWWWWUKJUQU\n-@HWI-EAS337_3:7:1:1505:1274/1\n-GAAACTTTTCAAAAAAAAAGTTGCATGAGAAATAAG\n-+HWI-EAS337_3:7:1:1505:1274/1\n-WWWWWWWWWWWWVWWWWWWWVWWWWWWVWWSUUUUR\n-@HWI-EAS337_3:7:1:5:1770/1\n-GTGTGAAAAAGTATTTCATTCACATATTGAGTTGTT\n-+HWI-EAS337_3:7:1:5:1770/1\n-WWWWWWWWWWVWWWWWWWWWWWWWWWWWVVSUUQUU\n-@HWI-EAS337_3:7:1:115:1005/1\n-GATTTTACTGGAATGGGGAAGGGTGAGGCATGGGTG\n-+HWI-EAS337_3:7:1:115:1005/1\n-WWWWWWWWWVVWWWVVVVWWVVVWWWVVWVUUUUUU\n-@HWI-EAS337_3:7:1:354:1708/1\n-GCATCCGACAGTGACTTAGACGATGAGGAATACGAG\n-+HWI-EAS337_3:7:1:354:1708/1\n-WWWWWWWWWWWWVWWWWWVWWVWWVWWVWWU
UUUUR\n-@HWI-EAS337_3:7:1:1639:1500/1\n-GTGATTATTATCTAACTCTGCAACAGCATCCAGGGA\n-+HWI-EAS337_3:7:1:1639:1500/1\n-WWWWWWVVWWWWWVVWVVWUVVVVVVVVWVUUUUUR\n-@HWI-EAS337_3:7:1:766:243/1\n-GTGGCATCTATGGAAGATAAATTGGAGATTGTTGCT\n-+HWI-EAS337_3:7:1:766:243/1\n-WWVWWWWWWWWVVWWVWWWWWWWVVVTVWWRUUJSU\n-@HWI-EAS337_3:7:1:920:144/1\n-GTGCGATCACACTGTTTATGTTGTTGTTGATCATTG\n-+HWI-EAS337_3:7:1:920:144/1\n-WWVVWWWWWVWPWVWWVWWSWVSWWHWWLVUCPUUH\n-@HWI-EAS337_3:7:1:389:268/1\n-GGTCAATTAGAGAGGGCAACCACCCTCAAAGAATTT\n-+HWI-EAS337_3:7:1:389:268/1\n-WWWWWWWWWVWWWWVVWWWWWWWWWWWWVVSUUUUU\n-@HWI-EAS337_3:7:1:294:1868/1\n-GAAAAAAAATTGTTTGTCTTGAATTAATGTTTCAAT\n-+HWI-EAS337_3:7:1:294:1868/1\n-VWVWWWWWWWWVQWWWWOWVVWWVWVVWQWUURULU\n-@HWI-EAS337_3:7:1:1147:62/1\n-GAATTCCCCTCAGGTTGGAGTTGTGCACTTGGCACT\n-+HWI-EAS337_3:7:1:1147:62/1\n-WWWWWWWWWWWWVWWWWWWVWWVWVVWVVWUUUUUU\n-@HWI-EAS337_3:7:1:787:1759/1\n-GGTTTTATTAGAATTGGTAGCTGTTCTGATTTTCTG\n-+HWI-EAS337_3:7:1:787:1759/1\n-WVVWWWWVWWWWWWWVUWWUWWUVWVVTVVUUUUUH\n-@HWI-EAS337_3:7:1:425:1939/1\n-GCTAATTGTGGTGTCTGGGTCTATGTGGCTAAACTT\n-+HWI-EAS337_3:7:1:425:1939/1\n-WWWWVWWVWWWWVWWWVVVVWWWVVWVVVWUUUUUU\n-@HWI-EAS337_3:7:1:187:1132/1\n-GTGGGAGAGGCAAGGGGCTTGGCTCATATCCTCTTC\n-+HWI-EAS337_3:7:1:187:1132/1\n-WVWWWWWWWWWWWWWWWWWWWTVWVWWWVVUUUUUU\n-@HWI-EAS337_3:7:1:1739:1840/1\n-GGAGGGGTGAAATCGTTTCTGAAAAATAATGAAATG\n-+HWI-EAS337_3:7:1:1739:1840/1\n-WWVWWWWWWWWWWWWWWVTWWWVVVWWWWWUUUUUU\n-@HWI-EAS337_3:7:1:1505:1876/1\n-GAAAGATCAAGTGTTGTCAAGTTCACTAGTTTAGAG\n-+HWI-EAS337_3:7:1:1505:1876/1\n-WWWWWWWWWWWWWWWWWWWWVWWVWWWVVVUUUSUR\n-@HWI-EAS337_3:7:1:447:192/1\n-GACTATGCCTAGCAGCTGAAATCACCACAACAAGTT\n-+HWI-EAS337_3:7:1:447:192/1\n-WWWWWWWWWWWWWWWTWWWVVWWVWWWWWVUUUUUU\n-@HWI-EAS337_3:7:1:21:2019/1\n-GTATGAGGTAAAAGATGATAACCTGTCTTCCAGCCC\n-+HWI-EAS337_3:7:1:21:2019/1\n-VWWVVWWVVWWWWWWWWWWWWWWWQVVWWWUURUUU\n-@HWI-EAS337_3:7:1:1593:652/1\n-GTGATGAGTAAAACATCATCATATGAACTTGAAGAG\n-+HWI-EAS337_3:7:1:1593:652/1\n-WWWVWVWWVWVWWVWWWWWWVVWWVWWVWWUUUSUU\n-@HWI-EAS337_3:7:1:1254:1660/1\n-GAAGTT
TGTAATTCCTTTTAGGATTGTGGTTAACAT\n-+HWI-EAS337_3:7:1:1254:1660/1\n-WWWVVWWWWWWWWWWWVWVWVUWWWTWVQWUUUUMU\n-@HWI-EAS337_3:7:1:291:629/1\n-GTAGAGGAGGTAGGCTTGGTGGTCCCTCTATGGTAA\n-+HWI-EAS337_3:7:1:291:629/1\n-WWWWWWWWWWVVVWVWVVWTWWKOVVTRVSUSSMFR\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/Wig/chr1.wig
--- a/SMART/Java/Python/TestFiles/Wig/chr1.wig Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,9 +0,0 @@
-fixedStep  chrom=chr1  start=11  step=1
-1.1
-1.2
-fixedStep  chrom=chr1  start=14  step=1
-1.4
-1.5
-variableStep chrom=chr1
-17  1.7
-19  1.9
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/adress.txt
--- a/SMART/Java/Python/TestFiles/adress.txt Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,7 +0,0 @@
-0
-58
-115
-173
-231
-289
-347
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/clusterize_default_expected.gff3
--- a/SMART/Java/Python/TestFiles/clusterize_default_expected.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,1790 +0,0 @@\n-chr4\tS-MART\ttranscript\t1\t50000\t.\t+\t.\tnbElements=0;ID=region1;Name=region1\n-chr4\tS-MART\ttranscript\t49951\t99950\t.\t+\t.\tnbElements=0;ID=region2;Name=region2\n-chr4\tS-MART\ttranscript\t99901\t149900\t.\t+\t.\tnbElements=0;ID=region3;Name=region3\n-chr4\tS-MART\ttranscript\t149851\t199850\t.\t+\t.\tnbElements=0;ID=region4;Name=region4\n-chr4\tS-MART\ttranscript\t199801\t249800\t.\t+\t.\tnbElements=0;ID=region5;Name=region5\n-chr4\tS-MART\ttranscript\t249751\t299750\t.\t+\t.\tnbElements=0;ID=region6;Name=region6\n-chr4\tS-MART\ttranscript\t299701\t349700\t.\t+\t.\tnbElements=0;ID=region7;Name=region7\n-chr4\tS-MART\ttranscript\t349651\t399650\t.\t+\t.\tnbElements=0;ID=region8;Name=region8\n-chr4\tS-MART\ttranscript\t399601\t449600\t.\t+\t.\tnbElements=0;ID=region9;Name=region9\n-chr4\tS-MART\ttranscript\t449551\t499550\t.\t+\t.\tnbElements=0;ID=region10;Name=region10\n-chr4\tS-MART\ttranscript\t499501\t549500\t.\t+\t.\tnbElements=0;ID=region11;Name=region11\n-chr4\tS-MART\ttranscript\t549451\t599450\t.\t+\t.\tnbElements=0;ID=region12;Name=region12\n-chr4\tS-MART\ttranscript\t599401\t649400\t.\t+\t.\tnbElements=0;ID=region13;Name=region13\n-chr4\tS-MART\ttranscript\t649351\t699350\t.\t+\t.\tnbElements=0;ID=region14;Name=region14\n-chr4\tS-MART\ttranscript\t699301\t749300\t.\t+\t.\tnbElements=0;ID=region15;Name=region15\n-chr4\tS-MART\ttranscript\t749251\t799250\t.\t+\t.\tnbElements=0;ID=region16;Name=region16\n-chr4\tS-MART\ttranscript\t799201\t849200\t.\t+\t.\tnbElements=0;ID=region17;Name=region17\n-chr4\tS-MART\ttranscript\t849151\t899150\t.\t+\t.\tnbElements=0;ID=region18;Name=region18\n-chr4\tS-MART\ttranscript\t899101\t949100\t.\t+\t.\tnbElements=0;ID=region19;Name=region19\n-chr4\tS-MART\ttranscript\t949051\t999050\t.\t+\t.\tnbElements=0;ID=region20;Name=region20\n-chr4\tS-MART\ttranscript\t999001\t1049000\t.\t+\t.\tnbElements=0;ID=region21;Name=region21\n-chr4\tS-MART\ttranscript\t1048951\t1098950\t.\t+\t.\tnbElements=0;ID=re
gion22;Name=region22\n-chr4\tS-MART\ttranscript\t1098901\t1148900\t.\t+\t.\tnbElements=0;ID=region23;Name=region23\n-chr4\tS-MART\ttranscript\t1148851\t1198850\t.\t+\t.\tnbElements=0;ID=region24;Name=region24\n-chr4\tS-MART\ttranscript\t1198801\t1248800\t.\t+\t.\tnbElements=0;ID=region25;Name=region25\n-chr4\tS-MART\ttranscript\t1248751\t1298750\t.\t+\t.\tnbElements=0;ID=region26;Name=region26\n-chr4\tS-MART\ttranscript\t1298701\t1348700\t.\t+\t.\tnbElements=0;ID=region27;Name=region27\n-chr4\tS-MART\ttranscript\t1348651\t1398650\t.\t+\t.\tnbElements=0;ID=region28;Name=region28\n-chr4\tS-MART\ttranscript\t1398601\t1448600\t.\t+\t.\tnbElements=0;ID=region29;Name=region29\n-chr4\tS-MART\ttranscript\t1448551\t1498550\t.\t+\t.\tnbElements=0;ID=region30;Name=region30\n-chr4\tS-MART\ttranscript\t1498501\t1548500\t.\t+\t.\tnbElements=0;ID=region31;Name=region31\n-chr4\tS-MART\ttranscript\t1548451\t1598450\t.\t+\t.\tnbElements=0;ID=region32;Name=region32\n-chr4\tS-MART\ttranscript\t1598401\t1648400\t.\t+\t.\tnbElements=0;ID=region33;Name=region33\n-chr4\tS-MART\ttranscript\t1648351\t1698350\t.\t+\t.\tnbElements=0;ID=region34;Name=region34\n-chr4\tS-MART\ttranscript\t1698301\t1748300\t.\t+\t.\tnbElements=0;ID=region35;Name=region35\n-chr4\tS-MART\ttranscript\t1748251\t1798250\t.\t+\t.\tnbElements=0;ID=region36;Name=region36\n-chr4\tS-MART\ttranscript\t1798201\t1848200\t.\t+\t.\tnbElements=0;ID=region37;Name=region37\n-chr4\tS-MART\ttranscript\t1848151\t1898150\t.\t+\t.\tnbElements=0;ID=region38;Name=region38\n-chr4\tS-MART\ttranscript\t1898101\t1948100\t.\t+\t.\tnbElements=0;ID=region39;Name=region39\n-chr4\tS-MART\ttranscript\t1948051\t1998050\t.\t+\t.\tnbElements=0;ID=region40;Name=region40\n-chr4\tS-MART\ttranscript\t1998001\t2048000\t.\t+\t.\tnbElements=0;ID=region41;Name=region41\n-chr4\tS-MART\ttranscript\t2047951\t2097950\t.\t+\t.\tnbElements=0;ID=region42;Name=region42\n-chr4\tS-MART\ttranscript\t2097901\t2147900\t.\t+\t.\tnbElements=0;ID=region43;Name=region43\n-chr
4\tS-MART\ttranscript\t2147851\t2197850\t.\t+\t.\tnbElements=0;ID=region44;Name=region44\n-chr4\tS-MART\ttranscript\t2197801\t2247800\t.\t+\t.\tnbElements=0;ID=region45;Name=region45\n-chr4\tS-MART\ttranscript\t2247751\t2297750\t.\t+\t.\tnbElements=0;ID=region46;Name=region46\n-chr4\tS-MART\ttranscript\t2297701\t2347700\t.\t+\t.\tnbElements=0;ID=region47;Name=region47\n-chr4\tS-MART\ttranscript\t2347651\t2397650\t.\t+\t.\tnbEleme'..b'MART\ttranscript\t3946051\t3996050\t.\t+\t.\tnbElements=0;ID=region1746;Name=region1746\n-chr1\tS-MART\ttranscript\t3996001\t4046000\t.\t+\t.\tnbElements=0;ID=region1747;Name=region1747\n-chr1\tS-MART\ttranscript\t4045951\t4095950\t.\t+\t.\tnbElements=0;ID=region1748;Name=region1748\n-chr1\tS-MART\ttranscript\t4095901\t4145900\t.\t+\t.\tnbElements=0;ID=region1749;Name=region1749\n-chr1\tS-MART\ttranscript\t4145851\t4195850\t.\t+\t.\tnbElements=0;ID=region1750;Name=region1750\n-chr1\tS-MART\ttranscript\t4195801\t4245800\t.\t+\t.\tnbElements=0;ID=region1751;Name=region1751\n-chr1\tS-MART\ttranscript\t4245751\t4295750\t.\t+\t.\tnbElements=0;ID=region1752;Name=region1752\n-chr1\tS-MART\ttranscript\t4295701\t4345700\t.\t+\t.\tnbElements=0;ID=region1753;Name=region1753\n-chr1\tS-MART\ttranscript\t4345651\t4395650\t.\t+\t.\tnbElements=0;ID=region1754;Name=region1754\n-chr1\tS-MART\ttranscript\t4395601\t4445600\t.\t+\t.\tnbElements=0;ID=region1755;Name=region1755\n-chr1\tS-MART\ttranscript\t4445551\t4495550\t.\t+\t.\tnbElements=0;ID=region1756;Name=region1756\n-chr1\tS-MART\ttranscript\t4495501\t4545500\t.\t+\t.\tnbElements=0;ID=region1757;Name=region1757\n-chr1\tS-MART\ttranscript\t4545451\t4595450\t.\t+\t.\tnbElements=0;ID=region1758;Name=region1758\n-chr1\tS-MART\ttranscript\t4595401\t4645400\t.\t+\t.\tnbElements=0;ID=region1759;Name=region1759\n-chr1\tS-MART\ttranscript\t4645351\t4695350\t.\t+\t.\tnbElements=0;ID=region1760;Name=region1760\n-chr1\tS-MART\ttranscript\t4695301\t4745300\t.\t+\t.\tnbElements=0;ID=region1761;Name=region1761\n-ch
r1\tS-MART\ttranscript\t4745251\t4795250\t.\t+\t.\tnbElements=0;ID=region1762;Name=region1762\n-chr1\tS-MART\ttranscript\t4795201\t4845200\t.\t+\t.\tnbElements=0;ID=region1763;Name=region1763\n-chr1\tS-MART\ttranscript\t4845151\t4895150\t.\t+\t.\tnbElements=0;ID=region1764;Name=region1764\n-chr1\tS-MART\ttranscript\t4895101\t4945100\t.\t+\t.\tnbElements=0;ID=region1765;Name=region1765\n-chr1\tS-MART\ttranscript\t4945051\t4995050\t.\t+\t.\tnbElements=0;ID=region1766;Name=region1766\n-chr1\tS-MART\ttranscript\t4995001\t5045000\t.\t+\t.\tnbElements=0;ID=region1767;Name=region1767\n-chr1\tS-MART\ttranscript\t5044951\t5094950\t.\t+\t.\tnbElements=0;ID=region1768;Name=region1768\n-chr1\tS-MART\ttranscript\t5094901\t5144900\t.\t+\t.\tnbElements=0;ID=region1769;Name=region1769\n-chr1\tS-MART\ttranscript\t5144851\t5194850\t.\t+\t.\tnbElements=0;ID=region1770;Name=region1770\n-chr1\tS-MART\ttranscript\t5194801\t5244800\t.\t+\t.\tnbElements=0;ID=region1771;Name=region1771\n-chr1\tS-MART\ttranscript\t5244751\t5294750\t.\t+\t.\tnbElements=0;ID=region1772;Name=region1772\n-chr1\tS-MART\ttranscript\t5294701\t5344700\t.\t+\t.\tnbElements=0;ID=region1773;Name=region1773\n-chr1\tS-MART\ttranscript\t5344651\t5394650\t.\t+\t.\tnbElements=0;ID=region1774;Name=region1774\n-chr1\tS-MART\ttranscript\t5394601\t5444600\t.\t+\t.\tnbElements=0;ID=region1775;Name=region1775\n-chr1\tS-MART\ttranscript\t5444551\t5494550\t.\t+\t.\tnbElements=0;ID=region1776;Name=region1776\n-chr1\tS-MART\ttranscript\t5494501\t5544500\t.\t+\t.\tnbElements=0;ID=region1777;Name=region1777\n-chr1\tS-MART\ttranscript\t5544451\t5594450\t.\t+\t.\tnbElements=0;ID=region1778;Name=region1778\n-chr1\tS-MART\ttranscript\t5594401\t5644400\t.\t+\t.\tnbElements=0;ID=region1779;Name=region1779\n-chr1\tS-MART\ttranscript\t5644351\t5694350\t.\t+\t.\tnbElements=0;ID=region1780;Name=region1780\n-chr1\tS-MART\ttranscript\t5694301\t5744300\t.\t+\t.\tnbElements=0;ID=region1781;Name=region1781\n-chr1\tS-MART\ttranscript\t5744251\t5794250
\t.\t+\t.\tnbElements=0;ID=region1782;Name=region1782\n-chr1\tS-MART\ttranscript\t5794201\t5844200\t.\t+\t.\tnbElements=0;ID=region1783;Name=region1783\n-chr1\tS-MART\ttranscript\t5844151\t5894150\t.\t+\t.\tnbElements=0;ID=region1784;Name=region1784\n-chr1\tS-MART\ttranscript\t5894101\t5944100\t.\t+\t.\tnbElements=0;ID=region1785;Name=region1785\n-chr1\tS-MART\ttranscript\t5944051\t5994050\t.\t+\t.\tnbElements=0;ID=region1786;Name=region1786\n-chr1\tS-MART\ttranscript\t5994001\t6044000\t.\t+\t.\tnbElements=0;ID=region1787;Name=region1787\n-chr1\tS-MART\ttranscript\t6043951\t6093950\t.\t+\t.\tnbElements=0;ID=region1788;Name=region1788\n-chr1\tS-MART\ttranscript\t6093901\t6143900\t.\t+\t.\tnbElements=0;ID=region1789;Name=region1789\n-chr1\tS-MART\ttranscript\t6143851\t6193850\t.\t+\t.\tnbElements=1.0;ID=region1790;Name=region1790\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/clusterize_default_expected.map
--- a/SMART/Java/Python/TestFiles/clusterize_default_expected.map Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,1790 +0,0 @@\n-region1\tchr4\t1\t50001\n-region2\tchr4\t49951\t99951\n-region3\tchr4\t99901\t149901\n-region4\tchr4\t149851\t199851\n-region5\tchr4\t199801\t249801\n-region6\tchr4\t249751\t299751\n-region7\tchr4\t299701\t349701\n-region8\tchr4\t349651\t399651\n-region9\tchr4\t399601\t449601\n-region10\tchr4\t449551\t499551\n-region11\tchr4\t499501\t549501\n-region12\tchr4\t549451\t599451\n-region13\tchr4\t599401\t649401\n-region14\tchr4\t649351\t699351\n-region15\tchr4\t699301\t749301\n-region16\tchr4\t749251\t799251\n-region17\tchr4\t799201\t849201\n-region18\tchr4\t849151\t899151\n-region19\tchr4\t899101\t949101\n-region20\tchr4\t949051\t999051\n-region21\tchr4\t999001\t1049001\n-region22\tchr4\t1048951\t1098951\n-region23\tchr4\t1098901\t1148901\n-region24\tchr4\t1148851\t1198851\n-region25\tchr4\t1198801\t1248801\n-region26\tchr4\t1248751\t1298751\n-region27\tchr4\t1298701\t1348701\n-region28\tchr4\t1348651\t1398651\n-region29\tchr4\t1398601\t1448601\n-region30\tchr4\t1448551\t1498551\n-region31\tchr4\t1498501\t1548501\n-region32\tchr4\t1548451\t1598451\n-region33\tchr4\t1598401\t1648401\n-region34\tchr4\t1648351\t1698351\n-region35\tchr4\t1698301\t1748301\n-region36\tchr4\t1748251\t1798251\n-region37\tchr4\t1798201\t1848201\n-region38\tchr4\t1848151\t1898151\n-region39\tchr4\t1898101\t1948101\n-region40\tchr4\t1948051\t1998051\n-region41\tchr4\t1998001\t2048001\n-region42\tchr4\t2047951\t2097951\n-region43\tchr4\t2097901\t2147901\n-region44\tchr4\t2147851\t2197851\n-region45\tchr4\t2197801\t2247801\n-region46\tchr4\t2247751\t2297751\n-region47\tchr4\t2297701\t2347701\n-region48\tchr4\t2347651\t2397651\n-region49\tchr4\t2397601\t2447601\n-region50\tchr4\t2447551\t2497551\n-region51\tchr4\t2497501\t2547501\n-region52\tchr4\t2547451\t2597451\n-region53\tchr4\t2597401\t2647401\n-region54\tchr4\t2647351\t2697351\n-region55\tchr4\t2697301\t2747301\n-region56\tchr4\t2747251\t2797251\n-region57\tchr4\t2797201\t2847201\n-region58\tchr4\t2847151\t2897151\n-region
59\tchr4\t2897101\t2947101\n-region60\tchr4\t2947051\t2997051\n-region61\tchr4\t2997001\t3047001\n-region62\tchr4\t3046951\t3096951\n-region63\tchr4\t3096901\t3146901\n-region64\tchr4\t3146851\t3196851\n-region65\tchr4\t3196801\t3246801\n-region66\tchr4\t3246751\t3296751\n-region67\tchr4\t3296701\t3346701\n-region68\tchr4\t3346651\t3396651\n-region69\tchr4\t3396601\t3446601\n-region70\tchr4\t3446551\t3496551\n-region71\tchr4\t3496501\t3546501\n-region72\tchr4\t3546451\t3596451\n-region73\tchr4\t3596401\t3646401\n-region74\tchr4\t3646351\t3696351\n-region75\tchr4\t3696301\t3746301\n-region76\tchr4\t3746251\t3796251\n-region77\tchr4\t3796201\t3846201\n-region78\tchr4\t3846151\t3896151\n-region79\tchr4\t3896101\t3946101\n-region80\tchr4\t3946051\t3996051\n-region81\tchr4\t3996001\t4046001\n-region82\tchr4\t4045951\t4095951\n-region83\tchr4\t4095901\t4145901\n-region84\tchr4\t4145851\t4195851\n-region85\tchr4\t4195801\t4245801\n-region86\tchr4\t4245751\t4295751\n-region87\tchr4\t4295701\t4345701\n-region88\tchr4\t4345651\t4395651\n-region89\tchr4\t4395601\t4445601\n-region90\tchr4\t4445551\t4495551\n-region91\tchr4\t4495501\t4545501\n-region92\tchr4\t4545451\t4595451\n-region93\tchr4\t4595401\t4645401\n-region94\tchr4\t4645351\t4695351\n-region95\tchr4\t4695301\t4745301\n-region96\tchr4\t4745251\t4795251\n-region97\tchr4\t4795201\t4845201\n-region98\tchr4\t4845151\t4895151\n-region99\tchr4\t4895101\t4945101\n-region100\tchr4\t4945051\t4995051\n-region101\tchr4\t4995001\t5045001\n-region102\tchr4\t5044951\t5094951\n-region103\tchr4\t5094901\t5144901\n-region104\tchr4\t5144851\t5194851\n-region105\tchr4\t5194801\t5244801\n-region106\tchr4\t5244751\t5294751\n-region107\tchr4\t5294701\t5344701\n-region108\tchr4\t5344651\t5394651\n-region109\tchr4\t5394601\t5444601\n-region110\tchr4\t5444551\t5494551\n-region111\tchr4\t5494501\t5544501\n-region112\tchr4\t5544451\t5594451\n-region113\tchr4\t5594401\t5644401\n-region114\tchr4\t5644351\t5694351\n-region115\tchr4\t5694301\t57443
01\n-region116\tchr4\t5744251\t5794251\n-region117\tchr4\t5794201\t5844201\n-region118\tchr4\t5844151\t5894151\n-region119\tchr4\t5894101\t5944101\n-region120\tchr4\t5944051\t5994051\n-region121\tchr4\t5994001\t6044001\n-region122\tchr4\t6043951\t6093951\n-region123\tchr4\t6093901\t6143901\n-region124\tchr4\t6143851\t6193851\n-region125\tchr4\t6193801\t6243801\n-region126\tchr4\t6243751\t6293751\n-region127\tchr4\t6293701\t6343701\n-region128\tchr4\t6343651\t6393651\n-region129\tchr4\t6393601\t6443601\n-region130'..b'9951\t99951\n-region1669\tchr1\t99901\t149901\n-region1670\tchr1\t149851\t199851\n-region1671\tchr1\t199801\t249801\n-region1672\tchr1\t249751\t299751\n-region1673\tchr1\t299701\t349701\n-region1674\tchr1\t349651\t399651\n-region1675\tchr1\t399601\t449601\n-region1676\tchr1\t449551\t499551\n-region1677\tchr1\t499501\t549501\n-region1678\tchr1\t549451\t599451\n-region1679\tchr1\t599401\t649401\n-region1680\tchr1\t649351\t699351\n-region1681\tchr1\t699301\t749301\n-region1682\tchr1\t749251\t799251\n-region1683\tchr1\t799201\t849201\n-region1684\tchr1\t849151\t899151\n-region1685\tchr1\t899101\t949101\n-region1686\tchr1\t949051\t999051\n-region1687\tchr1\t999001\t1049001\n-region1688\tchr1\t1048951\t1098951\n-region1689\tchr1\t1098901\t1148901\n-region1690\tchr1\t1148851\t1198851\n-region1691\tchr1\t1198801\t1248801\n-region1692\tchr1\t1248751\t1298751\n-region1693\tchr1\t1298701\t1348701\n-region1694\tchr1\t1348651\t1398651\n-region1695\tchr1\t1398601\t1448601\n-region1696\tchr1\t1448551\t1498551\n-region1697\tchr1\t1498501\t1548501\n-region1698\tchr1\t1548451\t1598451\n-region1699\tchr1\t1598401\t1648401\n-region1700\tchr1\t1648351\t1698351\n-region1701\tchr1\t1698301\t1748301\n-region1702\tchr1\t1748251\t1798251\n-region1703\tchr1\t1798201\t1848201\n-region1704\tchr1\t1848151\t1898151\n-region1705\tchr1\t1898101\t1948101\n-region1706\tchr1\t1948051\t1998051\n-region1707\tchr1\t1998001\t2048001\n-region1708\tchr1\t2047951\t2097951\n-region1709\tchr1\t209
7901\t2147901\n-region1710\tchr1\t2147851\t2197851\n-region1711\tchr1\t2197801\t2247801\n-region1712\tchr1\t2247751\t2297751\n-region1713\tchr1\t2297701\t2347701\n-region1714\tchr1\t2347651\t2397651\n-region1715\tchr1\t2397601\t2447601\n-region1716\tchr1\t2447551\t2497551\n-region1717\tchr1\t2497501\t2547501\n-region1718\tchr1\t2547451\t2597451\n-region1719\tchr1\t2597401\t2647401\n-region1720\tchr1\t2647351\t2697351\n-region1721\tchr1\t2697301\t2747301\n-region1722\tchr1\t2747251\t2797251\n-region1723\tchr1\t2797201\t2847201\n-region1724\tchr1\t2847151\t2897151\n-region1725\tchr1\t2897101\t2947101\n-region1726\tchr1\t2947051\t2997051\n-region1727\tchr1\t2997001\t3047001\n-region1728\tchr1\t3046951\t3096951\n-region1729\tchr1\t3096901\t3146901\n-region1730\tchr1\t3146851\t3196851\n-region1731\tchr1\t3196801\t3246801\n-region1732\tchr1\t3246751\t3296751\n-region1733\tchr1\t3296701\t3346701\n-region1734\tchr1\t3346651\t3396651\n-region1735\tchr1\t3396601\t3446601\n-region1736\tchr1\t3446551\t3496551\n-region1737\tchr1\t3496501\t3546501\n-region1738\tchr1\t3546451\t3596451\n-region1739\tchr1\t3596401\t3646401\n-region1740\tchr1\t3646351\t3696351\n-region1741\tchr1\t3696301\t3746301\n-region1742\tchr1\t3746251\t3796251\n-region1743\tchr1\t3796201\t3846201\n-region1744\tchr1\t3846151\t3896151\n-region1745\tchr1\t3896101\t3946101\n-region1746\tchr1\t3946051\t3996051\n-region1747\tchr1\t3996001\t4046001\n-region1748\tchr1\t4045951\t4095951\n-region1749\tchr1\t4095901\t4145901\n-region1750\tchr1\t4145851\t4195851\n-region1751\tchr1\t4195801\t4245801\n-region1752\tchr1\t4245751\t4295751\n-region1753\tchr1\t4295701\t4345701\n-region1754\tchr1\t4345651\t4395651\n-region1755\tchr1\t4395601\t4445601\n-region1756\tchr1\t4445551\t4495551\n-region1757\tchr1\t4495501\t4545501\n-region1758\tchr1\t4545451\t4595451\n-region1759\tchr1\t4595401\t4645401\n-region1760\tchr1\t4645351\t4695351\n-region1761\tchr1\t4695301\t4745301\n-region1762\tchr1\t4745251\t4795251\n-region1763\tchr1\t47952
01\t4845201\n-region1764\tchr1\t4845151\t4895151\n-region1765\tchr1\t4895101\t4945101\n-region1766\tchr1\t4945051\t4995051\n-region1767\tchr1\t4995001\t5045001\n-region1768\tchr1\t5044951\t5094951\n-region1769\tchr1\t5094901\t5144901\n-region1770\tchr1\t5144851\t5194851\n-region1771\tchr1\t5194801\t5244801\n-region1772\tchr1\t5244751\t5294751\n-region1773\tchr1\t5294701\t5344701\n-region1774\tchr1\t5344651\t5394651\n-region1775\tchr1\t5394601\t5444601\n-region1776\tchr1\t5444551\t5494551\n-region1777\tchr1\t5494501\t5544501\n-region1778\tchr1\t5544451\t5594451\n-region1779\tchr1\t5594401\t5644401\n-region1780\tchr1\t5644351\t5694351\n-region1781\tchr1\t5694301\t5744301\n-region1782\tchr1\t5744251\t5794251\n-region1783\tchr1\t5794201\t5844201\n-region1784\tchr1\t5844151\t5894151\n-region1785\tchr1\t5894101\t5944101\n-region1786\tchr1\t5944051\t5994051\n-region1787\tchr1\t5994001\t6044001\n-region1788\tchr1\t6043951\t6093951\n-region1789\tchr1\t6093901\t6143901\n-region1790\tchr1\t6143851\t6193851\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/clusterize_normalize_expected.gff3
--- a/SMART/Java/Python/TestFiles/clusterize_normalize_expected.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,1790 +0,0 @@\n-chr4\tS-MART\ttranscript\t1\t50000\t.\t+\t.\tnbElements=0;ID=region1;Name=region1\n-chr4\tS-MART\ttranscript\t49951\t99950\t.\t+\t.\tnbElements=0;ID=region2;Name=region2\n-chr4\tS-MART\ttranscript\t99901\t149900\t.\t+\t.\tnbElements=0;ID=region3;Name=region3\n-chr4\tS-MART\ttranscript\t149851\t199850\t.\t+\t.\tnbElements=0;ID=region4;Name=region4\n-chr4\tS-MART\ttranscript\t199801\t249800\t.\t+\t.\tnbElements=0;ID=region5;Name=region5\n-chr4\tS-MART\ttranscript\t249751\t299750\t.\t+\t.\tnbElements=0;ID=region6;Name=region6\n-chr4\tS-MART\ttranscript\t299701\t349700\t.\t+\t.\tnbElements=0;ID=region7;Name=region7\n-chr4\tS-MART\ttranscript\t349651\t399650\t.\t+\t.\tnbElements=0;ID=region8;Name=region8\n-chr4\tS-MART\ttranscript\t399601\t449600\t.\t+\t.\tnbElements=0;ID=region9;Name=region9\n-chr4\tS-MART\ttranscript\t449551\t499550\t.\t+\t.\tnbElements=0;ID=region10;Name=region10\n-chr4\tS-MART\ttranscript\t499501\t549500\t.\t+\t.\tnbElements=0;ID=region11;Name=region11\n-chr4\tS-MART\ttranscript\t549451\t599450\t.\t+\t.\tnbElements=0;ID=region12;Name=region12\n-chr4\tS-MART\ttranscript\t599401\t649400\t.\t+\t.\tnbElements=0;ID=region13;Name=region13\n-chr4\tS-MART\ttranscript\t649351\t699350\t.\t+\t.\tnbElements=0;ID=region14;Name=region14\n-chr4\tS-MART\ttranscript\t699301\t749300\t.\t+\t.\tnbElements=0;ID=region15;Name=region15\n-chr4\tS-MART\ttranscript\t749251\t799250\t.\t+\t.\tnbElements=0;ID=region16;Name=region16\n-chr4\tS-MART\ttranscript\t799201\t849200\t.\t+\t.\tnbElements=0;ID=region17;Name=region17\n-chr4\tS-MART\ttranscript\t849151\t899150\t.\t+\t.\tnbElements=0;ID=region18;Name=region18\n-chr4\tS-MART\ttranscript\t899101\t949100\t.\t+\t.\tnbElements=0;ID=region19;Name=region19\n-chr4\tS-MART\ttranscript\t949051\t999050\t.\t+\t.\tnbElements=0;ID=region20;Name=region20\n-chr4\tS-MART\ttranscript\t999001\t1049000\t.\t+\t.\tnbElements=0;ID=region21;Name=region21\n-chr4\tS-MART\ttranscript\t1048951\t1098950\t.\t+\t.\tnbElements=0;ID=re
gion22;Name=region22\n-chr4\tS-MART\ttranscript\t1098901\t1148900\t.\t+\t.\tnbElements=0;ID=region23;Name=region23\n-chr4\tS-MART\ttranscript\t1148851\t1198850\t.\t+\t.\tnbElements=0;ID=region24;Name=region24\n-chr4\tS-MART\ttranscript\t1198801\t1248800\t.\t+\t.\tnbElements=0;ID=region25;Name=region25\n-chr4\tS-MART\ttranscript\t1248751\t1298750\t.\t+\t.\tnbElements=0;ID=region26;Name=region26\n-chr4\tS-MART\ttranscript\t1298701\t1348700\t.\t+\t.\tnbElements=0;ID=region27;Name=region27\n-chr4\tS-MART\ttranscript\t1348651\t1398650\t.\t+\t.\tnbElements=0;ID=region28;Name=region28\n-chr4\tS-MART\ttranscript\t1398601\t1448600\t.\t+\t.\tnbElements=0;ID=region29;Name=region29\n-chr4\tS-MART\ttranscript\t1448551\t1498550\t.\t+\t.\tnbElements=0;ID=region30;Name=region30\n-chr4\tS-MART\ttranscript\t1498501\t1548500\t.\t+\t.\tnbElements=0;ID=region31;Name=region31\n-chr4\tS-MART\ttranscript\t1548451\t1598450\t.\t+\t.\tnbElements=0;ID=region32;Name=region32\n-chr4\tS-MART\ttranscript\t1598401\t1648400\t.\t+\t.\tnbElements=0;ID=region33;Name=region33\n-chr4\tS-MART\ttranscript\t1648351\t1698350\t.\t+\t.\tnbElements=0;ID=region34;Name=region34\n-chr4\tS-MART\ttranscript\t1698301\t1748300\t.\t+\t.\tnbElements=0;ID=region35;Name=region35\n-chr4\tS-MART\ttranscript\t1748251\t1798250\t.\t+\t.\tnbElements=0;ID=region36;Name=region36\n-chr4\tS-MART\ttranscript\t1798201\t1848200\t.\t+\t.\tnbElements=0;ID=region37;Name=region37\n-chr4\tS-MART\ttranscript\t1848151\t1898150\t.\t+\t.\tnbElements=0;ID=region38;Name=region38\n-chr4\tS-MART\ttranscript\t1898101\t1948100\t.\t+\t.\tnbElements=0;ID=region39;Name=region39\n-chr4\tS-MART\ttranscript\t1948051\t1998050\t.\t+\t.\tnbElements=0;ID=region40;Name=region40\n-chr4\tS-MART\ttranscript\t1998001\t2048000\t.\t+\t.\tnbElements=0;ID=region41;Name=region41\n-chr4\tS-MART\ttranscript\t2047951\t2097950\t.\t+\t.\tnbElements=0;ID=region42;Name=region42\n-chr4\tS-MART\ttranscript\t2097901\t2147900\t.\t+\t.\tnbElements=0;ID=region43;Name=region43\n-chr
4\tS-MART\ttranscript\t2147851\t2197850\t.\t+\t.\tnbElements=0;ID=region44;Name=region44\n-chr4\tS-MART\ttranscript\t2197801\t2247800\t.\t+\t.\tnbElements=0;ID=region45;Name=region45\n-chr4\tS-MART\ttranscript\t2247751\t2297750\t.\t+\t.\tnbElements=0;ID=region46;Name=region46\n-chr4\tS-MART\ttranscript\t2297701\t2347700\t.\t+\t.\tnbElements=0;ID=region47;Name=region47\n-chr4\tS-MART\ttranscript\t2347651\t2397650\t.\t+\t.\tnbEleme'..b'MART\ttranscript\t3946051\t3996050\t.\t+\t.\tnbElements=0;ID=region1746;Name=region1746\n-chr1\tS-MART\ttranscript\t3996001\t4046000\t.\t+\t.\tnbElements=0;ID=region1747;Name=region1747\n-chr1\tS-MART\ttranscript\t4045951\t4095950\t.\t+\t.\tnbElements=0;ID=region1748;Name=region1748\n-chr1\tS-MART\ttranscript\t4095901\t4145900\t.\t+\t.\tnbElements=0;ID=region1749;Name=region1749\n-chr1\tS-MART\ttranscript\t4145851\t4195850\t.\t+\t.\tnbElements=0;ID=region1750;Name=region1750\n-chr1\tS-MART\ttranscript\t4195801\t4245800\t.\t+\t.\tnbElements=0;ID=region1751;Name=region1751\n-chr1\tS-MART\ttranscript\t4245751\t4295750\t.\t+\t.\tnbElements=0;ID=region1752;Name=region1752\n-chr1\tS-MART\ttranscript\t4295701\t4345700\t.\t+\t.\tnbElements=0;ID=region1753;Name=region1753\n-chr1\tS-MART\ttranscript\t4345651\t4395650\t.\t+\t.\tnbElements=0;ID=region1754;Name=region1754\n-chr1\tS-MART\ttranscript\t4395601\t4445600\t.\t+\t.\tnbElements=0;ID=region1755;Name=region1755\n-chr1\tS-MART\ttranscript\t4445551\t4495550\t.\t+\t.\tnbElements=0;ID=region1756;Name=region1756\n-chr1\tS-MART\ttranscript\t4495501\t4545500\t.\t+\t.\tnbElements=0;ID=region1757;Name=region1757\n-chr1\tS-MART\ttranscript\t4545451\t4595450\t.\t+\t.\tnbElements=0;ID=region1758;Name=region1758\n-chr1\tS-MART\ttranscript\t4595401\t4645400\t.\t+\t.\tnbElements=0;ID=region1759;Name=region1759\n-chr1\tS-MART\ttranscript\t4645351\t4695350\t.\t+\t.\tnbElements=0;ID=region1760;Name=region1760\n-chr1\tS-MART\ttranscript\t4695301\t4745300\t.\t+\t.\tnbElements=0;ID=region1761;Name=region1761\n-ch
r1\tS-MART\ttranscript\t4745251\t4795250\t.\t+\t.\tnbElements=0;ID=region1762;Name=region1762\n-chr1\tS-MART\ttranscript\t4795201\t4845200\t.\t+\t.\tnbElements=0;ID=region1763;Name=region1763\n-chr1\tS-MART\ttranscript\t4845151\t4895150\t.\t+\t.\tnbElements=0;ID=region1764;Name=region1764\n-chr1\tS-MART\ttranscript\t4895101\t4945100\t.\t+\t.\tnbElements=0;ID=region1765;Name=region1765\n-chr1\tS-MART\ttranscript\t4945051\t4995050\t.\t+\t.\tnbElements=0;ID=region1766;Name=region1766\n-chr1\tS-MART\ttranscript\t4995001\t5045000\t.\t+\t.\tnbElements=0;ID=region1767;Name=region1767\n-chr1\tS-MART\ttranscript\t5044951\t5094950\t.\t+\t.\tnbElements=0;ID=region1768;Name=region1768\n-chr1\tS-MART\ttranscript\t5094901\t5144900\t.\t+\t.\tnbElements=0;ID=region1769;Name=region1769\n-chr1\tS-MART\ttranscript\t5144851\t5194850\t.\t+\t.\tnbElements=0;ID=region1770;Name=region1770\n-chr1\tS-MART\ttranscript\t5194801\t5244800\t.\t+\t.\tnbElements=0;ID=region1771;Name=region1771\n-chr1\tS-MART\ttranscript\t5244751\t5294750\t.\t+\t.\tnbElements=0;ID=region1772;Name=region1772\n-chr1\tS-MART\ttranscript\t5294701\t5344700\t.\t+\t.\tnbElements=0;ID=region1773;Name=region1773\n-chr1\tS-MART\ttranscript\t5344651\t5394650\t.\t+\t.\tnbElements=0;ID=region1774;Name=region1774\n-chr1\tS-MART\ttranscript\t5394601\t5444600\t.\t+\t.\tnbElements=0;ID=region1775;Name=region1775\n-chr1\tS-MART\ttranscript\t5444551\t5494550\t.\t+\t.\tnbElements=0;ID=region1776;Name=region1776\n-chr1\tS-MART\ttranscript\t5494501\t5544500\t.\t+\t.\tnbElements=0;ID=region1777;Name=region1777\n-chr1\tS-MART\ttranscript\t5544451\t5594450\t.\t+\t.\tnbElements=0;ID=region1778;Name=region1778\n-chr1\tS-MART\ttranscript\t5594401\t5644400\t.\t+\t.\tnbElements=0;ID=region1779;Name=region1779\n-chr1\tS-MART\ttranscript\t5644351\t5694350\t.\t+\t.\tnbElements=0;ID=region1780;Name=region1780\n-chr1\tS-MART\ttranscript\t5694301\t5744300\t.\t+\t.\tnbElements=0;ID=region1781;Name=region1781\n-chr1\tS-MART\ttranscript\t5744251\t5794250
\t.\t+\t.\tnbElements=0;ID=region1782;Name=region1782\n-chr1\tS-MART\ttranscript\t5794201\t5844200\t.\t+\t.\tnbElements=0;ID=region1783;Name=region1783\n-chr1\tS-MART\ttranscript\t5844151\t5894150\t.\t+\t.\tnbElements=0;ID=region1784;Name=region1784\n-chr1\tS-MART\ttranscript\t5894101\t5944100\t.\t+\t.\tnbElements=0;ID=region1785;Name=region1785\n-chr1\tS-MART\ttranscript\t5944051\t5994050\t.\t+\t.\tnbElements=0;ID=region1786;Name=region1786\n-chr1\tS-MART\ttranscript\t5994001\t6044000\t.\t+\t.\tnbElements=0;ID=region1787;Name=region1787\n-chr1\tS-MART\ttranscript\t6043951\t6093950\t.\t+\t.\tnbElements=0;ID=region1788;Name=region1788\n-chr1\tS-MART\ttranscript\t6093901\t6143900\t.\t+\t.\tnbElements=0;ID=region1789;Name=region1789\n-chr1\tS-MART\ttranscript\t6143851\t6193850\t.\t+\t.\tnbElements=1.0;ID=region1790;Name=region1790\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/clusterize_output_tag_expected.gff3
--- a/SMART/Java/Python/TestFiles/clusterize_output_tag_expected.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,1790 +0,0 @@\n-chr4\tS-MART\ttranscript\t1\t50000\t.\t+\t.\tnbElements=0;newTag=0;ID=region1;Name=region1\n-chr4\tS-MART\ttranscript\t49951\t99950\t.\t+\t.\tnbElements=0;newTag=0;ID=region2;Name=region2\n-chr4\tS-MART\ttranscript\t99901\t149900\t.\t+\t.\tnbElements=0;newTag=0;ID=region3;Name=region3\n-chr4\tS-MART\ttranscript\t149851\t199850\t.\t+\t.\tnbElements=0;newTag=0;ID=region4;Name=region4\n-chr4\tS-MART\ttranscript\t199801\t249800\t.\t+\t.\tnbElements=0;newTag=0;ID=region5;Name=region5\n-chr4\tS-MART\ttranscript\t249751\t299750\t.\t+\t.\tnbElements=0;newTag=0;ID=region6;Name=region6\n-chr4\tS-MART\ttranscript\t299701\t349700\t.\t+\t.\tnbElements=0;newTag=0;ID=region7;Name=region7\n-chr4\tS-MART\ttranscript\t349651\t399650\t.\t+\t.\tnbElements=0;newTag=0;ID=region8;Name=region8\n-chr4\tS-MART\ttranscript\t399601\t449600\t.\t+\t.\tnbElements=0;newTag=0;ID=region9;Name=region9\n-chr4\tS-MART\ttranscript\t449551\t499550\t.\t+\t.\tnbElements=0;newTag=0;ID=region10;Name=region10\n-chr4\tS-MART\ttranscript\t499501\t549500\t.\t+\t.\tnbElements=0;newTag=0;ID=region11;Name=region11\n-chr4\tS-MART\ttranscript\t549451\t599450\t.\t+\t.\tnbElements=0;newTag=0;ID=region12;Name=region12\n-chr4\tS-MART\ttranscript\t599401\t649400\t.\t+\t.\tnbElements=0;newTag=0;ID=region13;Name=region13\n-chr4\tS-MART\ttranscript\t649351\t699350\t.\t+\t.\tnbElements=0;newTag=0;ID=region14;Name=region14\n-chr4\tS-MART\ttranscript\t699301\t749300\t.\t+\t.\tnbElements=0;newTag=0;ID=region15;Name=region15\n-chr4\tS-MART\ttranscript\t749251\t799250\t.\t+\t.\tnbElements=0;newTag=0;ID=region16;Name=region16\n-chr4\tS-MART\ttranscript\t799201\t849200\t.\t+\t.\tnbElements=0;newTag=0;ID=region17;Name=region17\n-chr4\tS-MART\ttranscript\t849151\t899150\t.\t+\t.\tnbElements=0;newTag=0;ID=region18;Name=region18\n-chr4\tS-MART\ttranscript\t899101\t949100\t.\t+\t.\tnbElements=0;newTag=0;ID=region19;Name=region19\n-chr4\tS-MART\ttranscript\t949051\t999050\t.\t+\t.\tnbElements=0;newTag=0;ID=region20;
Name=region20\n-chr4\tS-MART\ttranscript\t999001\t1049000\t.\t+\t.\tnbElements=0;newTag=0;ID=region21;Name=region21\n-chr4\tS-MART\ttranscript\t1048951\t1098950\t.\t+\t.\tnbElements=0;newTag=0;ID=region22;Name=region22\n-chr4\tS-MART\ttranscript\t1098901\t1148900\t.\t+\t.\tnbElements=0;newTag=0;ID=region23;Name=region23\n-chr4\tS-MART\ttranscript\t1148851\t1198850\t.\t+\t.\tnbElements=0;newTag=0;ID=region24;Name=region24\n-chr4\tS-MART\ttranscript\t1198801\t1248800\t.\t+\t.\tnbElements=0;newTag=0;ID=region25;Name=region25\n-chr4\tS-MART\ttranscript\t1248751\t1298750\t.\t+\t.\tnbElements=0;newTag=0;ID=region26;Name=region26\n-chr4\tS-MART\ttranscript\t1298701\t1348700\t.\t+\t.\tnbElements=0;newTag=0;ID=region27;Name=region27\n-chr4\tS-MART\ttranscript\t1348651\t1398650\t.\t+\t.\tnbElements=0;newTag=0;ID=region28;Name=region28\n-chr4\tS-MART\ttranscript\t1398601\t1448600\t.\t+\t.\tnbElements=0;newTag=0;ID=region29;Name=region29\n-chr4\tS-MART\ttranscript\t1448551\t1498550\t.\t+\t.\tnbElements=0;newTag=0;ID=region30;Name=region30\n-chr4\tS-MART\ttranscript\t1498501\t1548500\t.\t+\t.\tnbElements=0;newTag=0;ID=region31;Name=region31\n-chr4\tS-MART\ttranscript\t1548451\t1598450\t.\t+\t.\tnbElements=0;newTag=0;ID=region32;Name=region32\n-chr4\tS-MART\ttranscript\t1598401\t1648400\t.\t+\t.\tnbElements=0;newTag=0;ID=region33;Name=region33\n-chr4\tS-MART\ttranscript\t1648351\t1698350\t.\t+\t.\tnbElements=0;newTag=0;ID=region34;Name=region34\n-chr4\tS-MART\ttranscript\t1698301\t1748300\t.\t+\t.\tnbElements=0;newTag=0;ID=region35;Name=region35\n-chr4\tS-MART\ttranscript\t1748251\t1798250\t.\t+\t.\tnbElements=0;newTag=0;ID=region36;Name=region36\n-chr4\tS-MART\ttranscript\t1798201\t1848200\t.\t+\t.\tnbElements=0;newTag=0;ID=region37;Name=region37\n-chr4\tS-MART\ttranscript\t1848151\t1898150\t.\t+\t.\tnbElements=0;newTag=0;ID=region38;Name=region38\n-chr4\tS-MART\ttranscript\t1898101\t1948100\t.\t+\t.\tnbElements=0;newTag=0;ID=region39;Name=region39\n-chr4\tS-MART\ttranscript\t19
48051\t1998050\t.\t+\t.\tnbElements=0;newTag=0;ID=region40;Name=region40\n-chr4\tS-MART\ttranscript\t1998001\t2048000\t.\t+\t.\tnbElements=0;newTag=0;ID=region41;Name=region41\n-chr4\tS-MART\ttranscript\t2047951\t2097950\t.\t+\t.\tnbElements=0;newTag=0;ID=region42;Name=region42\n-chr4\tS-MART\ttranscript\t2097901\t2147900\t.\t+\t.\tnbElements=0;newTag=0;ID=region43;Name=region43\n-chr4\t'..b'851\t4195850\t.\t+\t.\tnbElements=0;newTag=0;ID=region1750;Name=region1750\n-chr1\tS-MART\ttranscript\t4195801\t4245800\t.\t+\t.\tnbElements=0;newTag=0;ID=region1751;Name=region1751\n-chr1\tS-MART\ttranscript\t4245751\t4295750\t.\t+\t.\tnbElements=0;newTag=0;ID=region1752;Name=region1752\n-chr1\tS-MART\ttranscript\t4295701\t4345700\t.\t+\t.\tnbElements=0;newTag=0;ID=region1753;Name=region1753\n-chr1\tS-MART\ttranscript\t4345651\t4395650\t.\t+\t.\tnbElements=0;newTag=0;ID=region1754;Name=region1754\n-chr1\tS-MART\ttranscript\t4395601\t4445600\t.\t+\t.\tnbElements=0;newTag=0;ID=region1755;Name=region1755\n-chr1\tS-MART\ttranscript\t4445551\t4495550\t.\t+\t.\tnbElements=0;newTag=0;ID=region1756;Name=region1756\n-chr1\tS-MART\ttranscript\t4495501\t4545500\t.\t+\t.\tnbElements=0;newTag=0;ID=region1757;Name=region1757\n-chr1\tS-MART\ttranscript\t4545451\t4595450\t.\t+\t.\tnbElements=0;newTag=0;ID=region1758;Name=region1758\n-chr1\tS-MART\ttranscript\t4595401\t4645400\t.\t+\t.\tnbElements=0;newTag=0;ID=region1759;Name=region1759\n-chr1\tS-MART\ttranscript\t4645351\t4695350\t.\t+\t.\tnbElements=0;newTag=0;ID=region1760;Name=region1760\n-chr1\tS-MART\ttranscript\t4695301\t4745300\t.\t+\t.\tnbElements=0;newTag=0;ID=region1761;Name=region1761\n-chr1\tS-MART\ttranscript\t4745251\t4795250\t.\t+\t.\tnbElements=0;newTag=0;ID=region1762;Name=region1762\n-chr1\tS-MART\ttranscript\t4795201\t4845200\t.\t+\t.\tnbElements=0;newTag=0;ID=region1763;Name=region1763\n-chr1\tS-MART\ttranscript\t4845151\t4895150\t.\t+\t.\tnbElements=0;newTag=0;ID=region1764;Name=region1764\n-chr1\tS-MART\ttranscript\t4895
101\t4945100\t.\t+\t.\tnbElements=0;newTag=0;ID=region1765;Name=region1765\n-chr1\tS-MART\ttranscript\t4945051\t4995050\t.\t+\t.\tnbElements=0;newTag=0;ID=region1766;Name=region1766\n-chr1\tS-MART\ttranscript\t4995001\t5045000\t.\t+\t.\tnbElements=0;newTag=0;ID=region1767;Name=region1767\n-chr1\tS-MART\ttranscript\t5044951\t5094950\t.\t+\t.\tnbElements=0;newTag=0;ID=region1768;Name=region1768\n-chr1\tS-MART\ttranscript\t5094901\t5144900\t.\t+\t.\tnbElements=0;newTag=0;ID=region1769;Name=region1769\n-chr1\tS-MART\ttranscript\t5144851\t5194850\t.\t+\t.\tnbElements=0;newTag=0;ID=region1770;Name=region1770\n-chr1\tS-MART\ttranscript\t5194801\t5244800\t.\t+\t.\tnbElements=0;newTag=0;ID=region1771;Name=region1771\n-chr1\tS-MART\ttranscript\t5244751\t5294750\t.\t+\t.\tnbElements=0;newTag=0;ID=region1772;Name=region1772\n-chr1\tS-MART\ttranscript\t5294701\t5344700\t.\t+\t.\tnbElements=0;newTag=0;ID=region1773;Name=region1773\n-chr1\tS-MART\ttranscript\t5344651\t5394650\t.\t+\t.\tnbElements=0;newTag=0;ID=region1774;Name=region1774\n-chr1\tS-MART\ttranscript\t5394601\t5444600\t.\t+\t.\tnbElements=0;newTag=0;ID=region1775;Name=region1775\n-chr1\tS-MART\ttranscript\t5444551\t5494550\t.\t+\t.\tnbElements=0;newTag=0;ID=region1776;Name=region1776\n-chr1\tS-MART\ttranscript\t5494501\t5544500\t.\t+\t.\tnbElements=0;newTag=0;ID=region1777;Name=region1777\n-chr1\tS-MART\ttranscript\t5544451\t5594450\t.\t+\t.\tnbElements=0;newTag=0;ID=region1778;Name=region1778\n-chr1\tS-MART\ttranscript\t5594401\t5644400\t.\t+\t.\tnbElements=0;newTag=0;ID=region1779;Name=region1779\n-chr1\tS-MART\ttranscript\t5644351\t5694350\t.\t+\t.\tnbElements=0;newTag=0;ID=region1780;Name=region1780\n-chr1\tS-MART\ttranscript\t5694301\t5744300\t.\t+\t.\tnbElements=0;newTag=0;ID=region1781;Name=region1781\n-chr1\tS-MART\ttranscript\t5744251\t5794250\t.\t+\t.\tnbElements=0;newTag=0;ID=region1782;Name=region1782\n-chr1\tS-MART\ttranscript\t5794201\t5844200\t.\t+\t.\tnbElements=0;newTag=0;ID=region1783;Name=region1783
\n-chr1\tS-MART\ttranscript\t5844151\t5894150\t.\t+\t.\tnbElements=0;newTag=0;ID=region1784;Name=region1784\n-chr1\tS-MART\ttranscript\t5894101\t5944100\t.\t+\t.\tnbElements=0;newTag=0;ID=region1785;Name=region1785\n-chr1\tS-MART\ttranscript\t5944051\t5994050\t.\t+\t.\tnbElements=0;newTag=0;ID=region1786;Name=region1786\n-chr1\tS-MART\ttranscript\t5994001\t6044000\t.\t+\t.\tnbElements=0;newTag=0;ID=region1787;Name=region1787\n-chr1\tS-MART\ttranscript\t6043951\t6093950\t.\t+\t.\tnbElements=0;newTag=0;ID=region1788;Name=region1788\n-chr1\tS-MART\ttranscript\t6093901\t6143900\t.\t+\t.\tnbElements=0;newTag=0;ID=region1789;Name=region1789\n-chr1\tS-MART\ttranscript\t6143851\t6193850\t.\t+\t.\tnbElements=1.0;newTag=1.000000;ID=region1790;Name=region1790\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/clusterize_strands_expected.gff3
--- a/SMART/Java/Python/TestFiles/clusterize_strands_expected.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,3580 +0,0 @@\n-chr4\tS-MART\ttranscript\t1\t50000\t.\t-\t.\tnbElements=0;ID=region1;Name=region1\n-chr4\tS-MART\ttranscript\t49951\t99950\t.\t-\t.\tnbElements=0;ID=region2;Name=region2\n-chr4\tS-MART\ttranscript\t99901\t149900\t.\t-\t.\tnbElements=0;ID=region3;Name=region3\n-chr4\tS-MART\ttranscript\t149851\t199850\t.\t-\t.\tnbElements=0;ID=region4;Name=region4\n-chr4\tS-MART\ttranscript\t199801\t249800\t.\t-\t.\tnbElements=0;ID=region5;Name=region5\n-chr4\tS-MART\ttranscript\t249751\t299750\t.\t-\t.\tnbElements=0;ID=region6;Name=region6\n-chr4\tS-MART\ttranscript\t299701\t349700\t.\t-\t.\tnbElements=0;ID=region7;Name=region7\n-chr4\tS-MART\ttranscript\t349651\t399650\t.\t-\t.\tnbElements=0;ID=region8;Name=region8\n-chr4\tS-MART\ttranscript\t399601\t449600\t.\t-\t.\tnbElements=0;ID=region9;Name=region9\n-chr4\tS-MART\ttranscript\t449551\t499550\t.\t-\t.\tnbElements=0;ID=region10;Name=region10\n-chr4\tS-MART\ttranscript\t499501\t549500\t.\t-\t.\tnbElements=0;ID=region11;Name=region11\n-chr4\tS-MART\ttranscript\t549451\t599450\t.\t-\t.\tnbElements=0;ID=region12;Name=region12\n-chr4\tS-MART\ttranscript\t599401\t649400\t.\t-\t.\tnbElements=0;ID=region13;Name=region13\n-chr4\tS-MART\ttranscript\t649351\t699350\t.\t-\t.\tnbElements=0;ID=region14;Name=region14\n-chr4\tS-MART\ttranscript\t699301\t749300\t.\t-\t.\tnbElements=0;ID=region15;Name=region15\n-chr4\tS-MART\ttranscript\t749251\t799250\t.\t-\t.\tnbElements=0;ID=region16;Name=region16\n-chr4\tS-MART\ttranscript\t799201\t849200\t.\t-\t.\tnbElements=0;ID=region17;Name=region17\n-chr4\tS-MART\ttranscript\t849151\t899150\t.\t-\t.\tnbElements=0;ID=region18;Name=region18\n-chr4\tS-MART\ttranscript\t899101\t949100\t.\t-\t.\tnbElements=0;ID=region19;Name=region19\n-chr4\tS-MART\ttranscript\t949051\t999050\t.\t-\t.\tnbElements=0;ID=region20;Name=region20\n-chr4\tS-MART\ttranscript\t999001\t1049000\t.\t-\t.\tnbElements=0;ID=region21;Name=region21\n-chr4\tS-MART\ttranscript\t1048951\t1098950\t.\t-\t.\tnbElements=0;ID=re
gion22;Name=region22\n-chr4\tS-MART\ttranscript\t1098901\t1148900\t.\t-\t.\tnbElements=0;ID=region23;Name=region23\n-chr4\tS-MART\ttranscript\t1148851\t1198850\t.\t-\t.\tnbElements=0;ID=region24;Name=region24\n-chr4\tS-MART\ttranscript\t1198801\t1248800\t.\t-\t.\tnbElements=0;ID=region25;Name=region25\n-chr4\tS-MART\ttranscript\t1248751\t1298750\t.\t-\t.\tnbElements=0;ID=region26;Name=region26\n-chr4\tS-MART\ttranscript\t1298701\t1348700\t.\t-\t.\tnbElements=0;ID=region27;Name=region27\n-chr4\tS-MART\ttranscript\t1348651\t1398650\t.\t-\t.\tnbElements=0;ID=region28;Name=region28\n-chr4\tS-MART\ttranscript\t1398601\t1448600\t.\t-\t.\tnbElements=0;ID=region29;Name=region29\n-chr4\tS-MART\ttranscript\t1448551\t1498550\t.\t-\t.\tnbElements=0;ID=region30;Name=region30\n-chr4\tS-MART\ttranscript\t1498501\t1548500\t.\t-\t.\tnbElements=0;ID=region31;Name=region31\n-chr4\tS-MART\ttranscript\t1548451\t1598450\t.\t-\t.\tnbElements=0;ID=region32;Name=region32\n-chr4\tS-MART\ttranscript\t1598401\t1648400\t.\t-\t.\tnbElements=0;ID=region33;Name=region33\n-chr4\tS-MART\ttranscript\t1648351\t1698350\t.\t-\t.\tnbElements=0;ID=region34;Name=region34\n-chr4\tS-MART\ttranscript\t1698301\t1748300\t.\t-\t.\tnbElements=0;ID=region35;Name=region35\n-chr4\tS-MART\ttranscript\t1748251\t1798250\t.\t-\t.\tnbElements=0;ID=region36;Name=region36\n-chr4\tS-MART\ttranscript\t1798201\t1848200\t.\t-\t.\tnbElements=0;ID=region37;Name=region37\n-chr4\tS-MART\ttranscript\t1848151\t1898150\t.\t-\t.\tnbElements=0;ID=region38;Name=region38\n-chr4\tS-MART\ttranscript\t1898101\t1948100\t.\t-\t.\tnbElements=0;ID=region39;Name=region39\n-chr4\tS-MART\ttranscript\t1948051\t1998050\t.\t-\t.\tnbElements=0;ID=region40;Name=region40\n-chr4\tS-MART\ttranscript\t1998001\t2048000\t.\t-\t.\tnbElements=0;ID=region41;Name=region41\n-chr4\tS-MART\ttranscript\t2047951\t2097950\t.\t-\t.\tnbElements=0;ID=region42;Name=region42\n-chr4\tS-MART\ttranscript\t2097901\t2147900\t.\t-\t.\tnbElements=0;ID=region43;Name=region43\n-chr
4\tS-MART\ttranscript\t2147851\t2197850\t.\t-\t.\tnbElements=0;ID=region44;Name=region44\n-chr4\tS-MART\ttranscript\t2197801\t2247800\t.\t-\t.\tnbElements=0;ID=region45;Name=region45\n-chr4\tS-MART\ttranscript\t2247751\t2297750\t.\t-\t.\tnbElements=0;ID=region46;Name=region46\n-chr4\tS-MART\ttranscript\t2297701\t2347700\t.\t-\t.\tnbElements=0;ID=region47;Name=region47\n-chr4\tS-MART\ttranscript\t2347651\t2397650\t.\t-\t.\tnbEleme'..b'MART\ttranscript\t3946051\t3996050\t.\t+\t.\tnbElements=0;ID=region3536;Name=region3536\n-chr1\tS-MART\ttranscript\t3996001\t4046000\t.\t+\t.\tnbElements=0;ID=region3537;Name=region3537\n-chr1\tS-MART\ttranscript\t4045951\t4095950\t.\t+\t.\tnbElements=0;ID=region3538;Name=region3538\n-chr1\tS-MART\ttranscript\t4095901\t4145900\t.\t+\t.\tnbElements=0;ID=region3539;Name=region3539\n-chr1\tS-MART\ttranscript\t4145851\t4195850\t.\t+\t.\tnbElements=0;ID=region3540;Name=region3540\n-chr1\tS-MART\ttranscript\t4195801\t4245800\t.\t+\t.\tnbElements=0;ID=region3541;Name=region3541\n-chr1\tS-MART\ttranscript\t4245751\t4295750\t.\t+\t.\tnbElements=0;ID=region3542;Name=region3542\n-chr1\tS-MART\ttranscript\t4295701\t4345700\t.\t+\t.\tnbElements=0;ID=region3543;Name=region3543\n-chr1\tS-MART\ttranscript\t4345651\t4395650\t.\t+\t.\tnbElements=0;ID=region3544;Name=region3544\n-chr1\tS-MART\ttranscript\t4395601\t4445600\t.\t+\t.\tnbElements=0;ID=region3545;Name=region3545\n-chr1\tS-MART\ttranscript\t4445551\t4495550\t.\t+\t.\tnbElements=0;ID=region3546;Name=region3546\n-chr1\tS-MART\ttranscript\t4495501\t4545500\t.\t+\t.\tnbElements=0;ID=region3547;Name=region3547\n-chr1\tS-MART\ttranscript\t4545451\t4595450\t.\t+\t.\tnbElements=0;ID=region3548;Name=region3548\n-chr1\tS-MART\ttranscript\t4595401\t4645400\t.\t+\t.\tnbElements=0;ID=region3549;Name=region3549\n-chr1\tS-MART\ttranscript\t4645351\t4695350\t.\t+\t.\tnbElements=0;ID=region3550;Name=region3550\n-chr1\tS-MART\ttranscript\t4695301\t4745300\t.\t+\t.\tnbElements=0;ID=region3551;Name=region3551\n-ch
r1\tS-MART\ttranscript\t4745251\t4795250\t.\t+\t.\tnbElements=0;ID=region3552;Name=region3552\n-chr1\tS-MART\ttranscript\t4795201\t4845200\t.\t+\t.\tnbElements=0;ID=region3553;Name=region3553\n-chr1\tS-MART\ttranscript\t4845151\t4895150\t.\t+\t.\tnbElements=0;ID=region3554;Name=region3554\n-chr1\tS-MART\ttranscript\t4895101\t4945100\t.\t+\t.\tnbElements=0;ID=region3555;Name=region3555\n-chr1\tS-MART\ttranscript\t4945051\t4995050\t.\t+\t.\tnbElements=0;ID=region3556;Name=region3556\n-chr1\tS-MART\ttranscript\t4995001\t5045000\t.\t+\t.\tnbElements=0;ID=region3557;Name=region3557\n-chr1\tS-MART\ttranscript\t5044951\t5094950\t.\t+\t.\tnbElements=0;ID=region3558;Name=region3558\n-chr1\tS-MART\ttranscript\t5094901\t5144900\t.\t+\t.\tnbElements=0;ID=region3559;Name=region3559\n-chr1\tS-MART\ttranscript\t5144851\t5194850\t.\t+\t.\tnbElements=0;ID=region3560;Name=region3560\n-chr1\tS-MART\ttranscript\t5194801\t5244800\t.\t+\t.\tnbElements=0;ID=region3561;Name=region3561\n-chr1\tS-MART\ttranscript\t5244751\t5294750\t.\t+\t.\tnbElements=0;ID=region3562;Name=region3562\n-chr1\tS-MART\ttranscript\t5294701\t5344700\t.\t+\t.\tnbElements=0;ID=region3563;Name=region3563\n-chr1\tS-MART\ttranscript\t5344651\t5394650\t.\t+\t.\tnbElements=0;ID=region3564;Name=region3564\n-chr1\tS-MART\ttranscript\t5394601\t5444600\t.\t+\t.\tnbElements=0;ID=region3565;Name=region3565\n-chr1\tS-MART\ttranscript\t5444551\t5494550\t.\t+\t.\tnbElements=0;ID=region3566;Name=region3566\n-chr1\tS-MART\ttranscript\t5494501\t5544500\t.\t+\t.\tnbElements=0;ID=region3567;Name=region3567\n-chr1\tS-MART\ttranscript\t5544451\t5594450\t.\t+\t.\tnbElements=0;ID=region3568;Name=region3568\n-chr1\tS-MART\ttranscript\t5594401\t5644400\t.\t+\t.\tnbElements=0;ID=region3569;Name=region3569\n-chr1\tS-MART\ttranscript\t5644351\t5694350\t.\t+\t.\tnbElements=0;ID=region3570;Name=region3570\n-chr1\tS-MART\ttranscript\t5694301\t5744300\t.\t+\t.\tnbElements=0;ID=region3571;Name=region3571\n-chr1\tS-MART\ttranscript\t5744251\t5794250
\t.\t+\t.\tnbElements=0;ID=region3572;Name=region3572\n-chr1\tS-MART\ttranscript\t5794201\t5844200\t.\t+\t.\tnbElements=0;ID=region3573;Name=region3573\n-chr1\tS-MART\ttranscript\t5844151\t5894150\t.\t+\t.\tnbElements=0;ID=region3574;Name=region3574\n-chr1\tS-MART\ttranscript\t5894101\t5944100\t.\t+\t.\tnbElements=0;ID=region3575;Name=region3575\n-chr1\tS-MART\ttranscript\t5944051\t5994050\t.\t+\t.\tnbElements=0;ID=region3576;Name=region3576\n-chr1\tS-MART\ttranscript\t5994001\t6044000\t.\t+\t.\tnbElements=0;ID=region3577;Name=region3577\n-chr1\tS-MART\ttranscript\t6043951\t6093950\t.\t+\t.\tnbElements=0;ID=region3578;Name=region3578\n-chr1\tS-MART\ttranscript\t6093901\t6143900\t.\t+\t.\tnbElements=0;ID=region3579;Name=region3579\n-chr1\tS-MART\ttranscript\t6143851\t6193850\t.\t+\t.\tnbElements=1.0;ID=region3580;Name=region3580\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/expOutputGff.gff3
--- a/SMART/Java/Python/TestFiles/expOutputGff.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,2292 +0,0 @@\n-C02HBa0185P07_LR40\tS-MART\ttranscript\t3889\t3924\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1217;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1217\n-C02HBa0185P07_LR40\tS-MART\ttranscript\t3830\t3865\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1217;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1217\n-C11SLe0053P22_LR298\tS-MART\ttranscript\t2130\t2165\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1178:755;identity=100.000000;Name=HWI-EAS337_3:7:1:1178:755\n-C11SLe0053P22_LR298\tS-MART\ttranscript\t1980\t2015\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1178:755;identity=100.000000;Name=HWI-EAS337_3:7:1:1178:755\n-C06HBa0144J05_LR355\tS-MART\ttranscript\t1\t36\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:277:1259;identity=100.000000;Name=HWI-EAS337_3:7:1:277:1259\n-C06HBa0144J05_LR355\tS-MART\ttranscript\t101\t136\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:277:1259;identity=100.000000;Name=HWI-EAS337_3:7:1:277:1259\n-C08HBa0165B06_LR218\tS-MART\ttranscript\t3619\t3654\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:1231;identity=100.000000;Name=HWI-EAS337_3:7:1:447:1231\n-C08HBa0165B06_LR218\tS-MART\ttranscript\t3575\t3610\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:1231;identity=100.000000;Name=HWI-EAS337_3:7:1:447:1231\n-C02HBa0329G05_LR52\tS-MART\ttranscript\t4746\t4781\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1154:1517;identity=100.000000;Name=HWI-EAS337_3:7:1:1154:1517\n-C02HB
a0329G05_LR52\tS-MART\ttranscript\t4680\t4715\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1154:1517;identity=100.000000;Name=HWI-EAS337_3:7:1:1154:1517\n-C04HBa80D3_LR100\tS-MART\ttranscript\t423\t458\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:164:1869;identity=100.000000;Name=HWI-EAS337_3:7:1:164:1869\n-C04HBa80D3_LR100\tS-MART\ttranscript\t397\t432\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:164:1869;identity=100.000000;Name=HWI-EAS337_3:7:1:164:1869\n-C01HBa0216G16_LR11\tS-MART\ttranscript\t648\t683\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1194;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1194\n-C01HBa0216G16_LR11\tS-MART\ttranscript\t511\t546\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1.000000;ID=HWI-EAS337_3:7:1:415:1194;identity=97.222222;Name=HWI-EAS337_3:7:1:415:1194\n-C05HBa0145P19_LR136\tS-MART\ttranscript\t3686\t3721\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=2.000000;ID=HWI-EAS337_3:7:1:645:1892;identity=94.444444;Name=HWI-EAS337_3:7:1:645:1892\n-C05HBa0145P19_LR136\tS-MART\ttranscript\t3573\t3608\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:645:1892;identity=100.000000;Name=HWI-EAS337_3:7:1:645:1892\n-C08HBa0012O06_LR211\tS-MART\ttranscript\t1768\t1803\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1.000000;ID=HWI-EAS337_3:7:1:33:1446;identity=97.222222;Name=HWI-EAS337_3:7:1:33:1446\n-C08HBa0012O06_LR211\tS-MART\ttranscript\t1649\t1684\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:33:1446;identity=100.000000;Name=HWI-EAS337_3:7:1:33:1446\n-C09HBa0194K19_LR362\tS-MART\ttranscript\t9168\t
9203\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1194:1427;identity=100.000000;Name=HWI-EAS337_3:7:1:1194:1427'..b':1:1147:62\n-C02HBa0204D01_LR334\tS-MART\ttranscript\t6704\t6739\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1147:62;identity=100.000000;Name=HWI-EAS337_3:7:1:1147:62\n-C02SLe0018B07_LR335\tS-MART\ttranscript\t8378\t8413\t.\t-\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:787:1759;identity=100.000000;Name=HWI-EAS337_3:7:1:787:1759\n-C02SLe0018B07_LR335\tS-MART\ttranscript\t8208\t8243\t.\t+\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:787:1759;identity=100.000000;Name=HWI-EAS337_3:7:1:787:1759\n-C09SLm0143I09_LR365\tS-MART\ttranscript\t1546\t1581\t.\t-\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:425:1939;identity=100.000000;Name=HWI-EAS337_3:7:1:425:1939\n-C09SLm0143I09_LR365\tS-MART\ttranscript\t1490\t1525\t.\t+\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:425:1939;identity=100.000000;Name=HWI-EAS337_3:7:1:425:1939\n-C02SLe0018B07_LR335\tS-MART\ttranscript\t9178\t9213\t.\t-\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=1.000000;ID=HWI-EAS337_3:7:1:187:1132;identity=97.222222;Name=HWI-EAS337_3:7:1:187:1132\n-C02SLe0018B07_LR335\tS-MART\ttranscript\t9065\t9100\t.\t+\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:187:1132;identity=100.000000;Name=HWI-EAS337_3:7:1:187:1132\n-C02HBa0072A04_LR26\tS-MART\ttranscript\t2868\t2903\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1739:1840;identity=100.000000;Name=HWI-EAS337_3:7:1:1739:1840\n-C02HBa0072A04_LR26\tS-MART\ttranscript\t3189\t3224\t.\t-\t
.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1.000000;ID=HWI-EAS337_3:7:1:1739:1840;identity=97.222222;Name=HWI-EAS337_3:7:1:1739:1840\n-C07SLe0111B06_LR194\tS-MART\ttranscript\t8673\t8708\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1505:1876;identity=100.000000;Name=HWI-EAS337_3:7:1:1505:1876\n-C07SLe0111B06_LR194\tS-MART\ttranscript\t8677\t8712\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1505:1876;identity=100.000000;Name=HWI-EAS337_3:7:1:1505:1876\n-C09SLm0143I09_LR365\tS-MART\ttranscript\t6957\t6992\t.\t+\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:192;identity=100.000000;Name=HWI-EAS337_3:7:1:447:192\n-C09SLm0143I09_LR365\tS-MART\ttranscript\t7039\t7074\t.\t-\t.\tquality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:192;identity=100.000000;Name=HWI-EAS337_3:7:1:447:192\n-C09SLm0037I08_LR367\tS-MART\ttranscript\t1298\t1333\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:21:2019;identity=100.000000;Name=HWI-EAS337_3:7:1:21:2019\n-C09SLm0037I08_LR367\tS-MART\ttranscript\t955\t990\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:21:2019;identity=100.000000;Name=HWI-EAS337_3:7:1:21:2019\n-C04HBa8K13_LR338\tS-MART\ttranscript\t2175\t2210\t.\t+\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1.000000;ID=HWI-EAS337_3:7:1:1593:652;identity=97.222222;Name=HWI-EAS337_3:7:1:1593:652\n-C04HBa8K13_LR338\tS-MART\ttranscript\t2226\t2261\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1593:652;identity=100.000000;Name=HWI-EAS337_3:7:1:1593:652\n-C12HBa326K10_LR306\tS-MART\ttranscript\t8100\t8135\t.\t+\t.\tquality=60;bestRegion=(self);nbG
aps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1254:1660;identity=100.000000;Name=HWI-EAS337_3:7:1:1254:1660\n-C12HBa326K10_LR306\tS-MART\ttranscript\t8243\t8278\t.\t-\t.\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1254:1660;identity=100.000000;Name=HWI-EAS337_3:7:1:1254:1660\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/expRef.fasta
--- a/SMART/Java/Python/TestFiles/expRef.fasta Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,33148 +0,0 @@\n->C10HBa0111D09_LR276\n-GAACAAACAACCCCTTTTTGGAGGTGTTGGCGCGTCGTGCAGCTTACACTCAAAAGTTAA\n-AAAGTTGCCTTGCGATGCGGTCATGTTACAAACCTCTCTGCCTTAAATTAAATTCCATAA\n-CCAAGATTTGGAGGTGCCTCAACGATGCGCAGCCATGTCCCATATTTGGTCGCCTCGTTT\n-AAAAGTCAAGTTAGACTTAATTAAGAGGTCCAACTAGTGTAGGGGCGTTTTGAGTACTTG\n-TGGGATTTATTATAAACGGTTTTGAGTCACTTTAAACCCACTTCACCAATTAAAACAAAA\n-TCCTCAAGTTAAAACTCAATATCTTTCCATTCTCTCTCTCTAAAACCTTCATTGGAGATA\n-TTTGAAGCTCCACGGAAGAAGGTTAATTTTCCAAGGTTTCAATGAAAATTTCGTGTATAG\n-GTCTTCAATAAGGTATGGTGATTTCATCCTTGATTCTTCTATCATTCAAGGATCCAATTC\n-AAAGGTTTTTCAAAAGATCTCAAAAATCCTATTTCGAATTCTAAGTATGGGTTCTTCCAT\n-TTAAAGGTTTAAATGGATGAATTATGATGTTTTCAATGTTAGTTGATGTTTTTATGATAA\n-AAAAACTCCATGAACCCATGAGCATCCTAATTCTCTAATTTTGTCTTGTAAATTGAGTTT\n-GATAATTGTGATTGGTTATGGATGGAATTGTATTTAGATTGCTCTATATTGTTGATTCTT\n-ATTGTTAACCTATCTCTATATATGTAGAATTGAGATTGTAAGGATGAGTTAGTAATCTTG\n-GCTTTATGGGCTTTCGAATCCGGGTTTACCCCCTGGATGTAACCGGCATCCTCGCCCTTT\n-TTCAAGGACTAAGACCAACCTTTTAGTCTCATGTCATTACATTCATAGGTTGACAAATGC\n-GGAAAAATTTAAAACTTTCATTATCACTACTTGGAGGTTTACATAGACCTCTACATACAC\n-ATAAGATATATTCATATAGAGTATACATAGACCCTTCGTATAGGAAGGTTACATAGCCAT\n-CTACTTTTATTACACATACATATATATAAAATATAAAAATAGTCTAACGATTGTCTCATC\n-TCATACCCTCTAAACGATTATCACAATATGGGCATAACCCTTACATCAATCAAACAAGAG\n-CACATATAGGTCATACAAAAGTATAGTACTCAATTAAAAAGGAAAGAAATGAAAGAGTCT\n-TTAAGCTCATAACAAGTCCATAAGCTAGATTATGGCATTGACCTCAAAAGTTGAGGACCT\n-TATGTGCGTACACAAGCAAAACATGCTAAAAAGGGACTTTTTAGTCAAAACATGCCCATT\n-TATCCCTTTAAGAACCTACTACAAAGCCAACAAGTCATACCAACCAACCAAACATGCTTA\n-CTATCTCAACAAGTAATACTTATCCCAACATACTTGAAACCATGATTTACTACAACCCTA\n-TCACCAAGGAAAAATATCACAAGAATGAATAAGAGTCAATCATATCATGATAGAGAGACA\n-ACTATTCATGAATCCTTATCAACTCAACAAGTGCAATAACCAAGCAAAGCCTCATAACCT\n-TACTCAATCAAGTATCCTCAAAAAGAAACCATGACCAATGTCCAACTTTACCTAACATAG\n-CATTTAGGTTTACATTTTATCATATATTAACATTATGACCCAAGGCATACTCATTAGTAA\n-ACTAATTAATATATAATATCAACAATGTGCCATAGTAATCATATATACATAATATATCAT\n-CATAACATAAACATATATAAAAACCTCCTTCTAAGACTCCCCTCAAGGCTAACTAGTGAA\n-ATGTTTAGGTAGAGCCCCATACCCCTACCTAGATTAAGCTAGACCCCTTAGGTTATCCAA\n-
GTTAGAGTTCAAGTCCTTTAATTCGTTTTACCTTTTGGGAACATCTTGCCCTAACCGACA\n-TAGACCACATGAGCTAGTGTGGGATACGGTTCCAAAAAACCCTACACAGAAAGAAGGCGG\n-ACTACTTGCCAAAGTATTACCAAAACATGAAACATAGCAACTACGTTGATCCACTAGCAA\n-GTATTTCTATAGGGGCAACATAGTTCAAGAACTCTGAGATATACTTGAGACCCTCTTTAT\n-GCGCCATGCATTATAGTCTCCAACCTCAAGAGTAATGTAGTGTTCCTACCTTCCCCATGT\n-GAGAAAGGACACTCCTCAATCTAGTTCACTCGGTGCTAAGCTAGAGACCCTTTTTGAAAT\n-GTCTTTAAGCCTTTAATTATCAATCATAGCTTAGCTTAGGTCATAGGGTATATCTCTTGT\n-ATAATCATCATCATCAATAGCTCAATAATAATTGTATGAGTATAAGTCCTTTCATCACAA\n-TTCATATAAGTGAGGTTAACATGTTAGCATTTCATTGCATATCAAGAAACATTGATGATT\n-CTTACCATCCTTGTATCACATACACCTTAATCAATCTCACAACATAGTCAGGACATATCA\n-ATTCAACATCATACCACCCTATAATCCTAATATAAGGCATACTCCAATATAACTTCACGT\n-CTTAACAAAAATTTATCACAATTGGAATTAAAGATAGAGATTCTAAGACTTAACAAGTCT\n-TCCTTGTAGTTCATCATCAAGGTCTTACCATCAACCCATAACTCAACCAAGTTTGGGGAG\n-TAACATCATCACACAATGATAATCAATAGGATAACAAGGCTAATTTCATCTCTATAACAC\n-AATTCAACACTAGATCATAACTTAAGACAAGATACATAGGCTAATTTCACACTATAATTC\n-ATAACCTAAATCACATCTCAAGAAATAGCATTATAGTCCTATAATTCATATTAATTTGTT\n-CATAATAACACAATAGGATAGTAATTTAATCAATAACCAAGTCAATTGAATGATCACAAT\n-ACAATATACATCAATATCACAAGCTAGGGTTAGGGATGAAGGATCATATTCTTCAATTTA\n-GACCAAACCACTAACAATTACCATAATAAAGTTTAAATTCATGTAAATGTATTCAATATA\n-ACCTAAATAAATCATTAACAACTCAATCCATAACTTCAATTTCGTAATTGAATGAAACCC\n-ATAAGAAAATTCACCTTTTGAAATCCATTTTAAAGAAACCCTTTGAGGAAAGAGCCTCAA\n-AGGTGAATTAGATCCCATATATTAATGTTTGATGATGAATTCGCCCCTTTCCATCCCCCA\n-AACCCTTATCCTTGCTAGTTTTTAATGGTGAGTTCAAGTAGAGAGAGAAATAAGAGAGAA\n-GGAAGAGAGTTTTTGTCTTAGAGTTCTAATTAATTTAATTGGGGTTGGGGATTTTATATG\n-CGTTTTAAGTTAGTTAATTAGTCACCCCTCAATACCTAACTAACCCCTGAACCACCTAAT\n-TAATTAAATGAATCAATATAAAAACATACAGGAAATTTGACCTTCACAGACGAGACCCCG\n-AACGACGGGCCATCTGTGAGTCAACGGTCCCTCACCCCTCCGTCCTGCACTCTATCGATC\n-AGTTCATAGACTGTGCAGGCAGATCAATTCTTCAACTTGTCTAAGTATGGGATGACGGTG\n-GTATCGACTCCCCGTCAGTCCACACACGGACCGTAGGTGGTCCCATCGATGCGCATTGTC\n-TAGTCCTTGTTTGTTCAAACACAAGGGCCTCAAGGGCCCTTGGTTGGTGCTTGGGGAGTC\n-GTACCCATACGTTTCAATCATGAAACAACTCAAAAACCTATAATCTATCCTTCCACCAAT\n-TTTTGTACCTTTCCGACTCTTAAAAGGTAGTCAAATAGGCTAAGGCA
CGCTAACACCCCT\n-TTGAACCAACTTCCTGGACGTTCTTATACATTTTGGTTCTTAAACTTCCT'..b'CAATATTCCATATTGATCGCCAGCTTCCATTGCTACAAAAGA\n-TGCATATAGTATCAGCTTCTTTAGACAAAGCTATAAGAAGTATCAGTTGACCCAATTGAC\n-AAGTCACACGTGCTATAACAAGATATCACTTGACGCAAGTGACAAGTGTGAAGCTGACAG\n-GAAAATAGGCAATAGAATCCCTCATTCTTTGTTTATATATAGCAACCTATTACTTCAGTA\n-TCTGTTTACAAGTTCTGCACCACGATAAGTATAACTATTTAGAAATTATGAAGGGAGTGT\n-TGCACAAATTAGTCAGGGTTAGAATTTTAATAATCCAACACACCAGAAATTCTGAGGACT\n-ATGCCTAGCAGCTGAAATCACCACAACAAGTTCAAAGTCAAATCCTGGCTCTTCCACATC\n-CTTTCCCTTCGTACAGTAAACTGAACAGATGCCTTTTGGATATGTTTCACTCACATACTT\n-CATAATTTCAGCATCCATGGCAGACCTACACAAGAATTACCATGAATAGGAGAATTTGGC\n-AAGCAACGGTAATCACAAAGTCAGTACTCATAAACCAAGCCTCTGATGAGAAGAACTAAC\n-CATGTTAAGAAGGGAACTACTCACATAAAACAAAAACATGATTTTTATAATCTGCATGCC\n-AAGCATTAGGAACTATTGAAGAATTCACTGCGATCTCAGAATATAATCCTTTCATACTCT\n-TAATGCCTCCCAATTATACCAAATTTAGGTGTCTTACTACCATTTTAGTATCTGACATTG\n-ATGTCATTACCCATTAATTTGGAGACACTGGTTCCTCACCGATAAAGATGAACAAAAAGG\n-TAAACCAGTGTATTATGAATCAGATCTCTTTATGGAATAAAAACACACATGAATCATGAT\n-GTAGGATCTTATTAACACTTGAGGACTGAGGCGAAGATTACTAAGAATATCCCGAAAGAT\n-AAGACATAGAATGTAATATAGAAGGACAGAGAACAAATGATACTGATTAAGAAGAAGAGC\n-ATCTATATAGGAGAGATTAGCCTAAACTATTTATTTTCAGACTGATTTCGGTGAGCGCAG\n-ACCAAAACATGCAGCTTTTTTTTCAATTAAGCCGGAAAGACAATTTCCACAAGAAATGCA\n-ACTGTTCTAGCATATCTTAAACTATAATCTGAGTGCTGTCTGAGAACTGAGGTTGAATTT\n-GCAAGTCTTGTTGAGCATGGTTAAAAAATAAGTCCAATTAGGCAAAATAATTGTGAATGT\n-CATAATATAGAAGAACTTCAACATCTCAATGGGAAAAACAGAAAGTGAGTAGCTAAAAAG\n-GGGAGCAATACCAAAGATATTAAACTGAGAAATATATCTCATACCCTACAGATGCATACC\n-TATACTCTTCCACGAAAGCAGATGGAAGTTCTTCATCTCTTGCTGGCCTAACGTCTTTAC\n-AAACCTAGAATGCAGACATACCATGAGCTTACAAGAAGGGAGCATAAATTATTACACGGC\n-AACAATAACTAGGAAAAAAAGAGAGAGAGGAAATACATTTAATTTCACAGCCCTTGAATT\n-ATGAATAAAACCATGCACTTGTTATATTAAGCAGAGACTACTTCCACTTTTCCAATCTAT\n-TTACATCTACTTCATCACATTAACAAGAATAGAAAAGAAATGCATGTTATGTGACATTCA\n-GCACTGTAGACTAATGAGGATTAATTTACAACCATGGACCAGCTATAGCAGAAGAGACCT\n-TATTTTACTTTTCCAAAATTGGTATTTACTCCATCACCATTAACAAGAATAAAAAATGCA\n-TCTTATGTGAAGTTCATAACTATTGAATAATGAGGTTAATCAAGAGCTTGCTATATC
AAA\n-GCACATTTTTCAAGTTTACAATTTCTTCATTCTTGGTATCCACTTCTATCATGAAAACCA\n-ACCTAACAGTTAACACAATTCACTCTGGTGTTTTGTTCTTTATCCCTAATGCCTGAGTGT\n-GTTTAACTAATCAAGTTCCAATCAGCCAGAAGATGTCTAAACATACTAAACTATAGATAA\n-ACAACATGAGCATAAAACCAGTTTATGTAGAGATTTTTAATTGCACAGCATAAAAAGGAG\n-TACCCCCGGCACTATATGTGCTTCTCTTTCTTTTTTTCTTACTTCGTCCTGATCTTTTTA\n-CTTTTCTTTTCCTTTCTTTCAGAATAAGCACATTTTTGGATATAGTCCCACCATCTCTAC\n-CAGGTTTATGTCTGTCCATATTATTGCTTTTTTGAATTACCTTCTAAACAAAATACTCAT\n-CCTTACAAACCTGCTTCCTCTTCTATGGATCCCACCACTCCATGCCCCGAAAAAAAAGAA\n-AGAAAAAGATAAGTTTTTCATAGTCACTTTAGATTATCATTTATTAAATTCTGTCGGCTC\n-AACTGATAGGAACAGTGAAATGGACTTTTCAATCATAAAAAGATAATAGAAGTTATTAAA\n-TGATTCCCTTCAAGATTATGAGCTTTTAAACTTACATATCATGCAACTATTGGGGAATTA\n-AGGGACTGGGGATTTGATGATAAATTCCAGCACCATTTTTGGTGCTTTTGTGTTTTTGCA\n-AGGTAGTTTGTTAGTGGCACATGGAAGGAGGTGCCTCATCCAATGAAATTATTAATCTTA\n-TCAACAAGAAGAAAGTCAAAACACCAAACTGTAAAAAATCCAAAAATAGCATTTTGCATT\n-GTGTACTAACTGAACAACGTACTTGCTTGACATGGTCAACTCTGGCAACCTGCGCAGTCC\n-GGGGATCAAGATACTCATCCTTATGAACCTCACTAAATGATGTAATCAGTACCTACAAAT\n-TAGTGAACAGCAACTTTACACAGCTAGATCATGAAAAATAGCTTCCAAGTGTCCATTACT\n-ACATAAATGAAAAGCATTATACTTTCTTTTTAGAAGAGGGGAACAAAATCTTAGACTTCA\n-TAAGGAACAGTTCTCCGGAAAGTTTTCTTTTCTATATTGAAGAAGTAATCATTTATTGAA\n-GTGGTGGAAATTCCCTAAGCTTTAAACAGATGTAGAGAACTTGTCCATAAACATGGTGCT\n-CAACAAACAACACCCAACTCATTCATATATAGTAGCGCCAGCATCCAAGTACCATGAAGT\n-ATATCTCTATCCTAAAAGCTTTACTTGAAGACTTATTAGATTAATCTTACTTATTTCTCA\n-TGCAACTTTTTTTTTGAAAAGTTTCTCAAGCATAACTTTTATCCTTTGTTTTCATTCAGT\n-CTTTGAGCTCTAAAGGATGCCTAGAGAGGTCATGAACCAAGTAAGGAAATTGCAACATCA\n-TATTGCTTCCAATTTACCTCATTTTTTATCTTCAACTACCGAAAACTACCAAATCTGAAA\n-TTATCAACTAAGGAAAATTACAACATCAGTACAATAAGTATTGCTTACAGTTCACTTAAT\n-ATTTCAATCTTCGACTACGGAAAACTGTCAAATCTGAAATTATCAATTTGAATGACACGA\n-TTAGTCTAACTTACATTTTCAAAGATCTAACTTACCCAATGAAAAAAGAAAACAAGAGAG\n-AGACTTACATCGCCACTTCTGTTTGGGAATTCGAGACAAATCAAGTGAGATTTGTTGTAC\n-GAAGGAAATGACTCCTCGGCCGCTTTCTTATATATATTTTCGTCCTTTAAAATAGCTCTA\n-ACATCTGCAAATCCAACCAAATGACGCTCAAAACCAAAAATGTAAAAAATAAACTGCCGA\n-TCGCAAATGAACACCAATGCGGTCACATTTTCAAGCACGAA
AAAAGCTTCAAAATACAAA\n-AAACTTTAGCGCAGAAAATAAACGAAAGAGAAGAAGAAGAAGACCTTTGGCGACGTACTG\n-AATTTCGCCGGCTGGGGCATTAAGAAGGAACCATTTGGCAATCTCAAT\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/inputCR.gff3
--- a/SMART/Java/Python/TestFiles/inputCR.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
-chr1 test match 6155418 6155441 24 + . Name=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100
-chr2 test match 26303950 26303981 32 + . Name=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93
-chr3 test match 28320540 28320574 35 + . Name=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94
-chr4 test match 28565007 28565041 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88
-chr1 test match 6155418 6155441 24 + . Name=test3/1;occurrence=2;rank=2;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=test3/1;identity=50
-chr1 test match 6155418 6155441 24 - . Name=test3/1;occurrence=2;rank=2;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=test3/1;identity=50
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/inputFileTest1.bed
--- a/SMART/Java/Python/TestFiles/inputFileTest1.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-track name=reads description="Reads" useScore=0 visibility=full offset=0
-arm_X 1000 2000 test1.1 1000 + 1000 2000 0 1 1000, 0,
-arm_X 1000 2000 test1.2 1000 - 1000 2000 0 1 1000, 0,
-arm_X 100 200 test1.3 1000 + 100 200 0 1 100, 0,
-arm_X 100 3200 test1.4 1000 + 100 3200 0 1 3100, 0,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/inputFileTest2.bed
--- a/SMART/Java/Python/TestFiles/inputFileTest2.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-track name=reads454Relaxed description="reads454Relaxed" useScore=0 visibility=full offset=0
-arm_X 1000 2000 test2.1 1000 + 1000 2000 0 1 1000, 0,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/inputMSWC1.gff3
--- a/SMART/Java/Python/TestFiles/inputMSWC1.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-chr1 test match 6155418 6155441 24 + . Name=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100
-chr2 test match 26303950 26303981 32 + . Name=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93
-chr3 test match 28320540 28320574 35 + . Name=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94
-chr4 test match 28565007 28565041 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88
-chr6 test match 48565007 48565041 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/inputMSWC2.gff3
--- a/SMART/Java/Python/TestFiles/inputMSWC2.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-chr1 test match 6155418 6155441 24 + . Name=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100
-chr2 test match 26303990 26304021 32 + . Name=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93
-chr3 test match 28320540 28320574 35 + . Name=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94
-chr4 test match 28565017 28565051 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88
-chr5 test match 30000000 30000050 50 + . Name=test3/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=1;ID=test3/1-4;identity=50
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/inputMTC.sam
--- a/SMART/Java/Python/TestFiles/inputMTC.sam Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,2698 +0,0 @@\n-@SQ\tSN:C10HBa0111D09_LR276\tLN:9300\n-@SQ\tSN:C11HBa0029C01_LR281\tLN:10969\n-@SQ\tSN:C11HBa0034I10_LR282\tLN:9056\n-@SQ\tSN:C11HBa0054I23_LR283\tLN:10301\n-@SQ\tSN:C11HBa0062I24_LR284\tLN:10050\n-@SQ\tSN:C11HBa0064J13_LR285\tLN:9385\n-@SQ\tSN:C11HBa0072I13_LR286\tLN:9556\n-@SQ\tSN:C11HBa0089M02_LR287\tLN:9244\n-@SQ\tSN:C11HBa0096D22_LR288\tLN:9184\n-@SQ\tSN:C11HBa0107K14_LR289\tLN:9115\n-@SQ\tSN:C11HBa0139J14_LR291\tLN:10002\n-@SQ\tSN:C11HBa0143O06_LR374\tLN:10785\n-@SQ\tSN:C11HBa0161D01_LR292\tLN:9057\n-@SQ\tSN:C11HBa0168B23_LR293\tLN:9826\n-@SQ\tSN:C11HBa0190J03_LR294\tLN:10992\n-@SQ\tSN:C11HBa0249E07_LR279\tLN:10008\n-@SQ\tSN:C11HBa0303G16_LR296\tLN:9430\n-@SQ\tSN:C11HBa0323E19_LR297\tLN:9657\n-@SQ\tSN:C11SLe0053P22_LR298\tLN:9827\n-@SQ\tSN:C11SLm0052K14_LR376\tLN:10013\n-@SQ\tSN:C12HBa115G22_LR301\tLN:10021\n-@SQ\tSN:C12HBa120K4_LR313\tLN:10271\n-@SQ\tSN:C12HBa144B17_LR302\tLN:9247\n-@SQ\tSN:C12HBa149G24_LR381\tLN:9271\n-@SQ\tSN:C12HBa165B12_LR303\tLN:9257\n-@SQ\tSN:C12HBa183M6_LR379\tLN:9473\n-@SQ\tSN:C12HBa221M9_LR377\tLN:10755\n-@SQ\tSN:C12HBa224N6_LR382\tLN:9130\n-@SQ\tSN:C12HBa26C13_LR299\tLN:9139\n-@SQ\tSN:C12HBa326K10_LR306\tLN:10414\n-@SQ\tSN:C12HBa90D9_LR311\tLN:9638\n-@SQ\tSN:C12HBa93P12_LR312\tLN:9510\n-@SQ\tSN:C12SLe124D18_LR385\tLN:10545\n-@SQ\tSN:C12SLeRI72J6_LR378\tLN:9337\n-@SQ\tSN:C12SLm103K8_LR380\tLN:10118\n-@SQ\tSN:C01HBa0003D15_LR7\tLN:10776\n-@SQ\tSN:C01HBa0163B20_LR10\tLN:9321\n-@SQ\tSN:C01HBa0216G16_LR11\tLN:10332\n-@SQ\tSN:C01HBa0256E08_LR13\tLN:9024\n-@SQ\tSN:C01HBa0329A12_LR14\tLN:9536\n-@SQ\tSN:BAC19_LR16\tLN:9760\n-@SQ\tSN:C02HBa0008G02_LR67\tLN:9205\n-@SQ\tSN:C02HBa0011O23_LR68\tLN:9399\n-@SQ\tSN:C02HBa0016A12_LR19\tLN:9822\n-@SQ\tSN:C02HBa0027B01_LR21\tLN:9222\n-@SQ\tSN:C02HBa0030A21_LR22\tLN:9147\n-@SQ\tSN:C02HBa0046M08_LR23\tLN:10763\n-@SQ\tSN:C02HBa0072A04_LR26\tLN:9766\n-@SQ\tSN:C02HBa0075D08_LR28\tLN:10744\n-@SQ\tSN:C02HBa0124N09_LR31\tLN:9335\n-@SQ\tSN:C02HBa0155D20_LR36\tLN:10743\n-@SQ\tSN:C02HBa0155
E05_LR37\tLN:10417\n-@SQ\tSN:C02HBa0164H08_LR38\tLN:10279\n-@SQ\tSN:C02HBa0167J21_LR39\tLN:9925\n-@SQ\tSN:C02HBa0185P07_LR40\tLN:9818\n-@SQ\tSN:C02HBa0190N21_LR41\tLN:10835\n-@SQ\tSN:C02HBa0190P16_LR331\tLN:10808\n-@SQ\tSN:C02HBa0194L19_LR42\tLN:10280\n-@SQ\tSN:C02HBa0204A09_LR332\tLN:10029\n-@SQ\tSN:C02HBa0204D01_LR334\tLN:9746\n-@SQ\tSN:C02HBa0214B22_LR325\tLN:9581\n-@SQ\tSN:C02HBa0215M12_LR319\tLN:9918\n-@SQ\tSN:C02HBa0228I09_LR329\tLN:10933\n-@SQ\tSN:C02HBa0236E02_LR326\tLN:9822\n-@SQ\tSN:C02HBa0284G15_LR47\tLN:9034\n-@SQ\tSN:C02HBa0291P19_LR48\tLN:9826\n-@SQ\tSN:C02HBa0329G05_LR52\tLN:9637\n-@SQ\tSN:C02SLe0010H16_LR53\tLN:10744\n-@SQ\tSN:C02SLe0018B07_LR335\tLN:9222\n-@SQ\tSN:C02SLe0034H10_LR327\tLN:10833\n-@SQ\tSN:C02SLe0127J16_LR59\tLN:10965\n-@SQ\tSN:C02SLe0132D01_LR60\tLN:10524\n-@SQ\tSN:C02SLm0057H03_LR336\tLN:9514\n-@SQ\tSN:C02SLm0057H03_LR64\tLN:9170\n-@SQ\tSN:C02SLm0057H03_LR65\tLN:9532\n-@SQ\tSN:C03HBa0012D06_LR72\tLN:10645\n-@SQ\tSN:C03HBa0030O03_LR74\tLN:10569\n-@SQ\tSN:C03HBa0034B23_LR76\tLN:10005\n-@SQ\tSN:C03HBa0040F22_LR77\tLN:10227\n-@SQ\tSN:C03HBa0054O21_LR78\tLN:9044\n-@SQ\tSN:C03HBa0076J13_LR79\tLN:10097\n-@SQ\tSN:C03HBa0233O20_LR82\tLN:9753\n-@SQ\tSN:C03HBa0295I12_LR83\tLN:10258\n-@SQ\tSN:C03HBa0318C22_LR84\tLN:10004\n-@SQ\tSN:C03HBa0323D22_LR85\tLN:9222\n-@SQ\tSN:C04HBa127N12_LR346\tLN:10533\n-@SQ\tSN:C04HBa132O11_LR104\tLN:10306\n-@SQ\tSN:C04HBa164O3_LR344\tLN:9345\n-@SQ\tSN:C04HBa190C13_LR106\tLN:10719\n-@SQ\tSN:C04HBa198I15_LR107\tLN:10673\n-@SQ\tSN:C04HBa219H8_LR109\tLN:10174\n-@SQ\tSN:C04HBa239P14_LR111\tLN:10483\n-@SQ\tSN:C04HBa255I2_LR112\tLN:10650\n-@SQ\tSN:C04HBa27G19_LR337\tLN:9788\n-@SQ\tSN:C04HBa2G1_LR120\tLN:9322\n-@SQ\tSN:C04HBa331L22_LR115\tLN:10697\n-@SQ\tSN:C04HBa35C16_LR339\tLN:9494\n-@SQ\tSN:C04HBa36C23_LR91\tLN:10103\n-@SQ\tSN:C04HBa50I18_LR341\tLN:10825\n-@SQ\tSN:C04HBa58E11_LR93\tLN:9927\n-@SQ\tSN:C04HBa66O12_LR94\tLN:9355\n-@SQ\tSN:C04HBa68N5_LR343\tLN:9886\n-@SQ\tSN:C04HBa6E18_LR87\tLN:9265\n-@SQ\tSN:C04HBa6O16_LR123
\tLN:10386\n-@SQ\tSN:C04HBa78E4_LR98\tLN:9994\n-@SQ\tSN:C04HBa78J4_LR99\tLN:9165\n-@SQ\tSN:C04HBa80D3_LR100\tLN:9781\n-@SQ\tSN:C04HBa8K13_LR338\tLN:9345\n-@SQ\tSN:C04HBa96I8_LR101\tLN:9693\n-@SQ\tSN:C04SLm14G22_LR116\tLN:10306\n-@SQ\tSN:C04SLm39E17_LR117\tLN:9105\n-@SQ\tSN:C05HBa0003C20_LR126\tLN:9460\n-@SQ\tSN:C05HBa0006N20_LR128\tLN:101'..b'018B07_LR335,+8208,36M,0;\n-HWI-EAS337_3:7:1:425:1939\t83\tC09SLm0143I09_LR365\t1546\t0\t36M\t=\t1490\t-92\tAAGTTTAGCCACATAGACCCAGACACCACAATTAGC\tUUUUUUWVVVWVVWWWVVVVWWWVWWWWVWWVWWWW\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLe0076N09_LR363,-1546,36M,0;\n-HWI-EAS337_3:7:1:425:1939\t163\tC09SLm0143I09_LR365\t1490\t0\t36M\t=\t1546\t92\tTAACTTTTCTATCTGGTTTCTATGTTTTCCAGCTCT\tWVWWWWWWWWWWWWVVVWWWWWWUVWVWVVTQTTTS\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLm0143I09_LR365,+1490,36M,0;\n-HWI-EAS337_3:7:1:187:1132\t83\tC02SLe0018B07_LR335\t9178\t0\t36M\t=\t9065\t-149\tGAAGAGGATATGAGCCAAGCCCCTTGCCTCTCCCAC\tUUUUUUVVWWWVWVTWWWWWWWWWWWWWWWWWWWVW\tXT:A:R\tNM:i:1\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:14A21\tXA:Z:C02SLe0018B07_LR335,-9178,36M,1;\n-HWI-EAS337_3:7:1:187:1132\t163\tC02SLe0018B07_LR335\t9065\t0\t36M\t=\t9178\t149\tGAATAAAAAAAGACAACAACATATCAAGATACAAAG\tWWWVWVWWWWWVWWVWWWVWWVWWWVWWVWTTTTTR\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C02SLe0018B07_LR335,+9065,36M,0;\n-HWI-EAS337_3:7:1:1739:1840\t99\tC02HBa0072A04_LR26\t2868\t60\t36M\t=\t3189\t357\tGGAGGGGTGAAATCGTTTCTGAAAAATAATGAAATG\tWWVWWWWWWWWWWWWWWVTWWWVVVWWWWWUUUUUU\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1739:1840\t147\tC02HBa0072A04_LR26\t3189\t60\t36M\t=\t2868\t-357\tCTTTTGACCCAAAAGTTTGACGGGAAGGACAGTTTT\tRTTTTTVVVVWWCVWVVWWWVWWWWWWWWWWWWVWW\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:12T23\n-HWI-EAS337_3:7:1:1505:1876\
t99\tC07SLe0111B06_LR194\t8673\t60\t36M\t=\t8677\t40\tGAAAGATCAAGTGTTGTCAAGTTCACTAGTTTAGAG\tWWWWWWWWWWWWWWWWWWWWVWWVWWWVVVUUUSUR\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1505:1876\t147\tC07SLe0111B06_LR194\t8677\t60\t36M\t=\t8673\t-40\tGATCAAGTGTTGTCAAGTTCACTAGTTTAGAGAATG\tSTTTTTVVVWVVWVWWWWWVWWWWWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:447:192\t99\tC09SLm0143I09_LR365\t6957\t0\t36M\t=\t7039\t118\tGACTATGCCTAGCAGCTGAAATCACCACAACAAGTT\tWWWWWWWWWWWWWWWTWWWVVWWVWWWWWVUUUUUU\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLm0143I09_LR365,+6957,36M,0;\n-HWI-EAS337_3:7:1:447:192\t147\tC09SLm0143I09_LR365\t7039\t0\t36M\t=\t6957\t-118\tAACTGAACAGATGCCTTTTGGATATGTTTCACTCAC\tSTTTTTWVVWVWWVVVVVWWWVVWVWWWWWVWWWVW\tXT:A:R\tNM:i:0\tSM:i:0\tAM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLe0076N09_LR363,-7039,36M,0;\n-HWI-EAS337_3:7:1:21:2019\t83\tC09SLm0037I08_LR367\t1298\t60\t36M\t=\t955\t-379\tGGGCTGGAAGACAGGTTATCATCTTTTACCTCATAC\tUUURUUWWWVVQWWWWWWWWWWWWWWWVVWWVVWWV\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:21:2019\t163\tC09SLm0037I08_LR367\t955\t60\t36M\t=\t1298\t379\tATTATGTTTACGGGACAATTGTATGTTCCATTATCT\tVWVWWWWWWWWWWWWWWWWWVWVWUWVVWWTTTTTR\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1593:652\t99\tC04HBa8K13_LR338\t2175\t60\t36M\t=\t2226\t87\tGTGATGAGTAAAACATCATCATATGAACTTGAAGAG\tWWWVWVWWVWVWWVWWWWWWVVWWVWWVWWUUUSUU\tXT:A:U\tNM:i:1\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:28A7\n-HWI-EAS337_3:7:1:1593:652\t147\tC04HBa8K13_LR338\t2226\t60\t36M\t=\t2175\t-87\tTATGCTTAAAACAAGAGGAATTATACAGCTAAATAA\tSTTTKTWWWWWVWWWWVWVVVWVWWWWWWWVWVVWW\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1254:1660\t99\
tC12HBa326K10_LR306\t8100\t60\t36M\t=\t8243\t179\tGAAGTTTGTAATTCCTTTTAGGATTGTGGTTAACAT\tWWWVVWWWWWWWWWWWVWVWVUWWWTWVQWUUUUMU\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1254:1660\t147\tC12HBa326K10_LR306\t8243\t60\t36M\t=\t8100\t-179\tTGTACATTTTTCCTACCCATATGTGATGCCATTACT\tSTTTTTWVVVVWWVVWWVWWWVWVVWVWVWVWWWVW\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:291:629\t77\t*\t0\t0\t*\t*\t0\t0\tGTAGAGGAGGTAGGCTTGGTGGTCCCTCTATGGTAA\tWWWWWWWWWWVVVWVWVVWTWWKOVVTRVSUSSMFR\n-HWI-EAS337_3:7:1:291:629\t141\t*\t0\t0\t*\t*\t0\t0\tATGAAGGGTTTTTTTGTTCTCTAATGTCATCTTATT\tWWWWVVWWWWWWWWWVWWVWVWVVVQWVVWTTTTTS\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/inputMapping.map
--- a/SMART/Java/Python/TestFiles/inputMapping.map Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,21 +0,0 @@
-BlastclustCluster1Mb1 dmel 44957 60589
-BlastclustCluster2Mb1 dmel 441296 453986
-BlastclustCluster3Mb1 dmel 1263264 1272001
-BlastclustCluster4Mb1 dmel 691910 700435
-BlastclustCluster5Mb1 dmel 4887 13246
-BlastclustCluster6Mb1 dmel 340294 348412
-BlastclustCluster7Mb1 dmel 802363 809343
-BlastclustCluster8Mb1 dmel 303029 309770
-BlastclustCluster9Mb1 dmel 34275 40713
-BlastclustCluster10Mb1 dmel 976199 981423
-BlastclustCluster11Mb1 dmel 231806 236301
-BlastclustCluster12Mb1 dmel 323712 327988
-BlastclustCluster13Mb1 dmel 1011279 1014955
-BlastclustCluster14Mb1 dmel 474293 477597
-BlastclustCluster15Mb1 dmel 930649 933730
-BlastclustCluster16Mb1 dmel 1241523 1244351
-BlastclustCluster17Mb1 dmel 532049 534729
-BlastclustCluster18Mb1 dmel 335473 337381
-BlastclustCluster19Mb1 dmel 686181 687792
-BlastclustCluster20Mb1 dmel 1239136 1240579
-BlastclustCluster21Mb1 dmel 1261233 1262370
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/mapperAnalyzerExpected.gff3
--- a/SMART/Java/Python/TestFiles/mapperAnalyzerExpected.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-chr1 test match 6155418 6155441 24 + . Name=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100
-chr2 test match 26303950 26303981 32 + . Name=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93
-chr3 test match 28320540 28320574 35 + . Name=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94
-chr4 test match 28565007 28565041 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/mapperAnalyzerMappings.axt
--- a/SMART/Java/Python/TestFiles/mapperAnalyzerMappings.axt Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,12 +0,0 @@
-1 chr1 6155418 6155441 test1/1 1 24 + 66
-GTAACAGATTCAGAACATTAGCAG
-GTAACAGATTCAGAACATTAGCAG
-2 chr2 26303950 26303981 test2/1 3 36 + 0
-AT-ATT-AAAAAAAAAAAAAAAAAAAAAAAAAAA
-ATGATTGAAAAAAAAAAAAAAAAAAAAAAAAAAA
-3 chr3 28320540 28320574 test2/1 3 36 + 0
-ATGTTTGACAAAAAAAAAAAAAAAAAAAAAAAAAA
-ATGATTGA-AAAAAAAAAAAAAAAAAAAAAAAAAA
-4 chr4 28565007 28565041 test2/1 1 36 + 0
-ATAAGATT-AAAAAAAAAAAAAAAAAAAAGGAAAAA
-ATATGATTGAAAAAAAAAAAAAAAAAAAAAAAAAAA
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/mapperAnalyzerOutput.gff3
--- a/SMART/Java/Python/TestFiles/mapperAnalyzerOutput.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-chr1 test match 6155418 6155441 24 + . Name=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100
-chr2 test match 26303950 26303981 32 + . Name=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93
-chr3 test match 28320540 28320574 35 + . Name=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94
-chr4 test match 28565007 28565041 35 + . Name=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/mapperAnalyzerSequences.mfq
--- a/SMART/Java/Python/TestFiles/mapperAnalyzerSequences.mfq Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,12 +0,0 @@
-@test1/1
-GTAACAGATTCAGAACATTAGCAG
-+test1/1
-bb`b_bbbbb_bbbbababbbbb^
-@test2/1
-ATATGATTGAAAAAAAAAAAAAAAAAAAAAAAAAAA
-+test2/1
-BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
-@test3/1
-TTTGAATAAAACGGGAGGATATA
-+test3/1
-X^_Y`_____\R^BBBBBBBBBB
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/sorted_file_oneline.gff3
--- a/SMART/Java/Python/TestFiles/sorted_file_oneline.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-chr1 test test2.1 0 1000 1001 + . ID=test2.1;Name=test2.1
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/sorted_query.gff3
--- a/SMART/Java/Python/TestFiles/sorted_query.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
-chr1 query test1.1 25 150 126 + . ID=query_1;Name=test1.1
-chr1 query test1.2 70 850 781 + . ID=query_2;Name=test1.2
-chr1 query test1.3 550 850 201 + . ID=query_3;Name=test1.3
-chr1 query test1.4 925 1025 101 + . ID=query_4;Name=test1.4
-chr1 query test1.5 1201 1210 10 + . ID=query_5;Name=test1.5
-chr1 query test1.6 1500 1600 101 + . ID=query_6;Name=test1.6
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/sorted_query_wig.wig
--- a/SMART/Java/Python/TestFiles/sorted_query_wig.wig Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1040 +0,0 @@
-track type=wiggle_0 name="SMART"
-variableStep chrom=chr1
-25 1
-26 1
-27 1
-28 1
-29 1
-30 1
-31 1
-32 1
-33 1
-34 1
-35 1
-36 1
-37 1
-38 1
-39 1
-40 1
-41 1
-42 1
-43 1
-44 1
-45 1
-46 1
-47 1
-48 1
-49 1
-50 1
-51 1
-52 1
-53 1
-54 1
-55 1
-56 1
-57 1
-58 1
-59 1
-60 1
-61 1
-62 1
-63 1
-64 1
-65 1
-66 1
-67 1
-68 1
-69 1
-70 2
-71 2
-72 2
-73 2
-74 2
-75 2
-76 2
-77 2
-78 2
-79 2
-80 2
-81 2
-82 2
-83 2
-84 2
-85 2
-86 2
-87 2
-88 2
-89 2
-90 2
-91 2
-92 2
-93 2
-94 2
-95 2
-96 2
-97 2
-98 2
-99 2
-100 2
-101 2
-102 2
-103 2
-104 2
-105 2
-106 2
-107 2
-108 2
-109 2
-110 2
-111 2
-112 2
-113 2
-114 2
-115 2
-116 2
-117 2
-118 2
-119 2
-120 2
-121 2
-122 2
-123 2
-124 2
-125 2
-126 2
-127 2
-128 2
-129 2
-130 2
-131 2
-132 2
-133 2
-134 2
-135 2
-136 2
-137 2
-138 2
-139 2
-140 2
-141 2
-142 2
-143 2
-144 2
-145 2
-146 2
-147 2
-148 2
-149 2
-150 2
-151 1
-152 1
-153 1
-154 1
-155 1
-156 1
-157 1
-158 1
-159 1
-160 1
-161 1
-162 1
-163 1
-164 1
-165 1
-166 1
-167 1
-168 1
-169 1
-170 1
-171 1
-172 1
-173 1
-174 1
-175 1
-176 1
-177 1
-178 1
-179 1
-180 1
-181 1
-182 1
-183 1
-184 1
-185 1
-186 1
-187 1
-188 1
-189 1
-190 1
-191 1
-192 1
-193 1
-194 1
-195 1
-196 1
-197 1
-198 1
-199 1
-200 1
-201 1
-202 1
-203 1
-204 1
-205 1
-206 1
-207 1
-208 1
-209 1
-210 1
-211 1
-212 1
-213 1
-214 1
-215 1
-216 1
-217 1
-218 1
-219 1
-220 1
-221 1
-222 1
-223 1
-224 1
-225 1
-226 1
-227 1
-228 1
-229 1
-230 1
-231 1
-232 1
-233 1
-234 1
-235 1
-236 1
-237 1
-238 1
-239 1
-240 1
-241 1
-242 1
-243 1
-244 1
-245 1
-246 1
-247 1
-248 1
-249 1
-250 1
-251 1
-252 1
-253 1
-254 1
-255 1
-256 1
-257 1
-258 1
-259 1
-260 1
-261 1
-262 1
-263 1
-264 1
-265 1
-266 1
-267 1
-268 1
-269 1
-270 1
-271 1
-272 1
-273 1
-274 1
-275 1
-276 1
-277 1
-278 1
-279 1
-280 1
-281 1
-282 1
-283 1
-284 1
-285 1
-286 1
-287 1
-288 1
-289 1
-290 1
-291 1
-292 1
-293 1
-294 1
-295 1
-296 1
-297 1
-298 1
-299 1
-300 1
-301 1
-302 1
-303 1
-304 1
-305 1
-306 1
-307 1
-308 1
-309 1
-310 1
-311 1
-312 1
-313 1
-314 1
-315 1
-316 1
-317 1
-318 1
-319 1
-320 1
-321 1
-322 1
-323 1
-324 1
-325 1
-326 1
-327 1
-328 1
-329 1
-330 1
-331 1
-332 1
-333 1
-334 1
-335 1
-336 1
-337 1
-338 1
-339 1
-340 1
-341 1
-342 1
-343 1
-344 1
-345 1
-346 1
-347 1
-348 1
-349 1
-350 1
-351 1
-352 1
-353 1
-354 1
-355 1
-356 1
-357 1
-358 1
-359 1
-360 1
-361 1
-362 1
-363 1
-364 1
-365 1
-366 1
-367 1
-368 1
-369 1
-370 1
-371 1
-372 1
-373 1
-374 1
-375 1
-376 1
-377 1
-378 1
-379 1
-380 1
-381 1
-382 1
-383 1
-384 1
-385 1
-386 1
-387 1
-388 1
-389 1
-390 1
-391 1
-392 1
-393 1
-394 1
-395 1
-396 1
-397 1
-398 1
-399 1
-400 1
-401 1
-402 1
-403 1
-404 1
-405 1
-406 1
-407 1
-408 1
-409 1
-410 1
-411 1
-412 1
-413 1
-414 1
-415 1
-416 1
-417 1
-418 1
-419 1
-420 1
-421 1
-422 1
-423 1
-424 1
-425 1
-426 1
-427 1
-428 1
-429 1
-430 1
-431 1
-432 1
-433 1
-434 1
-435 1
-436 1
-437 1
-438 1
-439 1
-440 1
-441 1
-442 1
-443 1
-444 1
-445 1
-446 1
-447 1
-448 1
-449 1
-450 1
-451 1
-452 1
-453 1
-454 1
-455 1
-456 1
-457 1
-458 1
-459 1
-460 1
-461 1
-462 1
-463 1
-464 1
-465 1
-466 1
-467 1
-468 1
-469 1
-470 1
-471 1
-472 1
-473 1
-474 1
-475 1
-476 1
-477 1
-478 1
-479 1
-480 1
-481 1
-482 1
-483 1
-484 1
-485 1
-486 1
-487 1
-488 1
-489 1
-490 1
-491 1
-492 1
-493 1
-494 1
-495 1
-496 1
-497 1
-498 1
-499 1
-500 1
-501 1
-502 1
-503 1
-504 1
-505 1
-506 1
-507 1
-508 1
-509 1
-510 1
-511 1
-512 1
-513 1
-514 1
-515 1
-516 1
-517 1
-518 1
-519 1
-520 1
-521 1
-522 1
-523 1
-524 1
-525 1
-526 1
-527 1
-528 1
-529 1
-530 1
-531 1
-532 1
-533 1
-534 1
-535 1
-536 1
-537 1
-538 1
-539 1
-540 1
-541 1
-542 1
-543 1
-544 1
-545 1
-546 1
-547 1
-548 1
-549 1
-550 2
-551 2
-552 2
-553 2
-554 2
-555 2
-556 2
-557 2
-558 2
-559 2
-560 2
-561 2
-562 2
-563 2
-564 2
-565 2
-566 2
-567 2
-568 2
-569 2
-570 2
-571 2
-572 2
-573 2
-574 2
-575 2
-576 2
-577 2
-578 2
-579 2
-580 2
-581 2
-582 2
-583 2
-584 2
-585 2
-586 2
-587 2
-588 2
-589 2
-590 2
-591 2
-592 2
-593 2
-594 2
-595 2
-596 2
-597 2
-598 2
-599 2
-600 2
-601 2
-602 2
-603 2
-604 2
-605 2
-606 2
-607 2
-608 2
-609 2
-610 2
-611 2
-612 2
-613 2
-614 2
-615 2
-616 2
-617 2
-618 2
-619 2
-620 2
-621 2
-622 2
-623 2
-624 2
-625 2
-626 2
-627 2
-628 2
-629 2
-630 2
-631 2
-632 2
-633 2
-634 2
-635 2
-636 2
-637 2
-638 2
-639 2
-640 2
-641 2
-642 2
-643 2
-644 2
-645 2
-646 2
-647 2
-648 2
-649 2
-650 2
-651 2
-652 2
-653 2
-654 2
-655 2
-656 2
-657 2
-658 2
-659 2
-660 2
-661 2
-662 2
-663 2
-664 2
-665 2
-666 2
-667 2
-668 2
-669 2
-670 2
-671 2
-672 2
-673 2
-674 2
-675 2
-676 2
-677 2
-678 2
-679 2
-680 2
-681 2
-682 2
-683 2
-684 2
-685 2
-686 2
-687 2
-688 2
-689 2
-690 2
-691 2
-692 2
-693 2
-694 2
-695 2
-696 2
-697 2
-698 2
-699 2
-700 2
-701 2
-702 2
-703 2
-704 2
-705 2
-706 2
-707 2
-708 2
-709 2
-710 2
-711 2
-712 2
-713 2
-714 2
-715 2
-716 2
-717 2
-718 2
-719 2
-720 2
-721 2
-722 2
-723 2
-724 2
-725 2
-726 2
-727 2
-728 2
-729 2
-730 2
-731 2
-732 2
-733 2
-734 2
-735 2
-736 2
-737 2
-738 2
-739 2
-740 2
-741 2
-742 2
-743 2
-744 2
-745 2
-746 2
-747 2
-748 2
-749 2
-750 2
-751 2
-752 2
-753 2
-754 2
-755 2
-756 2
-757 2
-758 2
-759 2
-760 2
-761 2
-762 2
-763 2
-764 2
-765 2
-766 2
-767 2
-768 2
-769 2
-770 2
-771 2
-772 2
-773 2
-774 2
-775 2
-776 2
-777 2
-778 2
-779 2
-780 2
-781 2
-782 2
-783 2
-784 2
-785 2
-786 2
-787 2
-788 2
-789 2
-790 2
-791 2
-792 2
-793 2
-794 2
-795 2
-796 2
-797 2
-798 2
-799 2
-800 2
-801 2
-802 2
-803 2
-804 2
-805 2
-806 2
-807 2
-808 2
-809 2
-810 2
-811 2
-812 2
-813 2
-814 2
-815 2
-816 2
-817 2
-818 2
-819 2
-820 2
-821 2
-822 2
-823 2
-824 2
-825 2
-826 2
-827 2
-828 2
-829 2
-830 2
-831 2
-832 2
-833 2
-834 2
-835 2
-836 2
-837 2
-838 2
-839 2
-840 2
-841 2
-842 2
-843 2
-844 2
-845 2
-846 2
-847 2
-848 2
-849 2
-850 2
-925 1
-926 1
-927 1
-928 1
-929 1
-930 1
-931 1
-932 1
-933 1
-934 1
-935 1
-936 1
-937 1
-938 1
-939 1
-940 1
-941 1
-942 1
-943 1
-944 1
-945 1
-946 1
-947 1
-948 1
-949 1
-950 1
-951 1
-952 1
-953 1
-954 1
-955 1
-956 1
-957 1
-958 1
-959 1
-960 1
-961 1
-962 1
-963 1
-964 1
-965 1
-966 1
-967 1
-968 1
-969 1
-970 1
-971 1
-972 1
-973 1
-974 1
-975 1
-976 1
-977 1
-978 1
-979 1
-980 1
-981 1
-982 1
-983 1
-984 1
-985 1
-986 1
-987 1
-988 1
-989 1
-990 1
-991 1
-992 1
-993 1
-994 1
-995 1
-996 1
-997 1
-998 1
-999 1
-1000 1
-1001 1
-1002 1
-1003 1
-1004 1
-1005 1
-1006 1
-1007 1
-1008 1
-1009 1
-1010 1
-1011 1
-1012 1
-1013 1
-1014 1
-1015 1
-1016 1
-1017 1
-1018 1
-1019 1
-1020 1
-1021 1
-1022 1
-1023 1
-1024 1
-1025 1
-1201 1
-1202 1
-1203 1
-1204 1
-1205 1
-1206 1
-1207 1
-1208 1
-1209 1
-1210 1
-1500 1
-1501 1
-1502 1
-1503 1
-1504 1
-1505 1
-1506 1
-1507 1
-1508 1
-1509 1
-1510 1
-1511 1
-1512 1
-1513 1
-1514 1
-1515 1
-1516 1
-1517 1
-1518 1
-1519 1
-1520 1
-1521 1
-1522 1
-1523 1
-1524 1
-1525 1
-1526 1
-1527 1
-1528 1
-1529 1
-1530 1
-1531 1
-1532 1
-1533 1
-1534 1
-1535 1
-1536 1
-1537 1
-1538 1
-1539 1
-1540 1
-1541 1
-1542 1
-1543 1
-1544 1
-1545 1
-1546 1
-1547 1
-1548 1
-1549 1
-1550 1
-1551 1
-1552 1
-1553 1
-1554 1
-1555 1
-1556 1
-1557 1
-1558 1
-1559 1
-1560 1
-1561 1
-1562 1
-1563 1
-1564 1
-1565 1
-1566 1
-1567 1
-1568 1
-1569 1
-1570 1
-1571 1
-1572 1
-1573 1
-1574 1
-1575 1
-1576 1
-1577 1
-1578 1
-1579 1
-1580 1
-1581 1
-1582 1
-1583 1
-1584 1
-1585 1
-1586 1
-1587 1
-1588 1
-1589 1
-1590 1
-1591 1
-1592 1
-1593 1
-1594 1
-1595 1
-1596 1
-1597 1
-1598 1
-1599 1
-1600 1
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/sorted_ref.gff3
--- a/SMART/Java/Python/TestFiles/sorted_ref.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,7 +0,0 @@
-chr1 test test2.1 9 1000 1001 + . ID=test2.1;Name=test2.1
-chr1 test test2.2 50 350 301 + . ID=test2.2;Name=test2.2
-chr1 test test2.3 100 600 501 + . ID=test2.3;Name=test2.3
-chr1 test test2.4 200 450 251 + . ID=test2.4;Name=test2.4
-chr1 test test2.5 700 950 251 + . ID=test2.5;Name=test2.5
-chr1 test test2.6 800 900 101 + . ID=test2.6;Name=test2.6
-chr1 test test2.7 1200 1300 101 + . ID=test2.7;Name=test2.7
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testBedParser1.bed
--- a/SMART/Java/Python/TestFiles/testBedParser1.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-track name=reads description="Reads" useScore=0 visibility=full offset=0
-arm_X 1000 3000 test1.1 1000 + 1000 3000 0 2 100,1000, 0,1000,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testC2S.fa
--- a/SMART/Java/Python/TestFiles/testC2S.fa Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,100 +0,0 @@
->chr1
-CAACATTAGCGCCATGCCCACTGTGGGGAATTTACCAGCAGCCCGCACAC
-TTAGCCGGCCTGCTGCAAAGCGGGATTTATTTAATTCATCCTCCAAGAGC
-CCAAACGAGCATCCTATGAGTTTCTCGGAAGTGGTAGCTGGAGCAGGTCC
-AGTTTCTATGGCACCCCCTAATCCGGCACCACTGACGAAAACCCCAGGAA
-AGCGGACAAACGACGATCTGGATTGCTCAAATTTTAAGACGCCCAATAAA
-AAATTATGCGCGACATCCAATTTTGTAACTCCCTGCATTTTTCCCCCGCT
-CATCACACCAGTTTTCAAAAGCAAGGCAGCTCAATCTGTTTATGAGGAAT
-CAAAAGCCAGAAACGGACCCACCCGCCAGCCGTTACCCTGTAGCATCAAT
-GTCTCTGCTTGCAGCGCAGCGGCGCCACCCGTTATCGCCCCCCTACCCCC
-TCAAAATACAGATGCACAGCTGCCTCCATGGAAAATCGTGCCCCAGAGCC
-GTAGAGCACCCCCTATACTCGTCAATGATGTGAAGGAAATTGTCCCTCTC
-CTGGAAAAGCTAAATTATACAGCAGGAGTATCCAGCTACACCACCAGAGC
-AATAGAAGGAAACGGGGTCAGGATCCAGGCCAAGGATATGACCGCCTACA
-ACAAAATTAAAGAAGTCCTGGTGGCCAACGGATTTCCTTTATTCACCAAC
-CAGCCCAAGTCTGAGAGAGGCTTCCGAGTCATCATCAGACACCTCCATCA
-TTCCACACCATGCTCGTGGATAGTCGAGGAGCTGCTGAAGCTCGGTCTCC
-AAGCGCGCTTCGTCAGAAACATGACGAATCCAGCTACAGGTGGCCCCATG
-CGAATGTTTGAAGTGGAGATCGTCATGGCCAAAGATGGCAGCCACGATAA
-AATTCTCTCACTCAAACAAATCGGTGGGCAAAAGGTGGATATTGAAAGGA
-AAAATAGGACACAGGAGCCGGTTCAGTGCTACAGATGCCAGGGCTTCAGA
-CATGCCAAAAATTCATGCATGAGGACGCCTAAATGCATGAAATGCGCTGG
-CGATCACCTGTCATCCTGCTGCACCAAGCCAAAATCCACCCCCGCCACCT
-GCATCAACTGCTCTGGGGAACATATCAGCGCTTATAAAGGATGCCCCGCC
-TACAAGGCCGAAAAACGAAAGCTGGCGGCTAACATTATTGACACAAACAA
-AATAAGGACAATCAAAGACGCAACTAATCACTTTTATAAACGACAAGGCC
-CCCCTCCACGTAATAATACCCCTCGACTACCGCACAGCTCAGCAATCCTG
-ACCAAGTCAATCGCTGAAGCTCGCCAGGAAGCCGCCAAAAAGTCGATGCT
-AAATCCTTATTGGCAAAGCTCGAACGACAGAAGGCCACGTTTCTCCTCCC
-ATGACACTGCCATTCAAAAACGGCTAAACAAATGGCGCCGAAACTCTAAT
-AAGATACCCAAAAAGGGTAGGATATCCTCAAAGGACAATGCAAAGCCAAG
-ACCGGCATCCAAGACAAGCAACCCAGCGCAAAGACATCTGGAAAAATACC
-AGGACATGCTCCGAAACGAAAGGAGTGAAGAAATTGACCATGAACCTGAA
-AAAGGTACTCCTAATCCCAGCCGAGTCGGCAACGACAGCCCTCCGACCAC
-CAGCAGAGCCGCCAGAGCTAACTTTAAGCCAAGAATTATCGATGAAGCTA
-CGCCATCGCCAAGAAACTCCAATCCTTACCTACAGAAGAGCTTCTCGGAC
-GACCCCACCATTAATCTAGCTAATAGAGTCGATAATTTAGAAAAGAAAAT
-TGACATTTTAATGGCTTTAATCATACAAGGAACCAATAACAATAATCTTG
-ACATTGATACATCAATCTAAATTTACATTACACTTATTTATATTTATACC
-TATTATAAATATATATATCCGACACAAAAGCGCACGTCTGCCCACCCTTA
-TAATGTTCTAATTATTATCACCTTCCTCGACGCAAAGCTTAAACCTCTGT
-TGAAAAACAAATCAATTAGATGGATGACATAAAAACGTAAATAAATAATC
-TTCTCACCTCAAGCATCCGGATAAAAAAGGCAATACGCACTCCAACTCCT
-GATGAAGCTATGTGAAGAAAACTACACCAGGATTCAAAAGTCGAATCGGA
-GGATGGACATGAGAAGAATCTGTGCGGCAGAAGCATGATGAATAGAGGCG
-ACTCGCTGCAGCAAAATATGCACTACGCCACTTACCTGAATCTTCTGCGG
-CGCAGTCTTTTTATGTACCATCATCTCCGCCGCAACCGCTTCACACAGCT
-CCACAACATAAGATGCGCCACCAAAGCTGCCTCCGTACTGAACTGGACAT
-CATGCGTTGCGCTGCAAATCCTATCCTATTGACGAGCGCCAACAGCGGGT
-CTGCGCTAAAAACCTAAAAACAAAACAAAACAAAACAATTAATCAACAAC
-AAATTGAACATAACAATCAAACAATAACAATCACTTACCTCCTTGACTGC
-ATCCAATCGCTGACCCAAATCCAACACAACCGACAACAGGAGACGGGCTT
-CGCAAATGCAAAACAAAATCGCCAACTTTTGCGATTATAAATACAAAAAA
-TTGACAATTTTCTGATGCCATCTCCATCCTTTGATCCCACTGCCCAAATA
-AGGATCATTAGCGCGGAGCTGAAGCCACATTAATAAGCTGTAAAATTGAT
-CCCCAAAATGTATATTTCTCCTCAATACCGTATCTTCAACGAACTTTCCG
-CCAACCTGCAATGAAAGGGAAATTAATAATAATGCTATACAGAATTAATC
-AGCGACACATAGAAAATAGCAAACCAGACAGGCAAAGTAGTAGATGCAAA
-CAGGCGACTCCATCCCGCCGACGACAAGCATTCAAATCCTTCATACTGAA
-ACAAGGAAGCACAAGCCAATACTGGGAATTATTTACTCAAACAAAATACT
-TATCTAATTACCAACTCGACGACTCCAAATACGCGGCACACCGGCTGCGA
-TAGCTCTTAAATAAAGGGCCTCCTAATTAACTACAAAACGTACCTGAAAA
-ATAAAACAATTAACGCAATCGTAAATAATTACAATTATAATACTCACCTC
-CAGATTAGCCTAATGTACCTGAAAAACAAAAACAAAAATTAATGCAATAA
-TTATAAAAACAAATAAATACAAACACAATACTTACCTCCAAATTACCTCC
-CAGCCAAAGCACCTGAAATACAAAAACAAAGAATTAATGCAATAAATAAA
-TCAAATAAATACAAATACAATACTCACCCCAAATAACCTCCCAGCTAATT
-TACCTGAAAAAACAAAAATTAATACAATATTAAAAACGAATAACAAATGT
-AATACTTACCAAATTTTAACTTTGTATTCATTTCCATGGCCCAAATCGTT
-GCGACGGTCCTCGGCAACAAATCATGTTCCGGCGGCTCCTAGCTGCCAAT
-CCCGACGCATTGGCCACAAGACGCGGCGCTCCTGGCAACTCTCGATGAAT
-AACCGAGCTCCAATTTCCACGACGACTCTTCTGCCAAACGAGTCAGATTA
-CACCAACATAATGCCAGCAGCTCCCAAACAATGCAATGACGGCTGCGCGG
-GATCCATCTTCAGATTTTCTTCTTCCTGACGACCGGCTAAGCTGCCCTGC
-AATTTAAGAAATTTTATTAAACAATTGCAAATATCTACCACTGAGGGTGG
-TAGAGACAACCACCAAATGACAGCGGCGCGGGATACACCCACCACGAATA
-GGCTTTCTGCAGCGCTGGCCGGACATGCATGTTGCGACGCGCATTCAGCG
-TCCACAACAAGCCCCAGCCAGAATACAACAAACACTCACCTGCAATGTTT
-CCTGAGGCTTCCAGCGACTCGGTGCTTCCGTCCTTCTGGCGGGGGTACCT
-GAAAAGAATTAATTCAAATTATGTTAGTCTTAAATTCCAATGTTTCTTGT
-TAAATAATTCAAATTATCAAATGTAAACATAACATACAATGTGATAATGT
-TACCAGTCCATGTTACTGCCAAAAACCTAAGTTTACAAAAAAATACTTAC
-CTCTTAATATTAATACTAAATCTATGTCCAATCCCCAAACTCACCCCACG
-TAATGTACACCTCAAAAATTCAAATAATTGTACCTACATATTGCATTCTA
-TGTAATCAAAGGCAAAATAAATTGTGGATGCGGAACAGAATTCATTCTGT
-CTCCGTACCTCCACCAGCAAAGTTAAAAATGAAATATCCCTCATCACCGC
-TGCAATCTACATACATGGATACAGCGCAAAAGACGGTCAACCACGTCGTC
-TCCGAGTCGTTCAGGACACCTTGCTGCTCTCAATAACCTCCAGCCTGACG
-AGCGCCAACAGCGAGTTGACGCTAAAACCTAAAAACAAACAACAACAAAT
-TAAATACAAACAAATAAAATAAAATCAAACAAAACACTTACTTCACTGAC
-AACAGCCAATTGCTGATCCACATTCAACGCAACAGACAACAGGAGACGGG
-CCCCGCAAACGCAAAACAAAATCGCCAATTTTTGCGATTTTAAATACAAA
-AAATCGACAATTTTACTAAGCCCTCTCCATCTCCTGATGCCACCGCCACA
-ATAAGGATCACTAGCGCGGCGCTGATGCCACATCAATAAACCGCAATATT
-TGTCCTCAAAACGTATACTTCTTCTCAGTATCGCAACTTCTACGAATTTC
-CCGCTAACCTACAATGAAAGGAAAATCAATAAGAATGTGATACAAAAAAT
-TAATCAAGGGCAAATAGAAAATAGCTTACCGGACAGGCATACTAGCAGAT
-GCTAATATGCAACTCCATCCTTCTGAGACAAATACGCAACTCCTTTTTTC
-CAAGATTGCAAATACTGAAACAAGGAAGCACAAGCCAATACTGGGAATTA
-TTTAATTAAACAAAATACTTATCTAATTGCCAATTCGACGACTCCAAATC
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testC2S.gff3
--- a/SMART/Java/Python/TestFiles/testC2S.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-chr1 S-MART SMART 1 10 . + . Name=region0
-chr1 S-MART SMART 51 60 . + . Name=region1
-chr1 S-MART SMART 51 60 . - . Name=region2
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense1.gff3
--- a/SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense1.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-chr1 test test1.1 1000 2000 1001 + . ID=test1.1;Name=test1.1
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense2.gff3
--- a/SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense2.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-chr1 test test2.1 1500 2500 1001 + . ID=test2.1;Name=test2.1
-chr1 test test2.2 3000 4000 1001 - . ID=test2.2;Name=test2.2
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple1.gff3
--- a/SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple1.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-chr1 test test1.1 1000 2000 1001 + . ID=test1.1;Name=test1.1
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple2.gff3
--- a/SMART/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple2.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-chr1 test test2.1 1500 2500 1001 + . ID=test2.1;Name=test2.1
-chr1 test test2.2 3000 4000 1001 + . ID=test2.2;Name=test2.2
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testDifferentialExpressionExpected.gff3
--- a/SMART/Java/Python/TestFiles/testDifferentialExpressionExpected.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-chr1 S-MART S-MART_transcript 100 200 101 + . Name=test3.1;nbReadsCond1=15;regulation=equal;nbReadsCond2=15;pValue=1.0;ID=test3.1
-chr1 S-MART S-MART_transcript 200 300 101 + . Name=test3.2;nbReadsCond1=30;regulation=equal;nbReadsCond2=30;pValue=1.0;ID=test3.2
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testDifferentialExpressionOutput.gff3
--- a/SMART/Java/Python/TestFiles/testDifferentialExpressionOutput.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-chr1 S-MART S-MART_transcript 100 200 101 + . Name=test3.1;nbReadsCond1=15;regulation=equal;nbReadsCond2=15;pValue=1.0;ID=test3.1
-chr1 S-MART S-MART_transcript 200 300 101 + . Name=test3.2;nbReadsCond1=30;regulation=equal;nbReadsCond2=30;pValue=1.0;ID=test3.2
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testDifferentialExpressionReference.gff3
--- a/SMART/Java/Python/TestFiles/testDifferentialExpressionReference.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-chr1 S-MART transcript 100 200 100 + . Name=test3.1
-chr1 S-MART transcript 200 300 100 + . Name=test3.2
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testDifferentialExpressionSample1.gff3
--- a/SMART/Java/Python/TestFiles/testDifferentialExpressionSample1.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-chr1 S-MART S-MART_transcript 100 200 100 + . Name=test1.1;nbElements=10
-chr1 S-MART S-MART_transcript 200 300 100 + . Name=test1.2;nbElements=20
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testDifferentialExpressionSample2.gff3
--- a/SMART/Java/Python/TestFiles/testDifferentialExpressionSample2.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-chr1 S-MART S-MART_transcript 100 200 100 + . Name=test2.1;nbElements=20
-chr1 S-MART S-MART_transcript 200 300 100 + . Name=test2.2;nbElements=40
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testGffParser1.gff3
--- a/SMART/Java/Python/TestFiles/testGffParser1.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,7 +0,0 @@
-arm_X test test_transcript 1000 2000 1 + . ID=id1-1;Name=test1;field=value1
-arm_X test test_exon 1000 2000 1 + . ID=id1-1-exon1;Name=test1-exon1;Parent=id1-1
-arm_X test test_transcript 10000 20000 1 - . ID=id2-1;Name=test2;field=value2
-arm_X test test_exon 10000 10100 1 - . ID=id2-1-exon1;Name=test2-exon1;Parent=id2-1
-arm_X test test_exon 10500 20000 1 - . ID=id2-1-exon2;Name=test2-exon2;Parent=id2-1
-arm_X test test_transcript 1000 2000 1 + . ID=test1.1-1;Name=test1.1
-arm_X test test_exon 1000 2000 1 + . ID=test1.1-1-exon1;Name=test1.1-exon1;Parent=test1.1-1
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testPlot.gff3
--- a/SMART/Java/Python/TestFiles/testPlot.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
-chr1 S-MART SMART 100 200 . + . value2=6;value3=1
-chr1 S-MART SMART 200 300 . + . value1=2;value2=5;value3=2
-chr1 S-MART SMART 300 400 . + . value1=3;value2=4;value3=3
-chr1 S-MART SMART 400 500 . + . value1=4;value2=3;value3=4
-chr1 S-MART SMART 500 600 . + . value1=5;value2=2;value3=5
-chr1 S-MART SMART 600 700 . + . value1=6;value3=6
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testSW.gff3
--- a/SMART/Java/Python/TestFiles/testSW.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-chr1 S-MART SMART 101 111 . + . value1=1
-chr1 S-MART SMART 111 121 . + . value1=2
-chr1 S-MART SMART 201 211 . + . value1=10
-chr1 S-MART SMART 211 221 . + . value1=12
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList1.bed
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList1.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-arm_X 1000 2000 test1.1 1000 + 1000 2000 0 1 1000, 0,
-arm_X 1000 2000 test1.2 1000 - 1000 2000 0 1 1000, 0,
-arm_X 100 200 test1.3 1000 + 100 200 0 1 100, 0,
-arm_X 100 3200 test1.4 1000 + 100 3200 0 2 100,100, 0,3000,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList2.bed
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList2.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-arm_X 1000 2000 test2.1 1000 + 1000 2000 0 1 1000, 0,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference1.gff3
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference1.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-arm_X test test_transcript 1000 4000 1 + . ID=id1-1;Name=test1;field=value1;nbElements=2
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference2.gff3
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference2.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-arm_X test test_transcript 2000 3000 1 + . ID=id2;Name=test2;field=value1
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation1.bed
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation1.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-arm_X 10000100 10000200 test1.1 100 - 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.2 100 - 10000000 10000100 0 1 100, 0,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation2.bed
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeAggregation2.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-arm_X 10000050 10000150 test2.1 100 - 10000050 10000150 0 1 100, 0,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1.bed
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,7 +0,0 @@
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1_modif.bed
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway1_modif.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,112 +0,0 @@
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
-arm_X 10000100 10000200 test1.1 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000100 10000200 test1.2 100 - 10000100 10000200 0 1 100, 0,
-arm_2R 10000100 10000200 test1.3 100 + 10000100 10000200 0 1 100, 0,
-arm_X 10000000 10000100 test1.4 100 + 10000000 10000100 0 1 100, 0,
-arm_X 10000200 10000300 test1.5 100 + 10000200 10000300 0 1 100, 0,
-arm_X 9999900 9999950 test1.6 100 + 9999900 9999950 0 1 50, 0,
-arm_X 10000000 10000050 test1.7 100 - 10000000 10000050 0 1 50, 0,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway2.bed
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSenseAntiSenseAway2.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-arm_X 10000000 10000050 test2.1 50 - 10000000 10000050 0 1 50, 0,
-arm_3R 10000000 10000050 test2.2 50 - 10000000 10000050 0 1 50, 0,
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple1.bed
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple1.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-arm_X 1000 3000 test1.1 1000 + 1000 3000 0 1 2000, 0,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple2.bed
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListMergeSimple2.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-arm_X 2000 4000 test1.2 1000 + 2000 4000 0 1 2000, 0,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMerge1.gff3
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMerge1.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-arm_X test test_transcript 1000 2000 1 + . ID=id1-1;Name=test1;field=value1;nbElements=2
-arm_X test test_exon 1000 2000 1 + . ID=id1-1-exon1;Name=test1-exon1;Parent=id1-1
-arm_X test test_transcript 1000 2000 1 + . ID=id2-1;Name=test2;field=value2
-arm_X test test_exon 1000 2000 1 + . ID=id2-1-exon1;Name=test2-exon1;Parent=id2-1
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeDifferentClusters1.bed
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeDifferentClusters1.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-arm_X 1010 1110 test1.1 1000 + 1010 1110 0 1 100, 0,
-arm_X 100 100100 test1.2 1000 + 100 100100 0 1 100000, 0,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeSense1.bed
--- a/SMART/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListSelfMergeSense1.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-arm_X 1000 6000 test1.1 1000 + 1000 6000 0 2 1000,1000, 0,4000,
-arm_X 1000 4000 test1.2 1000 + 1000 4000 0 2 1000,1000, 0,2000,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/testTranscriptNormalize.gff3
--- a/SMART/Java/Python/TestFiles/testTranscriptNormalize.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-chr1 S-MART transcript 1000 2000 1000 + . Name=test1;nbOccurrences=2
-chr1 S-MART transcript 1500 2500 1000 + . Name=test2;nbOccurrences=2
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/test_distance.bed
--- a/SMART/Java/Python/TestFiles/test_distance.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-arm_X 1000 2000 test2.1 1000 + 1000 2000 0 1 1000, 0,
-arm_X 250 350 test2.2 1000 + 250 350 0 1 100, 0,
-arm_X 150 250 test2.3 1000 + 150 250 0 1 100, 0,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/TestFiles/test_minoverlapp.bed
--- a/SMART/Java/Python/TestFiles/test_minoverlapp.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
-track name=reads description="Reads" useScore=0 visibility=full offset=0
-arm_X 1000 2000 test1.1 1000 + 1000 2000 0 1 1000, 0,
-arm_X 1000 2000 test1.2 1000 - 1000 2000 0 1 1000, 0,
-arm_X 100 200 test1.3 1000 + 100 200 0 1 100, 0,
-arm_X 100 3200 test1.4 1000 + 100 3200 0 2 100,100, 0,3000,
-arm_X 1500 2000 test1.5 1000 + 1500 2000 0 1 500, 0,
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/WrappGetDistribution.py
--- a/SMART/Java/Python/WrappGetDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,96 +0,0 @@
-#! /usr/bin/env python
-from optparse import OptionParser
-import tarfile
-import os
-import re
-import shutil
-import subprocess
-
-SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
-
-def toTar(tarFileName, directory):
-    fileName = os.path.splitext(tarFileName)[0]
-    fileNameBaseName = os.path.basename(fileName)
-    tfile = tarfile.open(fileName + ".tmp.tar", "w")
-    list = os.listdir(directory)
-    for file in list:
-        if re.search(str(fileNameBaseName), file):
-            tfile.add(file)
-    os.system("mv %s %s" % (fileName + ".tmp.tar", options.outTarFileName))
-    tfile.close()
-    
-
-if __name__ == "__main__":
-    
-    magnifyingFactor = 1000
-    
-    # parse command line
-    description = "Get Distribution v1.0.1: Get the distribution of the genomic coordinates on a genome. [Category: Visualization]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",       dest="inputFileName",     action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",      dest="format",            action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",      dest="outTarFileName",    action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-r", "--reference",   dest="referenceFileName", action="store",      default=None,  type="string", help="file containing the genome [compulsory] [format: file in FASTA format]")
-    parser.add_option("-n", "--nbBins",      dest="nbBins",            action="store",      default=1000,  type="int",    help="number of bins [default: 1000] [format: int]")
-    parser.add_option("-2", "--bothStrands", dest="bothStrands",       action="store_true", default=False,                help="plot one curve per strand [format: bool] [default: false]")
-    parser.add_option("-w", "--raw",         dest="raw",               action="store_true", default=False,                help="plot raw number of occurrences instead of density [format: bool] [default: false]")
-    parser.add_option("-x", "--csv",         dest="csv",               action="store_true", default=False,                help="write a .csv file [format: bool]")
-    parser.add_option("-c", "--chromosome",  dest="chromosome",        action="store",      default=None,  type="string", help="plot only a chromosome [format: string]")
-    parser.add_option("-s", "--start",       dest="start",             action="store",      default=None,  type="int",    help="start from a given region [format: int]")
-    parser.add_option("-e", "--end",         dest="end",               action="store",      default=None,  type="int",    help="end from a given region [format: int]")
-    parser.add_option("-y", "--yMin",        dest="yMin",              action="store",      default=None,  type="int",    help="minimum value on the y-axis to plot [format: int]")
-    parser.add_option("-Y", "--yMax",        dest="yMax",              action="store",      default=None,  type="int",    help="maximum value on the y-axis to plot [format: int]")
-    parser.add_option("-g", "--gff",         dest="gff",               action="store_true", default=False,                help="also write GFF3 file [format: bool] [default: false]")
-    parser.add_option("-H", "--height",      dest="height",            action="store",      default=None,  type="int",    help="height of the graphics [format: int] [default: 300]")
-    parser.add_option("-W", "--width",       dest="width",             action="store",      default=None,  type="int",    help="width of the graphics [format: int] [default: 1000]")
-    parser.add_option("-v", "--verbosity",   dest="verbosity",         action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
-    parser.add_option("-l", "--log",         dest="log",               action="store_true", default=False,                help="write a log file [format: bool]")
-    (options, args) = parser.parse_args()
-
-
-    absPath = os.getcwd()
-    print "the current path is :", absPath
-    directory = "/tmp/wrappGetDistribution"
-    print "the dir path is :", directory
-    if not os.path.exists(directory):
-        os.makedirs(directory)
-    os.chdir(directory)
-    if options.inputFileName != None and options.format != None and options.outTarFileName != None:
-        outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
-        cmd = "python %s/Java/Python/getDistribution.py -i %s -f %s -o %s -D %s" % (SMART_PATH, options.inputFileName, options.format, outputFileName, directory)
-    if options.referenceFileName != None :
-        cmd += " -r %s" % options.referenceFileName
-    if options.nbBins != None :
-        cmd += " -n %s" % options.nbBins
-    if options.chromosome :
-        cmd += " -c %s" % options.chromosome 
-    if options.start != None :
-        cmd += " -s %s" % options.start
-    if options.end != None :
-        cmd += " -e %s" % options.end
-    if options.yMin != None :
-        cmd += " -y %s" % options.yMin
-    if options.yMax != None :
-        cmd += " -Y %s" % options.yMax
-    if options.height != None :
-        cmd += " -H %s" % options.height
-    if options.width != None :
-        cmd += " -W %s" % options.width
-    if options.bothStrands :
-        cmd += " -2" 
-    if options.raw :
-        cmd += " -w" 
-    if options.csv :
-        cmd += " -x" 
-    if options.gff :
-        cmd += " -g"
-    if options.log :
-        cmd += " -l" 
-    print "cmd is: ", cmd    
-    status = subprocess.call(cmd, shell=True)
-    if status != 0:
-            raise Exception("Problem with the execution of command!")
-    toTar(options.outTarFileName, directory)
-    shutil.rmtree(directory)
-    
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/WrappGetReadDistribution.py
--- a/SMART/Java/Python/WrappGetReadDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,58 +0,0 @@
-#! /usr/bin/env python
-from optparse import OptionParser
-import tarfile
-import os
-import re
-import shutil
-import subprocess
-
-SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
-
-def toTar(tarFileName, directory):
-    fileName = os.path.splitext(tarFileName)[0]
-    fileNameBaseName = os.path.basename(fileName)
-    tfile = tarfile.open(fileName + ".tmp.tar", "w")
-    list = os.listdir(directory)
-    for file in list:
-        if re.search(str(fileNameBaseName), file):
-            tfile.add(file)
-    os.system("mv %s %s" % (fileName + ".tmp.tar", options.outTarFileName))
-    tfile.close()
-    
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Get Read Distribution v1.0.1: Plot the number of identical reads and give the most represented. [Category: Visualization]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file sequence [compulsory] [format: file in sequence format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of the file [compulsory] [format: sequence file format]")
-    parser.add_option("-n", "--number",    dest="number",         action="store", default=None, type="int",    help="keep the best n    [format: int]")
-    parser.add_option("-p", "--percent",   dest="percent",        action="store", default=None, type="float",  help="keep the best n\% [format: float]")
-    parser.add_option("-o", "--output",    dest="outTarFileName", action="store",               type="string", help="output file [compulsory] [format: zip]")
-
-    (options, args) = parser.parse_args()
-
-
-    absPath = os.getcwd()
-    print "the current path is :", absPath
-    directory = "/tmp/wrappGetReadDistribution"
-    print "the dir path is :", directory
-    if not os.path.exists(directory):
-        os.makedirs(directory)
-    os.chdir(directory)
-    if options.inputFileName != None and options.format != None and options.outTarFileName != None:
-        outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
-        cmd = "python %s/Java/Python/getReadDistribution.py -i %s -f %s -o %s -D %s" % (SMART_PATH, options.inputFileName, options.format, outputFileName, directory)
-    if options.number != None :
-        cmd += " -n %s" % options.number
-    if options.percent != None :
-        cmd += " -p %s" % options.percent
-    print "cmd is: ", cmd    
-    status = subprocess.call(cmd, shell=True)
-    if status != 0:
-            raise Exception("Problem with the execution of command!")
-    toTar(options.outTarFileName, directory)
-    shutil.rmtree(directory)
-    
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/WrappPlotCoverage.py
--- a/SMART/Java/Python/WrappPlotCoverage.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,89 +0,0 @@
-#! /usr/bin/env python
-from optparse import OptionParser
-import tarfile
-import os
-import re
-import shutil
-import subprocess
-
-SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
-
-def toTar(tarFileName, directory):
-    fileName = os.path.splitext(tarFileName)[0]
-    fileNameBaseName = os.path.basename(fileName)
-    tfile = tarfile.open(fileName + ".tmp.tar", "w")
-    list = os.listdir(directory)
-    for file in list:
-        if re.search(str(fileNameBaseName), file):
-            tfile.add(file)
-    os.system("mv %s %s" % (fileName + ".tmp.tar", options.outTarFileName))
-    tfile.close()
-
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Plot Coverage v1.0.1: Plot the coverage of the first data with respect to the second one. [Category: Visualization]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input1",       dest="inputFileName1", action="store",                       type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--inputFormat1", dest="inputFormat1",   action="store",                       type="string", help="format of input file 1 [compulsory] [format: transcript file format]")
-    parser.add_option("-j", "--input2",       dest="inputFileName2", action="store",                       type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
-    parser.add_option("-g", "--inputFormat2", dest="inputFormat2",   action="store",                       type="string", help="format of input file 2 [compulsory] [format: transcript file format]")
-    parser.add_option("-q", "--sequence",     dest="inputSequence",  action="store",      default=None,    type="string", help="input sequence file [format: file in FASTA format] [default: None]")
-    parser.add_option("-o", "--output",       dest="outTarFileName", action="store",                       type="string", help="output file [compulsory] [format: output file in zip format]")
-    parser.add_option("-w", "--width",        dest="width",          action="store",      default=1500,    type="int",    help="width of the plots (in px) [format: int] [default: 1500]")
-    parser.add_option("-e", "--height",       dest="height",         action="store",      default=1000,    type="int",    help="height of the plots (in px) [format: int] [default: 1000]")
-    parser.add_option("-t", "--title",        dest="title",          action="store",      default="",      type="string", help="title of the plots [format: string]")
-    parser.add_option("-x", "--xlab",         dest="xLabel",         action="store",      default="",      type="string", help="label on the x-axis [format: string]")
-    parser.add_option("-y", "--ylab",         dest="yLabel",         action="store",      default="",      type="string", help="label on the y-axis [format: string]")
-    parser.add_option("-p", "--plusColor",    dest="plusColor",      action="store",      default="red",   type="string", help="color for the elements on the plus strand [format: string] [default: red]")
-    parser.add_option("-m", "--minusColor",   dest="minusColor",     action="store",      default="blue",  type="string", help="color for the elements on the minus strand [format: string] [default: blue]")
-    parser.add_option("-s", "--sumColor",     dest="sumColor",       action="store",      default="black", type="string", help="color for 2 strands coverage line [format: string] [default: black]")
-    parser.add_option("-l", "--lineColor",    dest="lineColor",      action="store",      default="black", type="string", help="color for the lines [format: string] [default: black]")
-    parser.add_option("-1", "--merge",        dest="merge",          action="store_true", default=False,                  help="merge the 2 plots in 1 [format: boolean] [default: false]")
-    parser.add_option("-v", "--verbosity",    dest="verbosity",      action="store",      default=1,       type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    absPath = os.getcwd()
-    directory = "/tmp/wrappPlotCov"
-    if not os.path.exists(directory):
-        os.makedirs(directory)
-    os.chdir(directory)
-    if options.inputFileName1 != None and options.inputFormat1 != None and options.inputFileName2 != None and options.inputFormat2 != None and options.outTarFileName != None:
-        outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
-        print 'outputfile is :', outputFileName
-        cmd = "python %s/Java/Python/plotCoverage.py -i %s -f %s -j %s -g %s -o %s -D %s" % (SMART_PATH, options.inputFileName1, options.inputFormat1, options.inputFileName2, options.inputFormat2, outputFileName, directory)
-    if options.inputSequence!= None:
-        cmd += " -q %s" % options.inputSequence
-    if options.width != None:
-        cmd += " -w %s" % options.width
-    if options.height != None:
-        cmd += " -e %s" % options.height
-    if options.title != None:
-        cmd += " -t %s" % options.title
-    if options.xLabel != None:
-        cmd += " -x %s" % options.xLabel
-    if options.yLabel != None:
-        cmd += " -y %s" % options.yLabel
-    if options.plusColor != None:
-        cmd += " -p %s" % options.plusColor
-    if options.minusColor != None:
-        cmd += " -m %s" % options.minusColor
-    if options.sumColor != None:
-        cmd += " -s %s" % options.sumColor
-    if options.lineColor != None:
-        cmd += " -l %s" % options.lineColor
-    if options.merge:
-        cmd += " -1"
-    status = subprocess.call(cmd, shell=True)
-    if status != 0:
-            raise Exception("Problem with the execution of command!")
-    toTar(options.outTarFileName, directory)
-    shutil.rmtree(directory)
-

-
-
-        
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/WrappPlotRepartition.py
--- a/SMART/Java/Python/WrappPlotRepartition.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,71 +0,0 @@
-#! /usr/bin/env python
-from optparse import OptionParser
-import tarfile
-import os
-import re
-import shutil
-import subprocess
-
-SMART_PATH = "%sSMART" % os.environ["REPET_PATH"]
-
-def toTar(tarFileName, directory):
-    fileName = os.path.splitext(tarFileName)[0]
-    fileNameBaseName = os.path.basename(fileName)
-    tfile = tarfile.open(fileName + ".tmp.tar", "w")
-    list = os.listdir(directory)
-    for file in list:
-        if re.search(str(fileNameBaseName), file):
-            tfile.add(file)
-    os.system("mv %s %s" % (fileName + ".tmp.tar", options.outTarFileName))
-    tfile.close()
-    
-
-if __name__ == "__main__":
-    
-    magnifyingFactor = 1000
-    
-    # parse command line
-    description = "Plot the repartition of different data on a whole genome. (This tool uses 1 input file only, the different values being stored in the tags.    See documentation to know more about it.) [Category: Visualization]"
-
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",dest="inputFileName",action="store",type="string",help="input file name [compulsory] [format: file in GFF3 format]")
-    parser.add_option("-n", "--names",dest="names", action="store", type="string", help="name for the tags (separated by commas and no space) [compulsory] [format: string]")
-    parser.add_option("-o", "--output",dest="outTarFileName",action="store",type="string", help="output file [compulsory] [format: output file tar format]")
-    parser.add_option("-c", "--color",dest="colors",action="store",default=None,type="string", help="color of the lines (separated by commas and no space) [format: string]")
-    parser.add_option("-f", "--format",dest="format",action="store",default="png",type="string", help="format of the output file [format: string] [default: png]")
-    parser.add_option("-r", "--normalize",dest="normalize",action="store_true", default=False,help="normalize data (when panels are different) [format: bool] [default: false]")
-    parser.add_option("-l", "--log",dest="log",action="store",default="",type="string", help="use log on x- or y-axis (write 'x', 'y' or 'xy') [format: string]")
-    parser.add_option("-v", "--verbosity",dest="verbosity",action="store",default=1,type="int",help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-
-    absPath = os.getcwd()
-    print "the current path is :", absPath
-    directory = "/tmp/wrappPlotRepartition"
-    print "the dir path is :", directory
-    if not os.path.exists(directory):
-        os.makedirs(directory)
-    os.chdir(directory)
-    if options.inputFileName != None and options.format != None and options.outTarFileName != None:
-        outputFileName = os.path.splitext(os.path.basename(options.outTarFileName))[0]
-        cmd = "python %s/Java/Python/plotRepartition.py -i %s -o %s -D %s" % (SMART_PATH, options.inputFileName, outputFileName, directory)
-    if options.names != None :
-        cmd += " -n %s" % options.names
-    else: print "You must choose tag names !"
-    if options.colors != None :
-        cmd += " -c %s" % options.colors
-    if options.format != None:
-        cmd += " -f %s" % options.format
-    if options.normalize :
-        cmd += " -r " 
-    if options.log != "" :
-        cmd += " -l %s" % options.log
-    
-    print "cmd is: ", cmd    
-    status = subprocess.call(cmd, shell=True)
-    if status != 0:
-            raise Exception("Problem with the execution of command!")
-    toTar(options.outTarFileName, directory)
-    shutil.rmtree(directory)
-    
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/__init__.pyc
b
Binary file SMART/Java/Python/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/adaptorStripper.py
--- a/SMART/Java/Python/adaptorStripper.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,115 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Remove adaptors"""
-
-import os
-from optparse import OptionParser
-from SMART.Java.Python.structure.Sequence import Sequence
-from SMART.Java.Python.structure.SequenceList import SequenceList
-from commons.core.parsing.FastaParser import FastaParser
-from commons.core.writer.FastaWriter import FastaWriter
-from SMART.Java.Python.misc.Progress import Progress
-
-
-def distance (string1, string2):
-    if len(string1) != len(string2):
-        return None
-    distance = 0
-    for i in range(0, len(string1)):
-        if string1[i] != string2[i]:
-            distance += 1
-    return distance
-
-
-
-if __name__ == "__main__":
-    nbRemaining = 0
-    
-    # parse command line
-    description = "Adaptor Stripper v1.0.1: Remove the adaptor of a list of reads. [Category: Personnal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",         dest="inputFileName",      action="store",                     type="string", help="input file [compulsory] [format: file in FASTA format]")
-    parser.add_option("-o", "--output",        dest="outputFileName",     action="store",                     type="string", help="output file [compulsory] [format: output file in FASTA format]")
-    parser.add_option("-5", "--5primeAdaptor", dest="fivePrimeAdaptor",   action="store",                     type="string", help="five prime adaptor [format: string]")
-    parser.add_option("-3", "--3primeAdaptor", dest="threePrimeAdaptor",  action="store",                     type="string", help="three prime adaptor [format: string]")
-    parser.add_option("-d", "--5primeDist",    dest="fivePrimeDistance",  action="store",      default=3,     type="int",    help="five prime distance [format: int] [default: 3]")
-    parser.add_option("-e", "--3primeDist",    dest="threePrimeDistance", action="store",      default=3,     type="int",    help="three prime distance [format: int [default: 3]]")
-    parser.add_option("-m", "--3primeSize",    dest="threePrimeSize",     action="store",      default=10,    type="int",    help="three prime size [format: int] [default: 10]")
-    parser.add_option("-v", "--verbosity",     dest="verbosity",          action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")
-    parser.add_option("-l", "--log",           dest="log",                action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
-    (options, args) = parser.parse_args()
-
-    if options.log:
-        logHandle = open(options.outputFileName + ".log", "w")
-
-
-    writer         = FastaWriter(options.outputFileName + ".fas", options.verbosity)
-    sequenceParser = FastaParser(options.inputFileName, options.verbosity)
-    nbSequences    = sequenceParser.getNbSequences()
-
-    # treat sequences
-    progress = Progress(sequenceParser.getNbSequences(), "Analyzing " + options.inputFileName, options.verbosity)
-    for sequence in sequenceParser.getIterator():
-        fivePrimeAdaptor  = sequence.getSequence()[0:len(options.fivePrimeAdaptor)]
-        threePrimeAdaptor = sequence.getSequence()[len(sequence.sequence)-len(options.threePrimeAdaptor):]
-
-        # check 5' adaptor
-        fivePrimeDistance = distance(fivePrimeAdaptor, options.fivePrimeAdaptor)
-        # check 3' adaptor
-        threePrimeDistance = len(threePrimeAdaptor)
-        for i in range(options.threePrimeSize, len(threePrimeAdaptor)+1):
-            threePrimeDistance = min(threePrimeDistance, distance(threePrimeAdaptor[-i:], options.threePrimeAdaptor[:i]))
-
-        # sort candidates
-        if fivePrimeDistance > options.fivePrimeDistance:
-            if options.log:
-                logHandle.write("Sequence %s does not start with the right adaptor (%s != %s)\n" % (sequence.getSequence(), fivePrimeAdaptor, options.fivePrimeAdaptor))
-        elif threePrimeDistance > options.threePrimeDistance:
-            if options.log:
-                logHandle.write("Sequence %s does not end with the right adaptor (%s != %s)\n" % (sequence.getSequence(), threePrimeAdaptor, options.threePrimeAdaptor))
-        else:
-            nbRemaining += 1
-            sequence.setSequence(sequence.getSequence()[len(options.fivePrimeAdaptor):len(sequence.getSequence())-len(options.threePrimeAdaptor)])
-            writer.addSequence(sequence)
-
-        progress.inc()
-
-    progress.done()
-
-    if options.log:
-        logHandle.close()
-
-    writer.write()
-
-    print "kept %i over %i (%.f%%)" % (nbRemaining, nbSequences, float(nbRemaining) / nbSequences * 100)
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/changeGffFeatures.sh
--- a/SMART/Java/Python/changeGffFeatures.sh Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-#!/bin/bash
-sed "s/\t$2\t/\t$3\t/g" $1
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/changeTagName.py
--- a/SMART/Java/Python/changeTagName.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,90 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Change the name of a tag
-"""
-
-import os
-import random
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.misc.Progress import Progress
-from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
-from commons.core.writer.Gff3Writer import Gff3Writer
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Change Tag Name v1.0.1: Change the name of tag of a list of transcripts. [Category: Data Modification]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                      type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",                      type="string", help="format of the input file [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                      type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-t", "--tag",         dest="tag",            action="store",                      type="string", help="name of the tag to change [compulsory] [format: string]")
-    parser.add_option("-n", "--name",        dest="name",           action="store",                      type="string", help="new name for the tag [compulsory] [format: string]")
-    parser.add_option("-y", "--mysql",       dest="mysql",          action="store_true", default=False,                 help="mySQL output [format: bool] [default: false]")    
-    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,      type="int",    help="trace level [format: int] [default: 1]")
-    parser.add_option("-l", "--log",         dest="log",            action="store_true", default=False,                 help="write a log file [format: bool] [default: false]")
-    (options, args) = parser.parse_args()
-
-    if options.log:
-        logHandle = open("%s.log" % options.outputFileName, "w")
-
-    # create parser and writer(s)
-    parser      = TranscriptContainer(options.inputFileName, options.inputFormat, options.verbosity)
-    tmpFileName = "tmpTranscriptFile%d.gff3" % (random.randint(0, 100000))
-    writer      = Gff3Writer(tmpFileName, options.verbosity)
-    if options.mysql:
-        mysqlWriter = MySqlTranscriptWriter(options.outputFileName, options.verbosity)
-    outputData = {}
-        
-    # process transcripts
-    progress = Progress(parser.getNbTranscripts(), "Printing transcripts %s" % (options.inputFileName), options.verbosity)
-    for transcript in parser.getIterator():
-        if options.tag in transcript.tags:
-            value = transcript.tags[options.tag]
-            del transcript.tags[options.tag]
-            transcript.tags[options.name] = value
-        writer.addTranscript(transcript)
-        if options.mysql:
-            mysqlWriter.addTranscript(transcript)
-        progress.inc()
-    progress.done()
-    parser.transcriptListParser.close()
-
-    writer.write()
-
-    if options.mysql:
-        mysqlWriter.write()
-
-    os.rename(tmpFileName, options.outputFileName)
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleanGff.py
--- a/SMART/Java/Python/cleanGff.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,195 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Clean a GFF file (as given by NCBI or TAIR) and outputs a GFF3 file.
-"""
-
-import os
-import re
-from optparse import OptionParser
-from commons.core.parsing.GffParser import *
-from SMART.Java.Python.misc.RPlotter import *
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-
-count = {}
-
-class ParsedLine(object):
-    def __init__(self, line, cpt):
-        self.line = line
-        self.cpt  = cpt
-        self.parse()
-
-    def parse(self):
-        self.line = self.line.strip()
-        self.splittedLine = self.line.split(None, 8)
-        if len(self.splittedLine) < 9:
-            raise Exception("Line '%s' has less than 9 fields.  Exiting..." % (self.line))
-        self.type = self.splittedLine[2]
-        self.parseOptions()
-        self.getId()
-        self.getParents()
-
-    def parseOptions(self):
-        self.parsedOptions = {}
-        for option in self.splittedLine[8].split(";"):
-            option = option.strip()
-            if option == "": continue
-            posSpace = option.find(" ")
-            posEqual = option.find("=")
-            if posEqual != -1 and (posEqual < posSpace or posSpace == -1):
-                key, value = option.split("=", 1)
-            elif posSpace != -1:
-                key, value = option.split(None, 1)
-            else:
-                key   = "ID"
-                value = option
-            self.parsedOptions[key.strip()] = value.strip(" \"")
-
-    def getId(self):
-        for key in self.parsedOptions:
-            if key.lower() == "id":
-                self.id = self.parsedOptions[key]
-                return
-        if "Parent" in self.parsedOptions:
-            parent = self.parsedOptions["Parent"].split(",")[0]
-            if parent not in count:
-                count[parent] = {}
-            if self.type not in count[parent]:
-                count[parent][self.type] = 0
-            count[parent][self.type] += 1
-            self.id = "%s-%s-%d" % (parent, self.type, count[parent][self.type])
-        else:
-            self.id = "smart%d" % (self.cpt)
-        self.parsedOptions["ID"] = self.id
-
-    def getParents(self):
-        for key in self.parsedOptions:
-            if key.lower() in ("parent", "derives_from"):
-                self.parents = self.parsedOptions[key].split(",")
-                return
-        self.parents = None
-
-    def removeParent(self):
-        for key in self.parsedOptions.keys():
-            if key.lower() in ("parent", "derives_from"):
-                del self.parsedOptions[key]
-
-    def export(self):
-        self.splittedLine[8] = ";".join(["%s=%s" % (key, value) for key, value in self.parsedOptions.iteritems()])
-        return "%s\n" % ("\t".join(self.splittedLine))
-
-
-class CleanGff(object):
-
-    def __init__(self, verbosity = 1):
-        self.verbosity = verbosity
-        self.lines         = {}
-        self.acceptedTypes = []
-        self.parents       = []
-        self.children      = {}
-
-    def setInputFileName(self, name):
-        self.inputFile = open(name)
-        
-    def setOutputFileName(self, name):
-        self.outputFile = open(name, "w")
-
-    def setAcceptedTypes(self, types):
-        self.acceptedTypes = types
-
-    def parse(self):
-        progress = UnlimitedProgress(100000, "Reading input file", self.verbosity)
-        for cpt, line in enumerate(self.inputFile):
-            if not line or line[0] == "#": continue
-            if line[0] == ">": break
-            parsedLine = ParsedLine(line, cpt)
-            if parsedLine.type in self.acceptedTypes:
-                self.lines[parsedLine.id] = parsedLine
-            progress.inc()
-        progress.done()
-
-    def sort(self):
-        progress = Progress(len(self.lines.keys()), "Sorting file", self.verbosity)
-        for line in self.lines.values():
-            parentFound = False
-            if line.parents:
-                for parent in line.parents:
-                    if parent in self.lines:
-                        parentFound = True
-                        if parent in self.children:
-                            self.children[parent].append(line)
-                        else:
-                            self.children[parent] = [line]
-            if not parentFound:
-                line.removeParent()
-                self.parents.append(line)
-            progress.inc()
-        progress.done()
-
-    def write(self):
-        progress = Progress(len(self.parents), "Writing output file", self.verbosity)
-        for line in self.parents:
-            self.writeLine(line)
-            progress.inc()
-        self.outputFile.close()
-        progress.done()
-
-    def writeLine(self, line):
-        self.outputFile.write(line.export())
-        if line.id in self.children:
-            for child in self.children[line.id]:
-                self.writeLine(child)
-
-    def run(self):
-        self.parse()
-        self.sort()
-        self.write()
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Clean GFF v1.0.3: Clean a GFF file (as given by NCBI) and outputs a GFF3 file. [Category: Other]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                      type="string", help="input file name [compulsory] [format: file in GFF format]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                      type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-t", "--types",     dest="types",          action="store", default="mRNA,exon", type="string", help="list of comma-separated types that you want to keep [format: string] [default: mRNA,exon]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,           type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    cleanGff = CleanGff(options.verbosity)
-    cleanGff.setInputFileName(options.inputFileName)
-    cleanGff.setOutputFileName(options.outputFileName)
-    cleanGff.setAcceptedTypes(options.types.split(","))
-    cleanGff.run()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleanGff.pyc
b
Binary file SMART/Java/Python/cleanGff.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleaning/CleanerChooser.py
--- a/SMART/Java/Python/cleaning/CleanerChooser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,80 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
-from SMART.Java.Python.cleaning.GffCleaner import GffCleaner
-from SMART.Java.Python.cleaning.GtfCleaner import GtfCleaner
-from SMART.Java.Python.cleaning.DefaultCleaner import DefaultCleaner
-
-#Attention!! Do not delete the imports!! They are used to know the type of file format!!!
-
-class CleanerChooser(object):
- """
- A class that finds the correct cleaner
- @ivar format: the format
- @type format: string
- @ivar cleaner: the parser
- @type cleaner: object
- @ivar cleanerClass: the class of the parser
- @type cleanerClass: class
- @ivar verbosity: verbosity
- @type verbosity: int
- """
-
- def __init__(self, verbosity = 0):
- """
- Constructor
- @param verbosity: verbosity
- @type verbosity: int
- """
- self.verbosity = verbosity
-
-
- def findFormat(self, format):
- """
- Find the correct parser
- @ivar format: the format
- @type format: string
- @return: a cleaner
- """
- for cleanerClass in TranscriptListCleaner.__subclasses__():
- if cleanerClass != None:
- if cleanerClass.getFileFormats() != None and format in cleanerClass.getFileFormats():
- self.cleanerClass = cleanerClass
- return
- self.cleanerClass = DefaultCleaner
-
-
- def getCleaner(self):
- """
- Get the parser previously found
- @return: the parser
- """
- return self.cleanerClass(self.verbosity)
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleaning/CleanerChooser.pyc
b
Binary file SMART/Java/Python/cleaning/CleanerChooser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleaning/DefaultCleaner.py
--- a/SMART/Java/Python/cleaning/DefaultCleaner.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,45 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Default cleaner. Does nothing but copying.
-"""
-from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-
-
-class DefaultCleaner(TranscriptListCleaner):
-
- def __init__(self, verbosity = 1):
- super(DefaultCleaner, self).__init__(verbosity)
-
- def _clean(self):
- self.outputHandle.write(self.inputHandle.read())
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleaning/DefaultCleaner.pyc
b
Binary file SMART/Java/Python/cleaning/DefaultCleaner.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleaning/GffCleaner.py
--- a/SMART/Java/Python/cleaning/GffCleaner.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,168 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Clean a GFF file (as given by NCBI or TAIR) and outputs a GFF3 file.
-"""
-
-from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-
-count = {}
-
-class ParsedLine(object):
- def __init__(self, line, cpt):
- self.line = line
- self.cpt  = cpt
- self.parse()
-
- def parse(self):
- self.line = self.line.strip()
- self.splittedLine = self.line.split(None, 8)
- if len(self.splittedLine) < 9:
- raise Exception("Line '%s' has less than 9 fields.  Exiting..." % (self.line))
- self.type = self.splittedLine[2]
- self.parseOptions()
- self.getId()
- self.getParents()
-
- def parseOptions(self):
- self.parsedOptions = {}
- for option in self.splittedLine[8].split(";"):
- option = option.strip()
- if option == "": continue
- posSpace = option.find(" ")
- posEqual = option.find("=")
- if posEqual != -1 and (posEqual < posSpace or posSpace == -1):
- key, value = option.split("=", 1)
- elif posSpace != -1:
- key, value = option.split(None, 1)
- else:
- key   = "ID"
- value = option
- self.parsedOptions[key.strip()] = value.strip(" \"")
-
- def getId(self):
- for key in self.parsedOptions:
- if key.lower() == "id":
- self.id = self.parsedOptions[key]
- return
- if "Parent" in self.parsedOptions:
- parent = self.parsedOptions["Parent"].split(",")[0]
- if parent not in count:
- count[parent] = {}
- if self.type not in count[parent]:
- count[parent][self.type] = 0
- count[parent][self.type] += 1
- self.id = "%s-%s-%d" % (parent, self.type, count[parent][self.type])
- else:
- self.id = "smart%d" % (self.cpt)
- self.parsedOptions["ID"] = self.id
-
- def getParents(self):
- for key in self.parsedOptions:
- if key.lower() in ("parent", "derives_from"):
- self.parents = self.parsedOptions[key].split(",")
- return
- self.parents = None
-
- def removeParent(self):
- for key in self.parsedOptions.keys():
- if key.lower() in ("parent", "derives_from"):
- del self.parsedOptions[key]
-
- def export(self):
- self.splittedLine[8] = ";".join(["%s=%s" % (key, value) for key, value in self.parsedOptions.iteritems()])
- return "%s\n" % ("\t".join(self.splittedLine))
-
-
-class GffCleaner(TranscriptListCleaner):
-
- def __init__(self, verbosity = 1):
- super(GffCleaner, self).__init__(verbosity)
- self.lines  = {}
- self.acceptedTypes = ["mRNA", "transcript", "exon"]
- self.parents    = []
- self.children   = {}
-
- def getFileFormats():
- return ["gff", "gff2", "gff3"]
- getFileFormats = staticmethod(getFileFormats)
-
- def setAcceptedTypes(self, types):
- self.acceptedTypes = types
-
- def parse(self):
- progress = UnlimitedProgress(100000, "Reading input file", self.verbosity)
- for cpt, line in enumerate(self.inputHandle):
- if not line or line[0] == "#": continue
- if line[0] == ">": break
- parsedLine = ParsedLine(line, cpt)
- if self.acceptedTypes == None or parsedLine.type in self.acceptedTypes:
- self.lines[parsedLine.id] = parsedLine
- progress.inc()
- progress.done()
-
- def sort(self):
- progress = Progress(len(self.lines.keys()), "Sorting file", self.verbosity)
- for line in self.lines.values():
- parentFound = False
- if line.parents:
- for parent in line.parents:
- if parent in self.lines:
- parentFound = True
- if parent in self.children:
- self.children[parent].append(line)
- else:
- self.children[parent] = [line]
- if not parentFound:
- line.removeParent()
- self.parents.append(line)
- progress.inc()
- progress.done()
-
- def write(self):
- progress = Progress(len(self.parents), "Writing output file", self.verbosity)
- for line in self.parents:
- self.writeLine(line)
- progress.inc()
- progress.done()
-
- def writeLine(self, line):
- self.outputHandle.write(line.export())
- if line.id in self.children:
- for child in self.children[line.id]:
- self.writeLine(child)
-
- def _clean(self):
- self.parse()
- self.sort()
- self.write()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleaning/GffCleaner.pyc
b
Binary file SMART/Java/Python/cleaning/GffCleaner.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleaning/GtfCleaner.py
--- a/SMART/Java/Python/cleaning/GtfCleaner.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,121 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Clean a GTF file
-"""
-
-import shlex
-from SMART.Java.Python.cleaning.TranscriptListCleaner import TranscriptListCleaner
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-
-count = {}
-
-class ParsedLine(object):
- def __init__(self, line, cpt):
- self.line = line
- self.cpt  = cpt
- self.parse()
-
- def parse(self):
- self.line = self.line.strip()
- self.splittedLine = self.line.split(None, 8)
- if len(self.splittedLine) < 9:
- raise Exception("Line '%s' has less than 9 fields.  Exiting..." % (self.line))
- self.type = self.splittedLine[2]
- self.parseOptions()
-
- def parseOptions(self):
- self.parsedOptions = {}
- key   = None
- value = ""
- for option in shlex.split(self.splittedLine[8]):
- option = option.strip()
- if option == "": continue
- if key == None:
- key = option
- else:
- endValue = False
- if option[-1] == ";":
- endValue = True
- option.rstrip(";")
- value = "%s \"%s\"" % (value, option)
- if endValue:
- self.parsedOptions[key] = value
- if key == "transcript_id":
- self.transcriptId = value
- key   = None
- value = ""
-
- def export(self):
- return "%s\n" % (self.line)
-
-
-class GtfCleaner(TranscriptListCleaner):
-
- def __init__(self, verbosity = 1):
- super(GtfCleaner, self).__init__(verbosity)
- self.acceptedTypes = ["exon"]
- self.parents    = {}
-
- def getFileFormats():
- return ["gtf"]
- getFileFormats = staticmethod(getFileFormats)
-
- def setAcceptedTypes(self, types):
- self.acceptedTypes = types
-
- def parse(self):
- progress = UnlimitedProgress(100000, "Reading input file", self.verbosity)
- for cpt, line in enumerate(self.inputHandle):
- if not line or line[0] == "#": continue
- parsedLine = ParsedLine(line, cpt)
- if self.acceptedTypes == None or parsedLine.type in self.acceptedTypes:
- transcriptId = parsedLine.transcriptId
- if transcriptId not in self.parents:
- self.parents[parsedLine.transcriptId] = [parsedLine]
- else:
- self.parents[parsedLine.transcriptId].append(parsedLine)
- progress.inc()
- progress.done()
-
- def write(self):
- progress = Progress(len(self.parents.keys()), "Writing output file", self.verbosity)
- for parent in sorted(self.parents.keys()):
- for line in self.parents[parent]:
- self.outputHandle.write(line.export())
- progress.inc()
- progress.done()
-
- def _clean(self):
- self.parse()
- self.write()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleaning/GtfCleaner.pyc
b
Binary file SMART/Java/Python/cleaning/GtfCleaner.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleaning/TranscriptListCleaner.py
--- a/SMART/Java/Python/cleaning/TranscriptListCleaner.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,63 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-from SMART.Java.Python.structure.TranscriptList import TranscriptList
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-
-class TranscriptListCleaner(object):
- """A (quite generic) class that cleans a file containing transcripts"""
-
- def __init__(self, verbosity = 0):
- self.verbosity = verbosity
-
- def setInputFileName(self, fileName):
- try:
- self.inputHandle = open(fileName)
- except IOError:
- raise Exception("Error! Transcript file '%s' does not exist! Exiting..." % (self.fileName))
-
- def setOutputFileName(self, fileName):
- try:
- self.outputHandle = open(fileName, "w")
- except IOError:
- raise Exception("Error! Transcript file '%s' does not exist! Exiting..." % (self.fileName))
-
- def getFileFormats():
- pass
- getFileFormats = staticmethod(getFileFormats)
-
- def close(self):
- self.inputHandle.close()
- self.outputHandle.close()
-
- def clean(self):
- self._clean()
- self.close()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleaning/TranscriptListCleaner.pyc
b
Binary file SMART/Java/Python/cleaning/TranscriptListCleaner.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/cleaning/__init__.pyc
b
Binary file SMART/Java/Python/cleaning/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/clusterize.py
--- a/SMART/Java/Python/clusterize.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,185 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-from commons.core.writer.WriterChooser import WriterChooser\n-"""Clusterize a set of transcripts"""\n-\n-import os, os.path, random\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle\n-from SMART.Java.Python.ncList.FileSorter import FileSorter\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-\n-class Clusterize(object):\n-\n-\tdef __init__(self, verbosity):\n-\t\tself.normalize\t\t = False\n-\t\tself.presorted\t\t = False\n-\t\tself.distance\t\t  = 1\n-\t\tself.colinear\t\t  = False\n-\t\tself.nbWritten\t\t = 0\n-\t\tself.nbMerges\t\t  = 0\n-\t\tself.verbosity\t\t = verbosity\n-\t\tself.splittedFileNames = {}\n-\n-\tdef __del__(self):\n-\t\tfor fileName in self.splittedFileNames.values():\n-\t\t\tos.remove(fileName)\n-\n-\tdef setInputFile(self, fileName, format):\n-\t\tparserChooser = ParserChooser(self.verbosity)\n-\t\tparserChooser.findFormat(format)\n-\t\tself.parser = parserChooser.getParser(fileName)\n-\t\tself.sortedFileName = "%s_sorted_%d.pkl" % (os.path.splitext(fileName)[0], random.randint(1, 100000))\n-\t\tif "SMARTTMPPATH" in os.environ:\n-\t\t\tself.sortedFileName = os.path.join(os.environ["SMARTTMPPATH"], os.path.basename(self.sortedFileName))\n-\n-\tdef setOutputFileName(self, fileName, format="gff3", 
title="S-MART", feature="transcript", featurePart="exon"):\n-\t\twriterChooser = WriterChooser()\n-\t\twriterChooser.findFormat(format)\n-\t\tself.writer = writerChooser.getWriter(fileName)\n-\t\tself.writer.setTitle(title)\n-\t\tself.writer.setFeature(feature)\n-\t\tself.writer.setFeaturePart(featurePart)\n-\n-\tdef setDistance(self, distance):\n-\t\tself.distance = distance\n-\n-\tdef setColinear(self, colinear):\n-\t\tself.colinear = colinear\n-\n-\tdef setNormalize(self, normalize):\n-\t\tself.normalize = normalize\n-\t\t\n-\tdef setPresorted(self, presorted):\n-\t\tself.presorted = presorted\n-\n-\tdef _sortFile(self):\n-\t\tif self.presorted:\n-\t\t\treturn\n-\t\tfs = FileSorter(self.parser, self.verbosity-4)\n-\t\tfs.perChromosome(True)\n-\t\tfs.setPresorted(self.presorted)\n-\t\tfs.setOutputFileName(self.sortedFileName)\n-\t\tfs.sort()\n-\t\tself.splittedFileNames       = fs.getOutputFileNames()\n-\t\tself.nbElementsPerChromosome = fs.getNbElementsPerChromosome()\n-\t\tself.nbElements              = fs.getNbElements()\n-\t\t\n-\tdef _iterat'..b'ipts = []\n-\t\t\tif newTranscript.__class__.__name__ == "Mapping":\n-\t\t\t\tnewTranscript = newTranscript.getTranscript()\n-\t\t\tfor oldTranscript in transcripts:\n-\t\t\t\tif self._checkOverlap(newTranscript, oldTranscript):\n-\t\t\t\t\tself._merge(newTranscript, oldTranscript)\n-\t\t\t\telif self._checkPassed(newTranscript, oldTranscript):\n-\t\t\t\t\tself._write(oldTranscript)\n-\t\t\t\telse:\n-\t\t\t\t\tnewTranscripts.append(oldTranscript)\n-\t\t\tnewTranscripts.append(newTranscript)\n-\t\t\ttranscripts = newTranscripts\n-\t\t\tself.nbElements += 1\n-\t\t\tprogress.inc()\n-\t\tfor transcript in transcripts:\n-\t\t\tself._write(transcript)\n-\t\tprogress.done()\n-\n-\tdef _merge(self, transcript1, transcript2):\n-\t\tself.nbMerges += 1\n-\t\ttranscript2.setDirection(transcript1.getDirection())\n-\t\ttranscript1.merge(transcript2)\n-\n-\tdef _write(self, transcript):\n-\t\tself.nbWritten += 
1\n-\t\tself.writer.addTranscript(transcript)\n-\n-\tdef _checkOverlap(self, transcript1, transcript2):\n-\t\tif transcript1.getChromosome() != transcript2.getChromosome():\n-\t\t\treturn False\n-\t\tif self.colinear and transcript1.getDirection() != transcript2.getDirection():\n-\t\t\treturn False\n-\t\tif transcript1.getDistance(transcript2) > self.distance:\n-\t\t\treturn False\n-\t\treturn True\n-\n-\tdef _checkPassed(self, transcript1, transcript2):\n-\t\treturn ((transcript1.getChromosome() != transcript2.getChromosome()) or (transcript1.getDistance(transcript2) > self.distance))\n-\n-\tdef run(self):\n-\t\tself._sortFile()\n-\t\tif self.presorted:\n-\t\t\tself._iterate(None)\n-\t\telse:\n-\t\t\tfor chromosome in sorted(self.splittedFileNames.keys()):\n-\t\t\t\tself._iterate(chromosome)\n-\t\tself.writer.close()\n-\t\tif self.verbosity > 0:\n-\t\t\tprint "# input:   %d" % (self.nbElements)\n-\t\t\tprint "# written: %d (%d%% overlaps)" % (self.nbWritten, 0 if (self.nbElements == 0) else ((float(self.nbWritten) / self.nbElements) * 100))\n-\t\t\tprint "# merges:  %d" % (self.nbMerges)\n-\t\t\n-\n-if __name__ == "__main__":\n-\tdescription = "Clusterize v1.0.3: clusterize the data which overlap. 
[Category: Merge]"\n-\n-\tparser = OptionParser(description = description)\n-\tparser.add_option("-i", "--input",     dest="inputFileName",  action="store",\t\t\t\t     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n-\tparser.add_option("-f", "--format",    dest="format",\t\t action="store",\t\t\t\t     type="string", help="format of file [format: transcript file format]")\n-\tparser.add_option("-o", "--output",    dest="outputFileName", action="store",\t\t\t\t     type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")\n-\tparser.add_option("-u", "--outputFormat", dest="outputFormat", action="store",     default="gff",\t\t     type="string", help="output file format [format: transcript file format]")\n-\tparser.add_option("-c", "--colinear",  dest="colinear",       action="store_true", default=False,\t\t\t\thelp="merge colinear transcripts only [format: bool] [default: false]")\n-\tparser.add_option("-d", "--distance",  dest="distance",       action="store",      default=0,     type="int",    help="max. 
distance between two transcripts to be merged [format: int] [default: 0]")\n-\tparser.add_option("-n", "--normalize", dest="normalize",      action="store_true", default=False,\t\t\t\thelp="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")\n-\tparser.add_option("-s", "--sorted",    dest="sorted",\t\t action="store_true", default=False,\t\t\t\thelp="input is already sorted [format: bool] [default: false]")\n-\tparser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")\n-\t(options, args) = parser.parse_args()\n-\n-\tc = Clusterize(options.verbosity)\n-\tc.setInputFile(options.inputFileName, options.format)\n-\tc.setOutputFileName(options.outputFileName, options.outputFormat)\n-\tc.setColinear(options.colinear)\n-\tc.setDistance(options.distance)\n-\tc.setNormalize(options.normalize)\n-\tc.setPresorted(options.sorted)\n-\tc.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/clusterizeBySlidingWindows.py
--- a/SMART/Java/Python/clusterizeBySlidingWindows.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,344 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import re\n-from commons.core.writer.WriterChooser import WriterChooser\n-"""\n-Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.\n-"""\n-\n-import os, os.path\n-from optparse import OptionParser\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from SMART.Java.Python.misc.Progress import Progress\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-\n-class ClusterizeBySlidingWindows(object):\n-\n-    def __init__(self, verbosity = 0):\n-        self.verbosity = verbosity\n-        self.strands   = (0, )\n-        self.normalize = False\n-        self.plot      = None\n-        self.excel     = None\n-        self.outputFileName = \'\'\n-        self.defaultValue = None\n-\n-    def __del__(self):\n-        pass\n-\n-    def setInputFile(self, fileName, format):\n-        self.parser = TranscriptContainer(fileName, format, self.verbosity)\n-\n-    def setOutputFileName(self, fileName, format="gff", title="S-MART", feature="transcript", featurePart="exon"):\n-        writerChooser = WriterChooser(self.verbosity)\n-        writerChooser.findFormat(format)\n-        self.writer = writerChooser.getWriter(fileName)\n-        self.writer.setTitle(title)\n-        self.writer.setFeature(feature)\n-        self.writer.setFeaturePart(featurePart)\n-#        self.outputFileName = fileName\n-#        self.outputFormat = format\n-\n-   
 def setWindowSize(self, size):\n-        self.size = size\n-\n-    def setWindowOverlap(self, overlap):\n-        self.overlap = overlap\n-\n-    def setTag(self, tag):\n-        self.tag = tag\n-\n-    def setOperation(self, operation):\n-        self.operation = operation\n-\n-    def setBothStrands(self, bothStrands):\n-        if bothStrands:\n-            self.strands = (-1, 1)\n-\n-    def setNormalize(self, normalize):\n-        self.normalize = normalize\n-\n-    def setPlot(self, plot):\n-        self.plot = plot\n-\n-    def setExcel(self, excel):\n-        self.excel = excel\n-\n-    def setOutputTag(self, tag):\n-        self.outputTagName = tag\n-        \n-    def setDefaultValue(self, defaultValue):\n-        self.defaultValue = defaultValue\n-\n-    def checkOptions(self):\n-#        if self.operation != None:\n-#            raise Exception("Trying to combine the values without specifying tag! Aborting...")\n-        if self.operation != '..b'lf.excel:\n-            self.writeExcel()\n-        if self.plot:\n-            self.plotData()\n-        self.printRegions()\n-\n-\n-if __name__ == "__main__":\n-    \n-    # parse command line\n-    description = "Clusterize by Sliding Windows v1.0.1: Produces a GFF3 file that clusters a list of transcripts using a sliding window. 
[Category: Sliding Windows]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")\n-    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")\n-    parser.add_option("-u", "--outputFormat", dest="outputFormat",  action="store",     default="gff",  type="string", help="format of the output file [format: transcript file format]")\n-    parser.add_option("-s", "--size",        dest="size",           action="store",                     type="int",    help="size of the regions [compulsory] [format: int]")\n-    parser.add_option("-e", "--overlap",     dest="overlap",        action="store",                     type="int",    help="overlap between two consecutive regions [compulsory] [format: int]")\n-    parser.add_option("-m", "--normalize",   dest="normalize",      action="store_true", default=False,                help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")\n-    parser.add_option("-g", "--tag",         dest="tag",            action="store",      default=None,  type="string", help="use a given tag as input (instead of summing number of features) [format: string]")    \n-    parser.add_option("-r", "--operation",   dest="operation",      action="store",      default=None,  type="string", help="combine tag value with given operation [format: choice (sum, avg, med, min, max)]")\n-    parser.add_option("-d", "--defaultValue",dest="defaultValue",   
action="store",                     type="float",    help="default value for input tag [format: float]")\n-    parser.add_option("-w", "--write",       dest="writeTag",       action="store",      default=None,  type="string", help="print the result in the given tag (default usually is \'nbElements\') [format: string]")    \n-    parser.add_option("-2", "--strands",     dest="strands",        action="store_true", default=False,                help="consider the two strands separately [format: bool] [default: false]")\n-    parser.add_option("-p", "--plot",        dest="plot",           action="store",      default=None,  type="string", help="plot regions to the given file [format: output file in PNG format]")\n-    parser.add_option("-x", "--excel",       dest="excel",          action="store",      default=None,  type="string", help="write an Excel file to the given file [format: output file in Excel format]")\n-    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")\n-    (options, args) = parser.parse_args()\n-\n-    cbsw = ClusterizeBySlidingWindows(options.verbosity)\n-    cbsw.setInputFile(options.inputFileName, options.inputFormat)\n-    cbsw.setOutputFileName(options.outputFileName, options.outputFormat)\n-    cbsw.setWindowSize(options.size)\n-    cbsw.setWindowOverlap(options.overlap)\n-    cbsw.setTag(options.tag)\n-    cbsw.setDefaultValue(options.defaultValue)\n-    cbsw.setOperation(options.operation)\n-    cbsw.setOutputTag(options.writeTag)\n-    cbsw.setBothStrands(options.strands)\n-    cbsw.setPlot(options.plot)\n-    cbsw.setExcel(options.excel)\n-    cbsw.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/compareOverlapping.py
--- a/SMART/Java/Python/compareOverlapping.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,126 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""Compare overlap of two transcript lists"""\n-import sys\n-import os\n-from optparse import OptionParser\n-from SMART.Java.Python.misc import Utils\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from commons.core.writer.TranscriptWriter import TranscriptWriter\n-from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-\n-class CompareOverlapping(object):\n-\n-    def __init__(self):\n-        self._options = None\n-\n-\n-    def setAttributesFromCmdLine(self):\n-        description = "Compare Overlapping v1.0.3: Get the data which overlap with a reference set. 
[Category: Data Comparison]"\n-\n-        parser = OptionParser(description = description)\n-        parser.add_option("-i", "--input1",           dest="inputFileName1", action="store",                     type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")\n-        parser.add_option("-f", "--format1",          dest="format1",        action="store",                     type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n-        parser.add_option("-j", "--input2",           dest="inputFileName2", action="store",                     type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n-        parser.add_option("-g", "--format2",          dest="format2",        action="store",                     type="string", help="format of file 2 [compulsory] [format: transcript file format]")\n-        parser.add_option("-o", "--output",           dest="output",         action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n-        parser.add_option("-S", "--start1",           dest="start1",         action="store",      default=None,  type="int",    help="only consider the n first nucleotides of the transcripts in file 1 (do not use it with -U) [format: int]")\n-        parser.add_option("-s", "--start2",           dest="start2",         action="store",      default=None,  type="int",    help="only consider the n first nucleotides of the transcripts in file 2 (do not use it with -u) [format: int]")\n-        parser.add_option("-U", "--end1",             dest="end1",           action="store",      default=None,  type="int'..b'pping",   dest="notOverlapping", action="store_true", default=False,                help="also output not overlapping data [format: bool] [default: false]")\n-        parser.add_option("-x", "--exclude",          dest="exclude",        action="store_true", 
default=False,                help="invert the match [format: bool] [default: false]")\n-        parser.add_option("-v", "--verbosity",        dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")\n-        parser.add_option("-l", "--log",              dest="log",            action="store_true", default=False,                help="write a log file [format: bool] [default: false]")\n-        (self._options, args) = parser.parse_args()\n-\n-\n-    def run(self):             \n-        logHandle = None\n-        if self._options.log:\n-            logHandle = open(self._options.output, "w")\n-\n-        transcriptContainer1 = TranscriptContainer(self._options.inputFileName1, self._options.format1, self._options.verbosity)\n-        transcriptContainer2 = TranscriptContainer(self._options.inputFileName2, self._options.format2, self._options.verbosity)\n-        writer               = TranscriptWriter(self._options.output, "gff3", self._options.verbosity)\n-\n-        transcriptListComparator = TranscriptListsComparator(logHandle, self._options.verbosity)\n-        transcriptListComparator.restrictToStart(transcriptListComparator.QUERY, self._options.start1)\n-        transcriptListComparator.restrictToStart(transcriptListComparator.REFERENCE, self._options.start2)\n-        transcriptListComparator.restrictToEnd(transcriptListComparator.QUERY, self._options.end1)\n-        transcriptListComparator.restrictToEnd(transcriptListComparator.REFERENCE, self._options.end2)\n-        transcriptListComparator.extendFivePrime(transcriptListComparator.QUERY, self._options.fivePrime1)\n-        transcriptListComparator.extendFivePrime(transcriptListComparator.REFERENCE, self._options.fivePrime2)\n-        transcriptListComparator.extendThreePrime(transcriptListComparator.QUERY, self._options.threePrime1)\n-        transcriptListComparator.extendThreePrime(transcriptListComparator.REFERENCE, self._options.threePrime2)\n-        
transcriptListComparator.acceptIntrons(transcriptListComparator.QUERY, self._options.introns)\n-        transcriptListComparator.acceptIntrons(transcriptListComparator.REFERENCE, self._options.introns)\n-        transcriptListComparator.getAntisenseOnly(self._options.antisense)\n-        transcriptListComparator.getColinearOnly(self._options.colinear)\n-        transcriptListComparator.getInvert(self._options.exclude)\n-        transcriptListComparator.setMaxDistance(self._options.distance)\n-        transcriptListComparator.setMinOverlap(self._options.minOverlap)\n-        transcriptListComparator.setPcOverlap(self._options.pcOverlap)\n-        transcriptListComparator.setIncludedOnly(self._options.included)\n-        transcriptListComparator.setIncludingOnly(self._options.including)\n-        transcriptListComparator.includeNotOverlapping(self._options.notOverlapping)\n-        transcriptListComparator.computeOdds(True)\n-        transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, transcriptContainer1)\n-        transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.REFERENCE, transcriptContainer2)\n-        transcriptListComparator.setOutputWriter(writer)\n-        transcriptListComparator.compareTranscriptList()\n-\n-        if self._options.log:\n-            logHandle.close()\n-\n-        if not self._options.exclude:\n-            odds = transcriptListComparator.getOdds()\n-            if self._options.verbosity > 0 and odds:\n-                print "min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(odds)\n-                \n-if __name__ == "__main__":\n-    icompareOverlapping = CompareOverlapping()\n-    icompareOverlapping.setAttributesFromCmdLine()\n-    icompareOverlapping.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/compare_TAIR10_Reiterative4th.gff3
--- a/SMART/Java/Python/compare_TAIR10_Reiterative4th.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,18028 +0,0 @@\n-chr5\tS-MART\tgene\t4308129\t4310181\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13440;Name=AT5G13440\n-chr4\tS-MART\tgene\t10398918\t10399512\t.\t+\t.\tNote=protein_coding_gene;ID=AT4G18980;Name=AT4G18980\n-chr3\tS-MART\tgene\t22678151\t22680379\t.\t+\t.\tNote=protein_coding_gene;ID=AT3G61270;Name=AT3G61270\n-chr3\tS-MART\tgene\t5705662\t5707023\t.\t+\t.\tNote=protein_coding_gene;ID=AT3G16770;Name=AT3G16770\n-chr5\tS-MART\tgene\t4523520\t4525863\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G14020;Name=AT5G14020\n-chr3\tS-MART\tgene\t5708925\t5710579\t.\t+\t.\tNote=protein_coding_gene;ID=AT3G16780;Name=AT3G16780\n-chr5\tS-MART\tgene\t26151421\t26157099\t.\t+\t.\tNote=protein_coding_gene;ID=AT5G65440;Name=AT5G65440\n-chr3\tS-MART\tgene\t5711082\t5719023\t.\t+\t.\tNote=protein_coding_gene;ID=AT3G16785;Name=AT3G16785\n-chr3\tS-MART\tgene\t2100189\t2100983\t.\t+\t.\tNote=protein_coding_gene;ID=AT3G36659;Name=AT3G36659\n-chr1\tS-MART\tgene\t8664153\t8665289\t.\t+\t.\tNote=protein_coding_gene;ID=AT1G24450;Name=AT1G24450\n-chr5\tS-MART\tgene\t4530643\t4533070\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G14040;Name=AT5G14040\n-chr1\tS-MART\tgene\t8665874\t8672651\t.\t+\t.\tNote=protein_coding_gene;ID=AT1G24460;Name=AT1G24460\n-chr5\tS-MART\tgene\t4533261\t4535301\t.\t+\t.\tNote=protein_coding_gene;ID=AT5G14050;Name=AT5G14050\n-chr5\tS-MART\tgene\t4535401\t4539193\t.\t+\t.\tNote=protein_coding_gene;ID=AT5G14060;Name=AT5G14060\n-chr1\tS-MART\tgene\t8676360\t8677159\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G24480;Name=AT1G24480\n-chr3\tS-MART\tpseudogene\t14227679\t14228304\t.\t-\t.\tNote=pseudogene;ID=AT3G42047;Name=AT3G42047\n-chr3\tS-MART\tgene\t14228495\t14233245\t.\t-\t.\tNote=protein_coding_gene;ID=AT3G42050;Name=AT3G42050\n-chr5\tS-MART\tgene\t4315759\t4318360\t.\t+\t.\tNote=protein_coding_gene;ID=AT5G13460;Name=AT5G13460\n-chr2\tS-MART\tgene\t14990325\t14990935\t.\t-\t.\tNote=protein_coding_gene;ID=AT2G35658;Name=AT2G35658\n-chr2\tS-MART\tgene\t14996713\
t14999085\t.\t-\t.\tNote=protein_coding_gene;ID=AT2G35680;Name=AT2G35680\n-chr2\tS-MART\tgene\t14999754\t15003066\t.\t+\t.\tNote=protein_coding_gene;ID=AT2G35690;Name=AT2G35690\n-chr1\tS-MART\tgene\t8682234\t8685271\t.\t+\t.\tNote=protein_coding_gene;ID=AT1G24490;Name=AT1G24490\n-chr2\tS-MART\tgene\t15005205\t15005789\t.\t+\t.\tNote=protein_coding_gene;ID=AT2G35700;Name=AT2G35700\n-chr5\tS-MART\tgene\t4547202\t4549417\t.\t+\t.\tNote=protein_coding_gene;ID=AT5G14090;Name=AT5G14090\n-chr2\tS-MART\tgene\t15022676\t15022867\t.\t-\t.\tNote=protein_coding_gene;ID=AT2G35733;Name=AT2G35733\n-chr3\tS-MART\tgene\t5752258\t5752410\t.\t+\t.\tNote=protein_coding_gene;ID=AT3G16851;Name=AT3G16851\n-chr2\tS-MART\tgene\t15027589\t15027668\t.\t-\t.\tNote=snoRNA;ID=AT2G35742;Name=AT2G35742\n-chr2\tS-MART\tgene\t15024489\t15026414\t.\t-\t.\tNote=protein_coding_gene;ID=AT2G35740;Name=AT2G35740\n-chr2\tS-MART\tpseudogene\t15027810\t15027989\t.\t-\t.\tNote=pseudogene;ID=AT2G35743;Name=AT2G35743\n-chr2\tS-MART\tgene\t15029712\t15029790\t.\t-\t.\tNote=snoRNA;ID=AT2G35744;Name=AT2G35744\n-chr2\tS-MART\tgene\t15059337\t15061100\t.\t-\t.\tNote=other_RNA;ID=AT2G35859;Name=AT2G35859\n-chr1\tS-MART\tgene\t8688629\t8689268\t.\t+\t.\tNote=protein_coding_gene;ID=AT1G24520;Name=AT1G24520\n-chr2\tS-MART\tgene\t15031923\t15033307\t.\t+\t.\tNote=protein_coding_gene;ID=AT2G35760;Name=AT2G35760\n-chr2\tS-MART\tgene\t15033595\t15034091\t.\t+\t.\tNote=protein_coding_gene;ID=AT2G35765;Name=AT2G35765\n-chr2\tS-MART\tgene\t15034037\t15036518\t.\t-\t.\tNote=protein_coding_gene;ID=AT2G35770;Name=AT2G35770\n-chr2\tS-MART\tgene\t15040679\t15042123\t.\t+\t.\tNote=protein_coding_gene;ID=AT2G35790;Name=AT2G35790\n-chr3\tS-MART\tgene\t5759375\t5762235\t.\t-\t.\tNote=protein_coding_gene;ID=AT3G16860;Name=AT3G16860\n-chr2\tS-MART\tgene\t15042175\t15043575\t.\t+\t.\tNote=protein_coding_gene;ID=AT2G35795;Name=AT2G35795\n-chr5\tS-MART\tgene\t4553806\t4554382\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G14110;Name=AT5G14110\n-
chr2\tS-MART\tgene\t15049150\t15050323\t.\t+\t.\tNote=protein_coding_gene;ID=AT2G35810;Name=AT2G35810\n-chr2\tS-MART\tgene\t15050912\t15052239\t.\t+\t.\tNote=protein_coding_gene;ID=AT2G35820;Name=AT2G35820\n-chr5\tS-MART\tgene\t4318524\t4319924\t.\t+\t.\tNote=protein_coding_gene;ID=AT5G13470;Name=AT5G13470\n-chr3\tS-MART\tgene\t5763586\t5764654\t.\t-\t.\tNote=protein_coding_gene;ID=AT3'..b'ne;ID=AT1G60540;Name=AT1G60540\n-chr1\tS-MART\tgene\t22302492\t22305156\t.\t+\t.\tNote=other_RNA;ID=AT1G60545;Name=AT1G60545\n-chr5\tS-MART\tgene\t4473089\t4474402\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13860;Name=AT5G13860\n-chr1\tS-MART\tgene\t22305831\t22308229\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G60550;Name=AT1G60550\n-chr3\tS-MART\ttransposable_element_gene\t14106869\t14107511\t.\t+\t.\tNote=transposable_element_gene;ID=AT3G33230;Derives_from=AT3TE57780;Name=AT3G33230\n-chr1\tS-MART\tgene\t22324645\t22327359\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G60600;Name=AT1G60600\n-chr1\tS-MART\tgene\t22327912\t22330276\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G60610;Name=AT1G60610\n-chr1\tS-MART\tgene\t22333916\t22334161\t.\t+\t.\tNote=protein_coding_gene;ID=AT1G60625;Name=AT1G60625\n-chr1\tS-MART\tgene\t8609445\t8612580\t.\t+\t.\tNote=protein_coding_gene;ID=AT1G24280;Name=AT1G24280\n-chr1\tS-MART\tgene\t22334662\t22336908\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G60630;Name=AT1G60630\n-chr1\tS-MART\tgene\t22337375\t22339672\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G60640;Name=AT1G60640\n-chr5\tS-MART\tgene\t4477354\t4478109\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13880;Name=AT5G13880\n-chr1\tS-MART\tgene\t22347622\t22349297\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G60680;Name=AT1G60680\n-chr1\tS-MART\tgene\t8612505\t8614277\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G24290;Name=AT1G24290\n-chr1\tS-MART\tgene\t22354753\t22356761\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G60710;Name=AT1G60710\n-chr1\tS-MART\tgene\t19859267\t19860976\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G5326
0;Name=AT1G53260\n-chr3\tS-MART\tgene\t14093656\t14095549\t.\t-\t.\tNote=protein_coding_gene;ID=AT3G33520;Name=AT3G33520\n-chr5\tS-MART\tgene\t4478843\t4480928\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13890;Name=AT5G13890\n-chr1\tS-MART\tgene\t22366701\t22368714\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G60770;Name=AT1G60770\n-chr1\tS-MART\tgene\t22368953\t22372159\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G60780;Name=AT1G60780\n-chr3\tS-MART\tgene\t5669373\t5670842\t.\t-\t.\tNote=protein_coding_gene;ID=AT3G16640;Name=AT3G16640\n-chr5\tS-MART\tgene\t4481232\t4481889\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13900;Name=AT5G13900\n-chr5\tS-MART\tgene\t4301792\t4304312\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13420;Name=AT5G13420\n-chr1\tS-MART\tgene\t8626315\t8630971\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G24320;Name=AT1G24320\n-chr1\tS-MART\tgene\t27982131\t27982280\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G74448;Name=AT1G74448\n-chr5\tS-MART\tgene\t4482450\t4483085\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13910;Name=AT5G13910\n-chr1\tS-MART\tgene\t8631440\t8635055\t.\t+\t.\tNote=protein_coding_gene;ID=AT1G24330;Name=AT1G24330\n-chr5\tS-MART\tgene\t4485168\t4485311\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13917;Name=AT5G13917\n-chr1\tS-MART\tgene\t8635209\t8638986\t.\t+\t.\tNote=protein_coding_gene;ID=AT1G24340;Name=AT1G24340\n-chr5\tS-MART\tgene\t4485931\t4487433\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13920;Name=AT5G13920\n-chr4\tS-MART\tgene\t10325965\t10326036\t.\t-\t.\tNote=tRNA;ID=AT4G18815;Name=AT4G18815\n-chr1\tS-MART\tgene\t8638520\t8640825\t.\t-\t.\tNote=protein_coding_gene;ID=AT1G24350;Name=AT1G24350\n-chr5\tS-MART\tgene\t4488688\t4490264\t.\t+\t.\tNote=protein_coding_gene;ID=AT5G13930;Name=AT5G13930\n-chr1\tS-MART\tgene\t8640582\t8643478\t.\t+\t.\tNote=protein_coding_gene;ID=AT1G24360;Name=AT1G24360\n-chr5\tS-MART\tgene\t4305125\t4307513\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13430;Name=AT5G13430\n-chr1\tS-MART\tgene\t27982509\t27984280\t.\t+\t.\
tNote=protein_coding_gene;ID=AT1G74450;Name=AT1G74450\n-chr5\tS-MART\tgene\t4495971\t4500725\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13950;Name=AT5G13950\n-chr4\tS-MART\tgene\t1311198\t1312154\t.\t+\t.\tNote=protein_coding_gene;ID=AT4G02950;Name=AT4G02950\n-chr5\tS-MART\tgene\t4501447\t4506188\t.\t+\t.\tNote=protein_coding_gene;ID=AT5G13960;Name=AT5G13960\n-chr5\tS-MART\tgene\t4506232\t4507842\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13970;Name=AT5G13970\n-chr5\tS-MART\tgene\t4508496\t4514666\t.\t+\t.\tNote=protein_coding_gene;ID=AT5G13980;Name=AT5G13980\n-chr2\tS-MART\tgene\t17167279\t17170407\t.\t-\t.\tNote=protein_coding_gene;ID=AT2G41190;Name=AT2G41190\n-chr5\tS-MART\tgene\t4514568\t4516892\t.\t-\t.\tNote=protein_coding_gene;ID=AT5G13990;Name=AT5G13990\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/convertTranscriptFile.py
--- a/SMART/Java/Python/convertTranscriptFile.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,115 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Read a transcript file and convert it to another format
-"""
-
-import os, re
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from commons.core.writer.TranscriptWriter import TranscriptWriter
-from SMART.Java.Python.misc.Progress import Progress
-
-
-class ConvertTranscriptFile(object):
-    def __init__(self,inputFileName="", inputFormat ="", outputFileName="", outputFormat="", name="", sequenceFileName=None, strands=False, galaxy=False, feature=None, featurePart=None, verbosity=1):
-        self.inputFileName = inputFileName
-        self.inputFormat = inputFormat
-        self.outputFileName = outputFileName
-        self.outputFormat = outputFormat
-        self.name = name
-        self.sequenceFileName = sequenceFileName
-        self.strands = strands
-        self.galaxy = galaxy
-
-        self.feature=feature
-        self.featurePart=featurePart
-        
-        self.verbosity = verbosity
-         
-    def setAttributesFromCmdLine(self):
-        description = "Convert Transcript File v1.0.3: Convert a file from a format to another. [Category: Conversion]"
-        parser = OptionParser(description = description)
-        parser.add_option("-i", "--input",        dest="inputFileName",    action="store",                       type="string", help="input file [compulsory] [format: file in format given by -f]")
-        parser.add_option("-f", "--inputFormat",  dest="inputFormat",      action="store",                       type="string", help="format of the input file [compulsory] [format: transcript or mapping file format]")
-        parser.add_option("-o", "--output",       dest="outputFileName",   action="store",                       type="string", help="output file [compulsory] [format: output file in format given by -g]")
-        parser.add_option("-g", "--outputFormat", dest="outputFormat",     action="store",                       type="string", help="format of the output file [compulsory] [format: transcript file format]")
-        parser.add_option("-n", "--name",         dest="name",             action="store",      default="SMART", type="string", help="name for the transcripts [format: string] [default: SMART]")
-        parser.add_option("-s", "--sequences",    dest="sequenceFileName", action="store",      default=None,    type="string", help="give the corresponding Multi-Fasta file (useful for EMBL format) [format: string]")
-        parser.add_option("-t", "--strands",      dest="strands",          action="store_true", default=False,                  help="consider the 2 strands as different (only useful for writing WIG files) [format: bool] [default: False]")
-        parser.add_option("-v", "--verbosity",    dest="verbosity",        action="store",      default=1,       type="int",    help="trace level [format: int] [default: 1]")
-        parser.add_option("-G", "--galaxy",       dest="galaxy",           action="store_true", default=False,                  help="used for galaxy [format: bool] [default: False]")
-        (options, args) = parser.parse_args()
-        self._setAttributesFromOptions(options)
-
-    def _setAttributesFromOptions(self, options):
-        self.inputFileName = options.inputFileName
-        self.inputFormat = options.inputFormat
-        self.outputFileName = options.outputFileName
-        self.outputFormat = options.outputFormat
-        self.name = options.name  
-        self.sequenceFileName = options.sequenceFileName
-        self.strands = options.strands
-        self.galaxy =  options.galaxy
-        self.verbosity = options.verbosity
-
-    def run(self):
-        # create parser
-        parser = TranscriptContainer(self.inputFileName, self.inputFormat, self.verbosity)
-        # create writer
-        writer = TranscriptWriter(self.outputFileName, self.outputFormat, self.verbosity)
-        # connect parser and writer
-        writer.setContainer(parser)
-            
-        if self.name != None:
-            writer.setTitle(self.name)
-        if self.feature != None:
-            writer.setFeature(self.feature)
-        if self.featurePart != None:
-            writer.setFeaturePart(self.featurePart)
-        if self.sequenceFileName != None:
-            writer.addSequenceFile(self.sequenceFileName)
-            
-        nbItems = 0
-        if self.verbosity > 0:
-            nbItems = parser.getNbItems()
-            print "%i items found" % (nbItems)
-    
-        if self.strands:
-            writer.setStrands(True)
-        # convert
-        writer.write()
-        writer.close()
-
-if __name__ == "__main__":
-    iConvertTranscriptFile = ConvertTranscriptFile()
-    iConvertTranscriptFile.setAttributesFromCmdLine()
-    iConvertTranscriptFile.run()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/coordinatesToSequence.py
--- a/SMART/Java/Python/coordinatesToSequence.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,64 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Convert a list of coordinates to sequences"""
-
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import FastaParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from commons.core.writer.FastaWriter import FastaWriter
-from SMART.Java.Python.misc.Progress import Progress
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Coordinates to Sequences v1.0.2: Extract the sequences from a list of coordinates. [Category: Conversion]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format of file [compulsory] [format: transcript file format]")
-    parser.add_option("-s", "--sequences", dest="sequences", action="store",  type="string", help="file that contains the sequences [compulsory] [format: file in FASTA format]")
-    parser.add_option("-o", "--output", dest="outputFileName", action="store",  default=None, type="string", help="output file (FASTA format) [format: output file in FASTA format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    # create parser
-    parser = TranscriptContainer(options.inputFileName, options.format, options.verbosity) 
-
-    sequenceParser = FastaParser(options.sequences, options.verbosity)
-
-    writer = FastaWriter(options.outputFileName, options.verbosity)
-    progress = Progress(parser.getNbTranscripts(), "Reading %s" % (options.inputFileName), options.verbosity)
-    for transcript in parser.getIterator():
-        sequence = transcript.extractSequence(sequenceParser)
-        writer.addSequence(sequence)
-        progress.inc()
-    progress.done()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/findTss.py
--- a/SMART/Java/Python/findTss.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,77 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Find TSS from short reads"""
-import os
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from commons.core.writer.Gff3Writer import Gff3Writer
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Find TSS v1.0.1: Find the transcription start site of a list of transcripts. [Category: Merge]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName", action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",        action="store",                     type="string", help="format of file [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",    dest="output",        action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-n", "--normalize", dest="normalize",     action="store_true", default=False,                help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
-    parser.add_option("-d", "--distance",  dest="distance",      action="store",      default=10,    type="int",    help="distance between two reads to mark the same TSS [format: int] [default: 10]")
-    parser.add_option("-e", "--colinear",  dest="colinear",      action="store_true", default=False,                help="group by strand [format: bool] [default: false]")
-    parser.add_option("-c", "--csv",       dest="csv",           action="store",      default=None,  type="string", help="output a CSV file in the given path [format: output file in Excel format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",     action="store",      default=1,     type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    transcriptContainer = TranscriptContainer(options.inputFileName, options.format, options.verbosity)        
-    transcriptListComparator = TranscriptListsComparator(None, options.verbosity)
-    transcriptListComparator.restrictToStart(transcriptListComparator.QUERY, 1)
-    transcriptListComparator.setMaxDistance(options.distance)
-    transcriptListComparator.aggregate(True)
-    transcriptListComparator.computeOdds(True)
-    transcriptListComparator.getColinearOnly(options.colinear)
-    transcriptListComparator.setNormalization(options.normalize)
-    transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, transcriptContainer)
-    transcriptListComparator.setOutputWriter(Gff3Writer(options.output, options.verbosity))
-    transcriptListComparator.compareTranscriptListSelfMerge()
-
-    if options.csv != None:
-        csvResults = transcriptListComparator.getOddsPerTranscript()
-        csvFile    = open(options.csv, "w")
-        csvFile.write("Number,Transcript\n")
-        for number in sorted(list(set(csvResults.values()))):
-            csvFile.write("%d," % (number))
-            for name in csvResults:
-                if csvResults[name] == number:
-                    csvFile.write("%s " % (name))
-            csvFile.write("\n")
-        csvFile.close()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/fo.py
--- a/SMART/Java/Python/fo.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,341 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2012\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-\n-import os, struct, time, shutil\n-from optparse import OptionParser\n-from pyRepetUnit.commons.parsing.ParserChooser import ParserChooser\n-from pyRepetUnit.commons.writer.Gff3Writer import Gff3Writer\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.ncList.NCList import NCList\n-from SMART.Java.Python.ncList.ConvertToNCList import ConvertToNCList\n-from SMART.Java.Python.ncList.NCListParser import NCListParser\n-from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n-from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n-from SMART.Java.Python.ncList.FileSorter import FileSorter\n-from SMART.Java.Python.ncList.NCListHandler import NCListHandler\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-try:\n-   import cPickle as pickle\n-except:\n-   import pickle\n-\n-REFERENCE = 0\n-QUERY = 1\n-TYPES = (REFERENCE, QUERY)\n-TYPETOSTRING = {0: "reference", 1: "query"}\n-\n-class FindOverlapsOptim(object):\n-\t\n-\tdef __init__(self, verbosity = 1):\n-\t\tself._parsers\t\t\t\t  = {}\n-\t\tself._sortedFileNames\t\t  = {}\n-\t\tself._outputFileName\t\t  = "outputOverlaps.gff3"\n-\t\tself._iWriter\t\t\t\t  = None\n-\t\tself._inputFileNames\t\t  = {REFERENCE: None,  QUERY: None}\n-\t\tself._convertedFileNames      = {REFERENCE: False, QUERY: False}\n-\t\tself._inputFileFormats\t\t  = {REFERENCE: None,  QUERY: 
None}\n-\t\tself._converted\t\t\t      = {REFERENCE: False, QUERY: False}\n-\t\tself._ncListHandlers          = {REFERENCE: None,  QUERY: None}\n-\t\tself._splittedFileNames\t      = {REFERENCE: {},\tQUERY: {}}\n-\t\tself._nbOverlappingQueries\t  = 0\n-\t\tself._nbOverlaps\t\t\t  = 0\n-\t\tself._nbLines\t\t\t\t  = {REFERENCE: 0, QUERY: 0}\n-\t\tself._verbosity\t\t\t      = verbosity\n-\t\tself._ncLists\t\t\t\t  = {}\n-\t\tself._cursors\t\t\t\t  = {}\n-\t\tself._nbElementsPerChromosome = {}\n-\t\tself._tmpDirectories\t\t  = {REFERENCE: False, QUERY: False}\n-\t\t\n-\tdef close(self):\n-\t\tself._iWriter.close()\n-\t\tfor fileName in (self._sortedFileNames.values()):\n-\t\t\tif os.path.exists(fileName):\n-\t\t\t\tos.remove(fileName)\n-\t\tfor fileName in self._convertedFileNames.values():\n-\t\t\tif fileName:\n-\t\t\t\tos.remove(fileName)\n-\t\t\n-\tdef setRefFileName(self, fileName, format):\n-\t\tself.setFileName(fileName, format, REFERENCE)\n-\t\t\n-\tdef setQueryFileName(self, fileName, format):\n-\t\tself.setFileName(fileName, format, QUERY)\n-\n-\tdef setFileNam'..b'eak\n-\t\t\t\t\telse:\n-\t\t\t\t\t\tcursor.moveDown()\n-\t\t\t#In case: Query is on the left of the RefInterval and does not overlap.\t\t\n-\t\t\telse:\n-\t\t\t\t#print "choice 3"\n-\t\t\t\tif firstOverlapLAddr.isOut() or firstOverlapLAddr.compare(parentCursor):\n-\t\t\t\t\t#print "changing nfo 2"\n-\t\t\t\t\tfirstOverlapLAddr.copy(cursor)\n-\t\t\t\t\tnextDone = False # new\n-\t\t\t\t#print "break 2"\n-\t\t\t\tbreak\n-\t\t\t\n-\t\t\tdone = False\n-\t\t\tif cursor.isOut():\n-\t\t\t\t#print "break 3"\n-\t\t\t\tbreak\n-\t\tself._writeIntervalInNewGFF3(queryTranscript, overlappingNames)\n-\t\treturn firstOverlapLAddr, nextDone, not overlappingNames\n-\t\n-\tdef isOverlapping(self, queryTranscript, refTranscript):\n-\t\tif (queryTranscript.getStart() <= refTranscript.getEnd() and queryTranscript.getEnd() >= refTranscript.getStart()):\n-\t\t\treturn 0   \n-\t\tif queryTranscript.getEnd() < 
refTranscript.getStart():\n-\t\t\treturn 1\n-\t\treturn -1\n-\n-\tdef checkIndex(self, transcript, cursor):\n-\t\tchromosome = transcript.getChromosome()\n-\t\tnextLIndex = self._indices[REFERENCE][chromosome].getIndex(transcript)\n-\t\tif nextLIndex == None:\n-\t\t\treturn None\n-\t\tncList\t\t = self._ncLists[REFERENCE][chromosome]\n-\t\tnextGffAddress = ncList.getRefGffAddr(nextLIndex)\n-\t\tthisGffAddress = cursor.getGffAddress()\n-\t\tif nextGffAddress > thisGffAddress:\n-\t\t\treturn nextLIndex\n-\t\treturn None\n-\t\t\n-\tdef _writeIntervalInNewGFF3(self, transcript, names):\n-\t\tnbOverlaps = 0\n-\t\tfor cpt in names.values():\n-\t\t\tnbOverlaps += cpt\n-\t\tif not names:\n-\t\t\treturn\n-\t\ttranscript.setTagValue("overlapsWith", "--".join(sorted(names.keys())))\n-\t\ttranscript.setTagValue("nbOverlaps", nbOverlaps)\n-\t\tself._iWriter.addTranscript(transcript)\n-\t\tself._iWriter.write()\n-\t\tself._nbOverlappingQueries += 1\n-\t\tself._nbOverlaps\t\t   += nbOverlaps\n-\t\t\n-\tdef _extractID(self, transcript):\n-\t\tnbElements = float(transcript.getTagValue("nbElements")) if "nbElements" in transcript.getTagNames() else 1\n-\t\tid\t\t   = transcript.getTagValue("ID")\t\t\t\t if "ID"\t\t in transcript.getTagNames() else transcript.getUniqueName()\n-\t\treturn {id: nbElements}\n-\t\t\n-\tdef run(self):\n-\t\tself.createNCLists()\n-\t\tself.compare()\n-\t\tself.close()\n-\t\tif self._verbosity > 0:\n-\t\t\tprint "# queries: %d" % (self._nbLines[QUERY])\n-\t\t\tprint "# refs:    %d" % (self._nbLines[REFERENCE])\n-\t\t\tprint "# written: %d (%d overlaps)" % (self._nbOverlappingQueries, self._nbOverlaps)\n-\t\t\tprint "time:      %.2gs" % (self._timeSpent)\n-\n-\n-if __name__ == "__main__":\n-\tdescription = "Find Overlaps Optim v1.0.0: Finds overlaps with several query intervals. 
[Category: Data Comparison]"\n-\n-\tparser = OptionParser(description = description)\n-\tparser.add_option("-i", "--query",\t     dest="inputQueryFileName", action="store",\t\t\t   type="string",  help="Query input file [compulsory] [format: file in transcript or other format given by -f]")\n-\tparser.add_option("-f", "--queryFormat", dest="queryFormat",\t\taction="store",\t\t\t   type="string",  help="format of previous file (possibly in NCL format) [compulsory] [format: transcript or other file format]")\n-\tparser.add_option("-j", "--ref",\t\t dest="inputRefFileName",   action="store",\t\t\t   type="string",  help="Reference input file [compulsory] [format: file in transcript or other format given by -g]")\n-\tparser.add_option("-g", "--refFormat",   dest="refFormat",\t\t    action="store",\t\t\t   type="string",  help="format of previous file (possibly in NCL format) [compulsory] [format: transcript or other file format]")\n-\tparser.add_option("-o", "--output",\t     dest="outputFileName",\t    action="store",\t\t\t   type="string",  help="Output file [compulsory] [format: output file in GFF3 format]")\n-\tparser.add_option("-v", "--verbosity",   dest="verbosity",\t\t    action="store", default=1, type="int",\t   help="Trace level [format: int] [default: 1]")\n-\t(options, args) = parser.parse_args()\n-\t\n-\tiFOO = FindOverlapsOptim(options.verbosity)\n-\tiFOO.setRefFileName(options.inputRefFileName, options.refFormat)\n-\tiFOO.setQueryFileName(options.inputQueryFileName, options.queryFormat)\n-\tiFOO.setOutputFileName(options.outputFileName)\n-\tiFOO.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/fold.py
--- a/SMART/Java/Python/fold.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,95 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Read a mapping file (many formats supported) and select some of them
-Mappings should be sorted by read names
-"""
-
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.toolLauncher.RnaFoldLauncher import RnaFoldLauncher
-from commons.core.writer.Gff3Writer import Gff3Writer
-
-
-class Fold(object):
-    """
-    Fold a series of transcripts
-    """
-
-    def __init__(self, verbosity = 0):
-        self.verbosity       = verbosity
-        self.rnaFoldLauncher = RnaFoldLauncher(verbosity)
-        self.gff3Writer      = None
-
-
-    def setInputFileName(self, fileName, format):
-        transcriptContainer = TranscriptContainer(fileName, format, options.verbosity)
-        self.rnaFoldLauncher.setTranscriptList(transcriptContainer)
-
-    
-    def setOutputFileName(self, fileName):
-        self.gff3Writer = Gff3Writer("%s.gff3" % (fileName), self.verbosity)
-
-
-    def setGenomeFileName(self, fileName):
-        self.rnaFoldLauncher.setGenomeFile(fileName)
-
-
-    def setExtensions(self, fivePrime, threePrime):
-        self.rnaFoldLauncher.setExtensions(fivePrime, threePrime)
-
-
-    def start(self):
-        self.gff3Writer.addTranscriptList(self.rnaFoldLauncher.getResults())
-
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Fold v1.0.1: Fold a list of transcript and give the energy. [Category: Personal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",      dest="inputFileName",  action="store",            type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",     dest="format",         action="store",            type="string", help="format of file [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",     dest="outputFileName", action="store",            type="string", help="output file [format: output file in GFF3 format]")
-    parser.add_option("-g", "--genome",     dest="genomeFileName", action="store",            type="string", help="genome file name [format: file in FASTA format]")
-    parser.add_option("-5", "--fivePrime",  dest="fivePrime",      action="store",            type="int",    help="extend towards the 5' end [format: int]")
-    parser.add_option("-3", "--threePrime", dest="threePrime",     action="store",            type="int",    help="extend towards the 3' end [format: int]")
-    parser.add_option("-v", "--verbosity",  dest="verbosity",      action="store", default=1, type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    folder = Fold(options.verbosity)
-    folder.setInputFileName(options.inputFileName, options.format)
-    folder.setOutputFileName(options.outputFileName)
-    folder.setExtensions(options.fivePrime, options.threePrime)
-    folder.setGenomeFileName(options.genomeFileName)
-    folder.start()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/genes.gtf
--- a/SMART/Java/Python/genes.gtf Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,27989 +0,0 @@\n-I\tprotein_coding\tCDS\t335\t646\t.\t+\t0\tgene_id "YAL069W"; transcript_id "YAL069W"; exon_number "1"; gene_name "YAL069W"; p_id "P1273"; protein_id "YAL069W"; transcript_name "YAL069W"; tss_id "TSS1127";\n-I\tprotein_coding\texon\t335\t649\t.\t+\t.\tgene_id "YAL069W"; transcript_id "YAL069W"; exon_number "1"; gene_name "YAL069W"; p_id "P1273"; seqedit "false"; transcript_name "YAL069W"; tss_id "TSS1127";\n-I\tprotein_coding\tstart_codon\t335\t337\t.\t+\t0\tgene_id "YAL069W"; transcript_id "YAL069W"; exon_number "1"; gene_name "YAL069W"; p_id "P1273"; transcript_name "YAL069W"; tss_id "TSS1127";\n-I\tprotein_coding\tCDS\t538\t789\t.\t+\t0\tgene_id "YAL068W-A"; transcript_id "YAL068W-A"; exon_number "1"; gene_name "YAL068W-A"; p_id "P3278"; protein_id "YAL068W-A"; transcript_name "YAL068W-A"; tss_id "TSS5440";\n-I\tprotein_coding\texon\t538\t792\t.\t+\t.\tgene_id "YAL068W-A"; transcript_id "YAL068W-A"; exon_number "1"; gene_name "YAL068W-A"; p_id "P3278"; seqedit "false"; transcript_name "YAL068W-A"; tss_id "TSS5440";\n-I\tprotein_coding\tstart_codon\t538\t540\t.\t+\t0\tgene_id "YAL068W-A"; transcript_id "YAL068W-A"; exon_number "1"; gene_name "YAL068W-A"; p_id "P3278"; transcript_name "YAL068W-A"; tss_id "TSS5440";\n-I\tprotein_coding\tstop_codon\t647\t649\t.\t+\t0\tgene_id "YAL069W"; transcript_id "YAL069W"; exon_number "1"; gene_name "YAL069W"; p_id "P1273"; transcript_name "YAL069W"; tss_id "TSS1127";\n-I\tprotein_coding\tstop_codon\t790\t792\t.\t+\t0\tgene_id "YAL068W-A"; transcript_id "YAL068W-A"; exon_number "1"; gene_name "YAL068W-A"; p_id "P3278"; transcript_name "YAL068W-A"; tss_id "TSS5440";\n-I\tprotein_coding\texon\t1807\t2169\t.\t-\t.\tgene_id "YAL068C"; transcript_id "YAL068C"; exon_number "1"; gene_name "PAU8"; p_id "P3469"; seqedit "false"; transcript_name "PAU8"; tss_id "TSS248";\n-I\tprotein_coding\tstop_codon\t1807\t1809\t.\t-\t0\tgene_id "YAL068C"; transcript_id "YAL068C"; exon_number "1"; gene_name "PAU8"; p_id "P3469"; 
transcript_name "PAU8"; tss_id "TSS248";\n-I\tprotein_coding\tCDS\t1810\t2169\t.\t-\t0\tgene_id "YAL068C"; transcript_id "YAL068C"; exon_number "1"; gene_name "PAU8"; p_id "P3469"; protein_id "YAL068C"; transcript_name "PAU8"; tss_id "TSS248";\n-I\tprotein_coding\tstart_codon\t2167\t2169\t.\t-\t0\tgene_id "YAL068C"; transcript_id "YAL068C"; exon_number "1"; gene_name "PAU8"; p_id "P3469"; transcript_name "PAU8"; tss_id "TSS248";\n-I\tprotein_coding\tCDS\t2480\t2704\t.\t+\t0\tgene_id "YAL067W-A"; transcript_id "YAL067W-A"; exon_number "1"; gene_name "YAL067W-A"; p_id "P5000"; protein_id "YAL067W-A"; transcript_name "YAL067W-A"; tss_id "TSS1247";\n-I\tprotein_coding\texon\t2480\t2707\t.\t+\t.\tgene_id "YAL067W-A"; transcript_id "YAL067W-A"; exon_number "1"; gene_name "YAL067W-A"; p_id "P5000"; seqedit "false"; transcript_name "YAL067W-A"; tss_id "TSS1247";\n-I\tprotein_coding\tstart_codon\t2480\t2482\t.\t+\t0\tgene_id "YAL067W-A"; transcript_id "YAL067W-A"; exon_number "1"; gene_name "YAL067W-A"; p_id "P5000"; transcript_name "YAL067W-A"; tss_id "TSS1247";\n-I\tprotein_coding\tstop_codon\t2705\t2707\t.\t+\t0\tgene_id "YAL067W-A"; transcript_id "YAL067W-A"; exon_number "1"; gene_name "YAL067W-A"; p_id "P5000"; transcript_name "YAL067W-A"; tss_id "TSS1247";\n-I\tprotein_coding\texon\t7235\t9016\t.\t-\t.\tgene_id "YAL067C"; transcript_id "YAL067C"; exon_number "1"; gene_name "SEO1"; p_id "P6606"; seqedit "false"; transcript_name "SEO1"; tss_id "TSS5465";\n-I\tprotein_coding\tstop_codon\t7235\t7237\t.\t-\t0\tgene_id "YAL067C"; transcript_id "YAL067C"; exon_number "1"; gene_name "SEO1"; p_id "P6606"; transcript_name "SEO1"; tss_id "TSS5465";\n-I\tprotein_coding\tCDS\t7238\t9016\t.\t-\t0\tgene_id "YAL067C"; transcript_id "YAL067C"; exon_number "1"; gene_name "SEO1"; p_id "P6606"; protein_id "YAL067C"; transcript_name "SEO1"; tss_id "TSS5465";\n-I\tprotein_coding\tstart_codon\t9014\t9016\t.\t-\t0\tgene_id "YAL067C"; transcript_id "YAL067C"; exon_number "1"; gene_name 
"SEO1"; p_id "P6606"; transcript_name "SEO1"; tss_id "TSS5465";\n-I\tprotein_coding\tCDS\t10091\t10396\t.\t+\t0\tgene_id "YAL066W"; transcript_id "YAL066W"; exon_number "1"; gene_name "YAL066W"; p_'..b'203";\n-XVI\tprotein_coding\tstart_codon\t939922\t939924\t.\t+\t0\tgene_id "YPR201W"; transcript_id "YPR201W"; exon_number "1"; gene_name "ARR3"; p_id "P1664"; transcript_name "ARR3"; tss_id "TSS5203";\n-XVI\tprotein_coding\tstop_codon\t941134\t941136\t.\t+\t0\tgene_id "YPR201W"; transcript_id "YPR201W"; exon_number "1"; gene_name "ARR3"; p_id "P1664"; transcript_name "ARR3"; tss_id "TSS5203";\n-XVI\tprotein_coding\tCDS\t943032\t943050\t.\t+\t0\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "1"; gene_name "YPR202W"; p_id "P3577"; protein_id "YPR202W"; transcript_name "YPR202W"; tss_id "TSS6873";\n-XVI\tprotein_coding\texon\t943032\t943050\t.\t+\t.\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "1"; gene_name "YPR202W"; p_id "P3577"; seqedit "false"; transcript_name "YPR202W"; tss_id "TSS6873";\n-XVI\tprotein_coding\tstart_codon\t943032\t943034\t.\t+\t0\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "1"; gene_name "YPR202W"; p_id "P3577"; transcript_name "YPR202W"; tss_id "TSS6873";\n-XVI\tprotein_coding\tCDS\t943199\t943893\t.\t+\t1\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "2"; gene_name "YPR202W"; p_id "P3577"; protein_id "YPR202W"; transcript_name "YPR202W"; tss_id "TSS6873";\n-XVI\tprotein_coding\texon\t943199\t943896\t.\t+\t.\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "2"; gene_name "YPR202W"; p_id "P3577"; seqedit "false"; transcript_name "YPR202W"; tss_id "TSS6873";\n-XVI\tprotein_coding\tCDS\t943880\t944185\t.\t+\t0\tgene_id "YPR203W"; transcript_id "YPR203W"; exon_number "1"; gene_name "YPR203W"; p_id "P4951"; protein_id "YPR203W"; transcript_name "YPR203W"; tss_id "TSS2481";\n-XVI\tprotein_coding\texon\t943880\t944188\t.\t+\t.\tgene_id "YPR203W"; transcript_id "YPR203W"; exon_number "1"; gene_name 
"YPR203W"; p_id "P4951"; seqedit "false"; transcript_name "YPR203W"; tss_id "TSS2481";\n-XVI\tprotein_coding\tstart_codon\t943880\t943882\t.\t+\t0\tgene_id "YPR203W"; transcript_id "YPR203W"; exon_number "1"; gene_name "YPR203W"; p_id "P4951"; transcript_name "YPR203W"; tss_id "TSS2481";\n-XVI\tprotein_coding\tstop_codon\t943894\t943896\t.\t+\t0\tgene_id "YPR202W"; transcript_id "YPR202W"; exon_number "2"; gene_name "YPR202W"; p_id "P3577"; transcript_name "YPR202W"; tss_id "TSS6873";\n-XVI\tprotein_coding\tstop_codon\t944186\t944188\t.\t+\t0\tgene_id "YPR203W"; transcript_id "YPR203W"; exon_number "1"; gene_name "YPR203W"; p_id "P4951"; transcript_name "YPR203W"; tss_id "TSS2481";\n-XVI\tprotein_coding\tCDS\t944603\t947698\t.\t+\t0\tgene_id "YPR204W"; transcript_id "YPR204W"; exon_number "1"; gene_name "YPR204W"; p_id "P2697"; protein_id "YPR204W"; transcript_name "YPR204W"; tss_id "TSS839";\n-XVI\tprotein_coding\texon\t944603\t947701\t.\t+\t.\tgene_id "YPR204W"; transcript_id "YPR204W"; exon_number "1"; gene_name "YPR204W"; p_id "P2697"; seqedit "false"; transcript_name "YPR204W"; tss_id "TSS839";\n-XVI\tprotein_coding\tstart_codon\t944603\t944605\t.\t+\t0\tgene_id "YPR204W"; transcript_id "YPR204W"; exon_number "1"; gene_name "YPR204W"; p_id "P2697"; transcript_name "YPR204W"; tss_id "TSS839";\n-XVI\tprotein_coding\texon\t946856\t947338\t.\t-\t.\tgene_id "YPR204C-A"; transcript_id "YPR204C-A"; exon_number "1"; gene_name "YPR204C-A"; p_id "P6072"; seqedit "false"; transcript_name "YPR204C-A"; tss_id "TSS5621";\n-XVI\tprotein_coding\tstop_codon\t946856\t946858\t.\t-\t0\tgene_id "YPR204C-A"; transcript_id "YPR204C-A"; exon_number "1"; gene_name "YPR204C-A"; p_id "P6072"; transcript_name "YPR204C-A"; tss_id "TSS5621";\n-XVI\tprotein_coding\tCDS\t946859\t947338\t.\t-\t0\tgene_id "YPR204C-A"; transcript_id "YPR204C-A"; exon_number "1"; gene_name "YPR204C-A"; p_id "P6072"; protein_id "YPR204C-A"; transcript_name "YPR204C-A"; tss_id 
"TSS5621";\n-XVI\tprotein_coding\tstart_codon\t947336\t947338\t.\t-\t0\tgene_id "YPR204C-A"; transcript_id "YPR204C-A"; exon_number "1"; gene_name "YPR204C-A"; p_id "P6072"; transcript_name "YPR204C-A"; tss_id "TSS5621";\n-XVI\tprotein_coding\tstop_codon\t947699\t947701\t.\t+\t0\tgene_id "YPR204W"; transcript_id "YPR204W"; exon_number "1"; gene_name "YPR204W"; p_id "P2697"; transcript_name "YPR204W"; tss_id "TSS839";\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/genome.fasta
--- a/SMART/Java/Python/genome.fasta Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,92 +0,0 @@
->chr1
-GTAATCAACTACCAATTCCAGCTCTCTTTTGACAACTGGTCTTATACCCACTTCCGTACACTTGCAACCC
-TCGTAAGACAATTGCAAATGAGTAATGGCCTTCCAATTAGCATTGGACGCCCTTGCACCCACGACTCACA
-GAGATCCCTCTCTGCACCCGATTCTCGAATCCACAGTGGATTCAATTCGCTCCTCGATACAGACCTACCC
-ATGGTCCATTCCGAAGGAACTTCTACCCCTACTCAACTCCTACGGCATCCCAACATCTGGTTTGGGAACC
-TCCCACCACCCCCACGCCGCCCACAAGACAATCGAGACTTTTCTCCTTTGCACCCACTGGTCTTTCCAGG
-CCACCACTCCCAGCTCCGTCATGTTCATGAAACCCAGCAAGTTCAACAAACTTGCCCAGGTAAACTCAAA
-CTTTCGGGAGCTGAAGAACTACCGCCTGCACCCCAACGACAGCACTCGTTACCCCTTCACATCACCAGAC
-CTTCCCGTTTTCCCCACCATTTTCATGCACGACGCCCTGATGTATTACCATCCGTCCCAGATCATGGACC
-TGTTCTTRCGGAAACCAAACCTCGAACGTCTGTACGCCAGCCTCGTAGTGCCACCCGAGGCCCATCTTTC
-CGACCAATCCTTCTACCCAAAGTTGTACACGTACACGACGACCCGCCACACTCTTCACTACGTCCCAGAG
-GGTCACGAAGCCGGCAGCTACAACCAACCGTCCGACGCCCACTCTTGGCTCCGAATCAATTCCATTCGCC
-TCGGCAACCACCACCTCTCAGTGACGATCCTGGAATCCTGGGGCCCCGTCCACTCGCTCCTCATTCAACG
-AGGGACCCCCCCCCCCGACCCATCACTCCAGGCCCCTCCAACACTCATGRCCTCAGACCTCTTTCGGTCT
-TACCAAGARCCTCGCCTCGACGTGGTCTCCTTCCGAATCCCCGACGCCATCGAACTTCCACAGGCCACAT
-TCCTCCAACAACCACTTCGAGACCGACTGGTCCCCCGAGCCGTCTACAACGCCCTGTTCACCTATACCAG
-AGCAGTCCGCACACTCCGAACTTCAGACCCAGCAGCATTCGTAAGGATGCACTCCTCCAAACCGGACCAC
-GATTGGGTCACCTCGAACGCCTGGGACAACCTGCAAACCTTCGCACTTCTGAACGTTCCCCTCCGACCAA
-ACGTCGTCTACCACGTTCTTCAGAGCCCAATCGCCTCCCTAAGCCTTTACCTGAGGCAACATTGGCGCCG
-TCTTACCGCCACCGCCGTCCCTATCCTCTCCTTCCTAACCCTCCTGCAGCGCTTCCTTCCATTGCCTATA
-CCTCTAGCAGAGGTAAAATCCATCACAGCCTTCCGAAGGGAGCTTTACCGAAAGAAGGAGCCCCACCACC
-CCCTCGACGTCTTCCATCTCCAGCACCGCGTCCGCAACTACCACTCCGCGATCTCGGCCGTACGCCCGGC
-TTCCCCACCCCACCAAAAACTCCCACACGCACTCCAGAAAGCCGCATTACTGCTTCTCCGACCGATATCG
-CCCCTCTTGACAGCGACCCCGTTCTTTCGGTCCGAACAGAAGTCCATGCTCCCGAACGCCGAACTTTCAT
-GGACCCTGAAGCGCTTCGCTCTGCCCTGGCAAGCCTCCCTAGTCCTCCTCGCTCTGTCGGAATCATCCAT
-ACTGCTCCACAAACTGTTCTCCCCGCCAACCCTCCAAGCCCAACACGACACCTACCACCGACATCTCCAC
-CCTGGATCCTACAGTCTCCAGTGGGAGAGGACGCCATTGTCGATTCCGAGGACGACAGCATTTCTTCCTT
-TCACTCCCACGACTTCGACAGCCCCTCCGGACCGCTCCGAAGCCAGTCTCCCTCCCGCTTTCGCCTCCAC
-CTTCGTTCCCCGTCCACCTCCAGCGGCATCGAGCCCTGGAGCCCAGCCTCCTACGACTACGGCAGCGCCC
-CCGACACCGATTGAACCCACCCAACGCACCCATCAAAATTCTGACCTCGCGCTCGAAAGTTCAACCTCAA
-CCGAACCTCCCCCACCCCCCATCCGATCCCCCGACAYGACGCCCTCCGCCCCCGTCCTTTTCCCAGAAAT
-CAACTCACCTCGTCGTTTTCCCCCCCAACTTCCCGCCACGCCCGATCTCGAACCCGCCCACACTCCACCC
-CCTCTTTCCATCCCGCACCAAGATCCGACTGACTCAGCGGACCCCCTCATGGGCTCCCACCTTTTGCACC
-ATTCACTGCCTGCACCCCCCACCCACCCCCTTCCATCTTCACAGCTGTTACCCGCACCTTTAACGAACGA
-CCCCACTGCGATCGGCCCGGTGCTCCCCTTTGAAGAACTCCACCCACGCAGGTACCCTGAAAACACCGCC
-ACTTTCCTCACGAGGCTCCGTTCACTCCCATCAAACCATCTTCCACAACCCACCTTGAATTGTCTTCTCT
-CCGCTGTCTCCGACCAAACCAAGGTTTCCGAGGAGCACCTCTGGGAGTCCCTACAGACAATTCTCCCAGA
-CAGCCAACTCAGCAATGAAGAGACCAACACTCTCGGGCTTTCAACTGAACACCTCACTGCGTTGGCCCAC
-CTTTACAACTTCCAGGCAACCGTTTACTCCGATCGCGGCCCCATCCTCTTCGGCCCCTCCGACACCATCA
-AGAGGATAGACATCACCCACACCACCGGACCGCCATCCCACTTTTCACCCGGCAAAAGACTCCTAGGCAG
-CCAACCCTCCGCTAAGGGCCATCCCTCCGACCCACTCATCAGAGCCATGAAGTCTTTCAAAGTATCCGGC
-AACTACCTTCCCTTCTCTGAGGCCCACAACCATCCCACCTCCATCTCACACGCCAAGAACTTGATTTCAA
-ACATGAAGAATGGTTTCGACGGCGTCCTCTCCCTCCTCGACGTCTCCACGGGCCAACGAACCGGACCCRC
-CCCCAAAGAACGGATCATCCAGATAGACCACTACCTTGACACCAACCCCGGCAAAACCACTCCTGTGGTG
-CATTTCGCTGGCTTCGCTGGCTGTGGGAAGACATATCCGATCCAACAGCTCCTCAAAACCAAACTGTTCA
-AAGACTTCCGGGTCTCTTGCCCTACCACAGAACTCAGAACCGAATGGAAGACAGCGATGGAACTCCACGG
-CTCCCAGTCATGGCGCTTTAACACTTGGGAGTCTTCCATTCTCAAGTCATCCAGAATCCTGGTCATTGAT
-GAGATCTACAAAATGCCAAGAGGGTACCTCGACCTTTCCATCCTCGCCGACCCCGCCCTCGAGCTCGTCA
-TAATTCTCGGCGATCCTCTMCAGGGCGAGTACCACTCCCAATCGAAAGACTCATCCAACCACCGCCTTCC
-CTCTGAAACTCTCAGGCTGCTACCATACATCGACATGTACTGCTGGTGGAGTTACCGCATTCCTCAATGC
-ATCGCCCGACTCTTCCAAATTCACAGCTTCAATGCCTGGCAAGGAGTTATCGGGTCCGTTTCCACTCCCC
-ATGATCAATCCCCCGTCCTCACCAACAGTCATGCCTCATCTCTTACCTTCAACAGCCTGGGATATCGCTC
-CTGCACGATCAGCTCTAGCCAAGGCCTCACATTCTGCGACCCCGCCATAATCGTCCTGGACAACTACACC
-AAGTGGCTCTCCTCGGCTAACGGCCTCGTCGCCCTCACTCGATCCAGATCAGGCGTCCAATTCATGKGCC
-CCTCTTCCTACGTCGGGGGAACCAACGGCTCTTCCGCCATGTTTTCCGACGCCTTCAACAACAGCCTCAT
-CATCATGGATCGCTACTTCCCATCCCTGTTCCCGCAACTCAAGCTCATCACCTCCCCCCTCACAACTCGC
-GGCCCCAAACTCAACGGGGCCACCCCCAGCGCATCCCCCACCCACCGTTCGCCAAACTTCCACCTTCCCC
-CACACATTCCGCTCTCCTATGATCGTGATTTTGTTACGGTGAACCCAACTCTCCCCGACCAAGGACCCGA
-AACAAGACTCGACACCCACTTTCTCCCACCGTCTCGGCTCCCTCTCCATTTCGATCTCCCACCGGCTATC
-ACCCCACCCCCGGTTTCCACAAGCGTCGACCCGCCACAAGCGAAAGCTAGCCCCGTCTACCCAGGCGAGT
-TCTTCGATTCTCTGGCGGCGTTCTTCTTACCAGCACACGACCCATCAACAAGGGAAATACTCCACAAAGA
-TCAATCTAGCAACCAGTTCCCCTGGTTCGACCGACCCTTCAGCCTGTCCTGCCAGCCCTCAAGTCTGATT
-TCCGCCAAGCATGCACCCAACCATGATCCGACCCTTCTACCGGCCTCCATCAACAAACGCTTGCGATTCA
-GACCCAGTGACTCACCGCACCAAATCACCGCGGACGACGTGGTCCTAGGCCTGCAACTCTTTCACTCTCT
-TTGTCGCGCCTACTCACGTCAACCCAACAGCACCGTTCCATTCAACCCTGAACTTTTCGCAGAATGCATC
-TCTCTGAATGAGTACGCACAGCTCAGTTCCAAAACCCAATCCACCATAGTGGCCAACGCTTCACGCTCCG
-ACCCAGACTGGCGACACACCACCGTCAAGATCTTCGCGAAAGCCCAACACAAAGTCAACGACGGCTCCAT
-CTTCGGCTCGTGGAAAGCCTGCCAGACCCTCGCACTCATGCACGACTACGTGATTCTGGTTCTTGGACCC
-GTCAAGAAATACCAGAGAATCTTCGACAACGCTGACCGGCCACCTAACATCTACTCACACTGCGGCAAGA
-CACCCAACCAACTTCGAGATTGGTGCCAGGAACATCTCACTCATTCCACCCCCAAAATCGCAAACGACTA
-CACCGCTTTCGACCAGTCCCAGCATGGAGAATCCGTGGTCCTTGAAGCCCTCAAAATGAAGAGACTGAAC
-ATTCCRAGCCATCTGATTCAGCTCCACGTCCACCTCAAGACCAACGTCTCCACCCAGTTCGGCCCCCTCA
-CATGCATGCGCCTAACCGGGGAACCCGGAACTTACGACGACAACACTGACTATAACCTCGCAGTCATCTA
-CTCCCAGTATGACGTCGGTTCCTGCCCCATCATGGTTTCTGGCGACGACTCACTCATAGACCACCCCCTT
-CCCACTCGCCACGACTGGCCATCCGTTCTCAAACGCCTCCACCTCCGCTTCAAACTTGAACTCACCTCTC
-ACCCCCTCTTCTGTGGCTACTACGTCGGTCCAGCCGGCTGCATCCGCAACCCCCTGGCCCTTTTCTGCAA
-GCTCATGATCGCCGTGGACGACGACGCCCTCGACGACCGACGACTCAGCTACCTCACCGAGTTCACCACC
-GGACACCTCCTTGGCGAATCACTGTGGCACCTCCTCCCTGAAACCCATGTTCAGTATCAGTCAGCCTGCT
-TTGACTTCTTCTGCAGGCGGTGCCCAAGACACGAGAAAATGCTCCTCGACGACTCCACACCCGCACTCAG
-CCTCCTCGAACGAATCACTTCTTCGCCGAGGTGGCTCACCAAAAATGCCATGTACCTCCTCCCTGCCAAG
-CTACGACTGGCCATCACCTCTCTATCTCAAACGCAGTCCTTCCCAGAATCCATCGAGGTTTCCCACGCTG
-AGTCTGAATTGCTTCACTACGTCCAATAGCAATCAGCCCCAACATGGAAATCGACAAAGAACTCGCCCCC
-CAAGACCGCACCGTCACCGTCGCCACCGTCCTACCAGCTGTCCCCGGCCCATCACCTCTCACCATCAAAC
-AACCGTTYCAGTCTGAAGTTCTATTTGCTGGAACCAAAGATGCCGAGGCTTCTCTCACCATCGCCAACAT
-CGACAGCGTTTCCACCCTCACCACCTTCTACCGTCATGCATCTCTGGAATCACTCTGGGTCACTATCCAT
-CCCACCTTGCAAGCCCCAGCTTTCCCGACCACGGTCGGTGTCTGCTGGGTACCCGCCAATTCTCCAGTCA
-CTCCCGCCCAAATCACCAAGACCTATGGTGGCCAGATCTTCTGCATTGGCGGCGCCATCAACACCCTCTC
-ACCTCTCATCGTCAAGTGCCCACTTGAAATGATGAACCCCCGGGTCAAGGATTCGATTCAGTACCTTGAC
-TCGCCCAAACTCCTCATCTCCATCACCGCTCAACCCACCGCTCCCCCCGCATCGACCTGCATAATAACTG
-TATCAGGAACTCTCTCGATGCACTCTCCGCTCATCACGGACACTTCCACCTAAGTTCTCGATCTTTAAAA
-TCGTTAGCTCGCCAGTTAGCGAGGTCTGTCCCCACACGACAGATAATCGGGTGCAACTCCCGCCCCTCTT
-CCGAGGGTCATCGGAACC
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getDifference.py
--- a/SMART/Java/Python/getDifference.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,155 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""Restrict a transcript list with some parameters (regions)"""\n-\n-from optparse import OptionParser\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from commons.core.parsing.FastaParser import FastaParser\n-from SMART.Java.Python.misc.Progress import Progress\n-\n-class DifferenceGetter(object):\n-\n-    def __init__(self, verbosity):\n-        self.verbosity        = verbosity\n-        self.annotationParser = None\n-        self.referenceParser  = None\n-        self.sequenceParser   = None\n-        self.transcriptCount  = 1\n-        self.split            = False\n-\n-    def createTranscript(self, chromosome, start, end):\n-        transcript = Transcript()\n-        transcript.setChromosome(chromosome)\n-        transcript.setDirection("+")\n-        transcript.setStart(start)\n-        transcript.setEnd(end)\n-        transcript.setName("region_%d" % self.transcriptCount)\n-        transcript.setTagValue("ID", "region_%d" % self.transcriptCount)\n-        self.transcriptCount += 1\n-        return transcript\n-\n-    def setSplit(self, split):\n-        self.split = split\n-\n-    def setAnnotationFile(self, fileName, format):\n-        if fileName != None:\n-            self.annotationParser = TranscriptContainer(fileName, format, self.verbosity)\n-\n-    def 
setReferenceFile(self, fileName, format):\n-        if fileName != None:\n-            self.referenceParser = TranscriptContainer(fileName, format, self.verbosity)\n-\n-    def setSequenceFile(self, fileName):\n-        if fileName != None:\n-            self.sequenceParser = FastaParser(fileName, self.verbosity)\n-\n-    def setOutputFile(self, fileName):\n-        self.writer = Gff3Writer(fileName, self.verbosity)\n-\n-    def initialize(self):\n-        self.presence = {}\n-        for chromosome in self.sequenceParser.getRegions():\n-            self.presence[chromosome] = [[1, self.sequenceParser.getSizeOfRegion(chromosome)]]\n-\n-    def readTranscripts(self):\n-        nbTranscripts = self.annotationParser.getNbTranscripts()\n-        progress      = Progress(nbTranscripts, "Parsing annotation file" , self.verbosity)\n-        for transcript in self.annotationParser.getIterator():\n-            chromosome   = transcript.getChromosome()\n-            '..b'me]):\n-                start, end = element\n-                if start <= transcript.getEnd() and transcript.getStart() <= end:\n-                    toBeDeleted.append(i)\n-                    if start < transcript.getStart():\n-                        toBeAppended.append([start, transcript.getStart() - 1])\n-                    if end > transcript.getEnd():\n-                        toBeAppended.append([transcript.getEnd() + 1, end])\n-            for i in reversed(toBeDeleted):\n-                del self.presence[chromosome][i]\n-            self.presence[chromosome].extend(toBeAppended)\n-            progress.inc()\n-        progress.done()\n-\n-    def writeOutput(self):\n-        for chromosome in self.presence:\n-            for element in self.presence[chromosome]:\n-                start, end = element\n-                self.writer.addTranscript(self.createTranscript(chromosome, start, end))\n-        self.writer.write()\n-\n-    def compareToSequence(self):\n-        self.initialize()\n-        
self.readTranscripts()\n-        self.writeOutput()\n-\n-    def compareToAnnotation(self):\n-        transcriptListComparator = TranscriptListsComparator(None, self.verbosity)\n-        transcriptListComparator.setSplitDifference(self.split)\n-        transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, self.annotationParser)\n-        transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.REFERENCE, self.referenceParser)\n-        transcriptListComparator.setOutputWriter(self.writer)\n-        transcriptListComparator.getDifferenceTranscriptList()\n-\n-    def run(self):\n-        if self.referenceParser != None:\n-            self.compareToAnnotation()\n-        else:\n-            self.compareToSequence()\n-\n-\n-if __name__ == "__main__":\n-    \n-    # parse command line\n-    description = "Get Difference v1.0.1: Get all the regions of the genome, except the one given or get all the elements from the first set which does not ovelap with the second set (at the nucleotide level). 
[Category: Data Comparison]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input1",    dest="inputFileName1",   action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--format1",   dest="format1",          action="store",                     type="string", help="format [compulsory] [format: transcript file format]")\n-    parser.add_option("-j", "--input2",    dest="inputFileName2",   action="store",      default=None,  type="string", help="reference file [format: file in transcript format given by -g]")\n-    parser.add_option("-g", "--format2",   dest="format2",          action="store",      default=None,  type="string", help="format of the reference file [format: transcript file format]")\n-    parser.add_option("-s", "--sequence",  dest="sequenceFileName", action="store",      default=None,  type="string", help="sequence file [format: file in FASTA format]")\n-    parser.add_option("-p", "--split",     dest="split",            action="store_true", default=False,                help="when comparing to a set of genomic coordinates, do not join [format: boolean] [default: False")\n-    parser.add_option("-o", "--output",    dest="outputFileName",   action="store",                     type="string", help="output file [format: output file in GFF3 format]")\n-    parser.add_option("-v", "--verbosity", dest="verbosity",        action="store",      default=1,     type="int",    help="trace level [format: int]")\n-    (options, args) = parser.parse_args()\n-\n-    getter = DifferenceGetter(options.verbosity)\n-    getter.setSplit(options.split)\n-    getter.setAnnotationFile(options.inputFileName1, options.format1)\n-    getter.setSequenceFile(options.sequenceFileName)\n-    getter.setReferenceFile(options.inputFileName2, options.format2)\n-    getter.setOutputFile(options.outputFileName)\n-    getter.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getDistance.py
--- a/SMART/Java/Python/getDistance.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,241 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""Get the distance between the transcripts of two lists"""\n-\n-import os\n-import sys\n-from optparse import OptionParser\n-from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-\n-class GetDistance(object):\n-\n-    def __init__(self, verbosity = 0):\n-        self.verbosity      = verbosity\n-        self.writer         = None\n-        self.spearman       = False\n-        self.tlc            = TranscriptListsComparator(None, self.verbosity)\n-        self.strands        = (0, )\n-        self.buckets        = None\n-        self.title          = ""\n-        self.xMin           = None\n-        self.xMax           = None\n-        self.proportion     = False\n-        self.outputFileName = None\n-        self.keep           = False\n-\n-    def __del__(self):\n-        pass\n-\n-    def setQueryFile(self, fileName, format):\n-        self.transcriptContainer1 = TranscriptContainer(fileName, format, self.verbosity)\n-        \n-    def setReferenceFile(self, fileName, format):\n-        self.transcriptContainer2 = TranscriptContainer(fileName, format, self.verbosity)\n-\n-    def setOutputFile(self, fileName):\n-        self.outputFileName = fileName\n-        \n-    def setOutputTranscriptFile(self, fileName):\n-        if fileName != None:\n-            self.writer = Gff3Writer(fileName, 
self.verbosity)\n-        \n-    def restrictQueryToStart(self, number):\n-        self.tlc.restrictToStart(self.tlc.QUERY, number)\n-\n-    def restrictReferenceToStart(self, number):\n-        self.tlc.restrictToStart(self.tlc.REFERENCE, number)\n-\n-    def restrictQueryToEnd(self, number):\n-        self.tlc.restrictToEnd(self.tlc.QUERY, number)\n-\n-    def restrictReferenceToEnd(self, number):\n-        self.tlc.restrictToEnd(self.tlc.REFERENCE, number)\n-\n-    def setAbsolute(self, boolean):\n-        self.tlc.setAbsolute(boolean)\n-\n-    def setProportion(self, boolean):\n-        self.proportion = boolean\n-\n-    def setColinear(self, boolean):\n-        self.tlc.getColinearOnly(boolean)\n-\n-    def setAntisense(self, boolean):\n-        self.tlc.getAntisenseOnly(boolean)\n-\n-    def setDistances(self, minDistance, maxDistance):\n-        self.tlc.setMinDistance(minDistance)\n-        self.tlc.setMaxDistance(maxDistance)\n-\n-    def setStrands(s'..b'"--start2",          dest="start2",          action="store",      default=None, type="int",    help="only consider the n first 5\' nucleotides for list 2 [format: int]")\n-    parser.add_option("-e", "--end1",            dest="end1",            action="store",      default=None, type="int",    help="only consider the n last 3\' nucleotides for list 1 [format: int]")\n-    parser.add_option("-E", "--end2",            dest="end2",            action="store",      default=None, type="int",    help="only consider the n last 3\' nucleotides for list 2 [format: int]")\n-    parser.add_option("-m", "--minDistance",     dest="minDistance",     action="store",      default=None, type="int",    help="minimum distance considered between two transcripts [format: int] [default: None]")\n-    parser.add_option("-M", "--maxDistance",     dest="maxDistance",     action="store",      default=1000, type="int",    help="maximum distance considered between two transcripts [format: int] [default: 1000]")\n-    
parser.add_option("-5", "--fivePrime",       dest="fivePrime",       action="store_true", default=False,               help="consider the elements from list 1 which are upstream of elements of list 2 [format: bool] [default: False]")\n-    parser.add_option("-3", "--threePrime",      dest="threePrime",      action="store_true", default=False,               help="consider the elements from list 1 which are downstream of elements of list 2 [format: bool] [default: False]")\n-    parser.add_option("-u", "--buckets",         dest="buckets",         action="store",      default=None, type="int",    help="plot histogram instead of line plot with given interval size [format: int] [default: None]")\n-    parser.add_option("-2", "--2strands",        dest="twoStrands",      action="store_true", default=False,               help="plot the distributions of each strand separately [format: bool] [default: False]")\n-    parser.add_option("-r", "--spearman",        dest="spearman",        action="store_true", default=False,               help="compute Spearman rho [format: bool] [default: False]")\n-    parser.add_option("-x", "--xMin",            dest="xMin",            action="store",      default=None, type="int",    help="minimum value on the x-axis to plot [format: int] [default: None]")\n-    parser.add_option("-X", "--xMax",            dest="xMax",            action="store",      default=None, type="int",    help="maximum value on the x-axis to plot [format: int] [default: None]")\n-    parser.add_option("-t", "--title",           dest="title",           action="store",      default=None, type="string", help="title for the graph [format: int] [default: None]")\n-    parser.add_option("-v", "--verbosity",       dest="verbosity",       action="store",      default=1,    type="int",    help="trace level [format: int]")\n-    parser.add_option("-k", "--keep",            dest="keep",            action="store_true", default=False,               help="keep temporary files 
[format: bool]")\n-    (options, args) = parser.parse_args()\n-\n-    gd = GetDistance(options.verbosity)\n-    gd.setQueryFile(options.inputFileName1, options.format1)\n-    gd.setReferenceFile(options.inputFileName2, options.format2)\n-    gd.setOutputFile(options.outputFileName)\n-    gd.setOutputTranscriptFile(options.outputDistances)\n-    gd.setColinear(options.colinear)\n-    gd.setAntisense(options.antisense)\n-    gd.setAbsolute(options.absolute)\n-    gd.setProportion(options.proportion)\n-    gd.restrictQueryToStart(options.start1)\n-    gd.restrictReferenceToStart(options.start2)\n-    gd.restrictQueryToEnd(options.end1)\n-    gd.restrictReferenceToEnd(options.end2)\n-    gd.setDistances(options.minDistance, options.maxDistance)\n-    gd.setUpstream(options.fivePrime)\n-    gd.setDownstream(options.threePrime)\n-    gd.setStrands(options.twoStrands)\n-    gd.setBuckets(options.buckets)\n-    gd.setTitle(options.title)\n-    gd.setXValues(options.xMin, options.xMax)\n-    gd.keepTmpValues(options.keep)\n-    gd.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getDistribution.py
--- a/SMART/Java/Python/getDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,291 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""Get the repartition of some elements in a chromosomes"""\n-\n-import os\n-from optparse import OptionParser\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from SMART.Java.Python.misc.Progress import Progress\n-from math import *\n-\n-def divideKeyDict(dictionary, ratio):\n-    return dict([(key / ratio, dictionary[key]) for key in dictionary])\n-\n-\n-def setTranscript(chromosome, direction, start, end, name, value):\n-    transcript = Transcript()\n-    transcript.setChromosome(chromosome)\n-    transcript.setDirection(direction)\n-    transcript.setStart(start)\n-    transcript.setEnd(end)\n-    transcript.setName(name)\n-    transcript.setTagValue("nbElements", value)\n-    return transcript\n-\n-\n-\n-if __name__ == "__main__":\n-    \n-    magnifyingFactor = 1000\n-    \n-    # parse command line\n-    description = "Get Distribution v1.0.1: Get the distribution of the genomic coordinates on a genome. 
[Category: Visualization]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input",       dest="inputFileName",     action="store",                           type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--format",      dest="format",            action="store",                           type="string", help="format of the input file [compulsory] [format: transcript file format]")\n-    parser.add_option("-o", "--output",      dest="outputFileName",    action="store",                           type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n-    parser.add_option("-r", "--reference",   dest="referenceFileName", action="store",      default=None,        type="string", help="file containing the genome [compulsory] [format: file in FASTA format]")\n-    parser.add_option("-n", "--nbBins",      dest="nbBins",            action="store",      default=1000,        type="int",    help="number of bins [default: 1000] [format: int]")\n-    parser.add_option("-2", "--bothStrands", dest="bothStrands",       action="store_true", default=False,                      help="plot one curve per strand [format: bool] [default: false]")\n-    parser.add_option("-w", "--raw",         dest="raw",         '..b'       plotter.addLine(divideKeyDict(densityPlus[chromosome], ratio))\n-            if options.raw:\n-                plotter.addLine(divideKeyDict(binsMinus[chromosome], ratio))\n-            else:\n-                plotter.addLine(divideKeyDict(densityMinus[chromosome], ratio))\n-        else:\n-            if options.raw:\n-                plotter.addLine(divideKeyDict(bins[chromosome], ratio))\n-            else:\n-                plotter.addLine(divideKeyDict(density[chromosome], ratio))\n-        plotter.plot()\n-        \n-    if options.csv:\n-        outputFileName = "%s" % (options.outputFileName)\n-        if 
options.chromosome != None:\n-            outputFileName += "_%s" % (options.chromosome)\n-        if options.start != None and options.end != None:\n-            outputFileName += ":%d-%d" % (options.start, options.end)\n-        outputFileName += ".csv"\n-        csvHandle = open(outputFileName, "w")\n-        for slice in range(start / sliceSize, maxSlice + 1):\n-            csvHandle.write(";%d-%d" % (slice * sliceSize + 1, (slice+1) * sliceSize))\n-        csvHandle.write("\\n")\n-        if options.bothStrands:\n-            for chromosome in densityPlus:\n-                if len(densityPlus[chromosome]) > 0:\n-                    csvHandle.write("%s [+]" % (chromosome))\n-                    for slice in sorted(densityPlus[chromosome].keys()):\n-                        csvHandle.write(";%.2f" % (densityPlus[chromosome][slice]))\n-                    csvHandle.write("\\n")            \n-                if len(densityMinus[chromosome]) > 0:\n-                    csvHandle.write("%s [-]" % (chromosome))\n-                    for slice in sorted(densityPlus[chromosome].keys()):\n-                        csvHandle.write(";%.2f" % (-densityMinus[chromosome][slice]))\n-                    csvHandle.write("\\n")            \n-        else:\n-            for chromosome in density:\n-                if len(density[chromosome]) > 0:\n-                    csvHandle.write(chromosome)\n-                    for slice in sorted(density[chromosome].keys()):\n-                        csvHandle.write(";%.2f" % (density[chromosome][slice]))\n-                    csvHandle.write("\\n")\n-        csvHandle.close()\n-             \n-    if options.gff:\n-        chromosome = "" if options.chromosome == None                         else options.chromosome.capitalize()\n-        start      = "" if options.start      == None                         else "%d" % (options.start)\n-        end        = "" if options.end        == None                         else "%d" % (options.end)\n-  
      link1      = "" if options.start      == None and options.end == None else ":"\n-        link2      = "" if options.start      == None and options.end == None else "-"\n-        writer     = Gff3Writer("%s%s%s%s%s.gff3" % (options.outputFileName, link1, start, link2, end), options.verbosity)\n-        cpt = 1\n-        if options.raw:\n-            valuesPlus  = binsPlus\n-            valuesMinus = binsMinus\n-            values      = bins\n-        else:\n-            valuesPlus  = densityPlus\n-            valuesMinus = densityMinus\n-            values      = density\n-        if options.bothStrands:\n-            for chromosome in values:\n-                for slice in valuesPlus[chromosome]:\n-                    writer.addTranscript(setTranscript(chromosome, 1, slice, slice + sliceSize, "region%d" % (cpt), valuesPlus[chromosome][slice]))\n-                    cpt += 1\n-                for slice in valuesMinus[chromosome]:\n-                    writer.addTranscript(setTranscript(chromosome, -1, slice, slice + sliceSize, "region%d" % (cpt), - valuesMinus[chromosome][slice]))\n-                    cpt += 1\n-        else:\n-            for chromosome in values:\n-                for slice in values[chromosome]:\n-                    writer.addTranscript(setTranscript(chromosome, 1, slice, slice + sliceSize, "region%d" % (cpt), values[chromosome][slice]))\n-                    cpt += 1\n-        writer.write()\n-\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getElement.py
--- a/SMART/Java/Python/getElement.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,106 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Get the first element (exon / intron) from a list of transcripts"""
-
-import os
-from optparse import OptionParser
-from commons.core.writer.Gff3Writer import *
-from SMART.Java.Python.structure.TranscriptContainer import *
-from SMART.Java.Python.misc.Progress import *
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Get Element v1.0.1: Get the first element (exon / intron) from a list of transcripts. [Category: Personnal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",         dest="inputFileName",    action="store",                                                type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",        dest="format",                 action="store",                                                type="string", help="format of file [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",        dest="outputFileName", action="store",                                                type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-y", "--mysql",         dest="mysql",                    action="store_true", default=False,                                     help="mySQL output [format: bool] [default: false]")
-    parser.add_option("-t", "--type",            dest="type",                     action="store",                                                type="string", help="type of the element    [format: choice (exon, intron)]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",            action="store",            default=1,                type="int",        help="trace level [format: int]")
-    parser.add_option("-l", "--log",             dest="log",                        action="store_true", default=False,                                     help="write a log file [format: bool] [default: false]")
-    (options, args) = parser.parse_args()
-
-    parser        = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
-    writer        = Gff3Writer(options.outputFileName, options.verbosity)
-    sqlWriter = MySqlTranscriptWriter(options.outputFileName, options.verbosity)
-    
-    nbLines = parser.getNbTranscripts()
-    print "%i lines found" % (nbLines)
-
-    # treat transcripts
-    nbWritten = 0
-    nbUsed        = 0
-    progress    = Progress(nbLines, "Analyzing transcripts of " + options.inputFileName, options.verbosity)
-    for transcript in parser.getIterator():
-
-        outTranscript = Transcript()
-        outTranscript.setName(transcript.getName())
-        outTranscript.setDirection(transcript.getDirection())
-        outTranscript.setChromosome(transcript.getChromosome())
-        
-        if options.type == "exon":
-            if len(transcript.getExons()) > 1:
-                transcript.sortExons()
-                outTranscript.setStart(transcript.getExons()[0].getStart())
-                outTranscript.setEnd(transcript.getExons()[0].getEnd())
-                writer.addTranscript(outTranscript)
-                if options.mysql:
-                    sqlWriter.addTranscript(transcript)
-                nbWritten += 1
-                nbUsed        += 1
-        elif options.type == "intron":
-            used = False
-            for intron in transcript.getIntrons():
-                used = True
-                thisTranscript = Transcript()
-                thisTranscript.copy(outTranscript)
-                thisTranscript.setStart(intron.getStart())
-                thisTranscript.setEnd(intron.getEnd())
-                writer.addTranscript(thisTranscript)
-                if options.mysql:
-                    sqlWriter.addTranscript(transcript)
-                nbWritten += 1
-            if used:
-                nbUsed += 1
-        else:
-            sys.exit("Cannot understan type %s" % (options.type))
-        progress.inc()
-    progress.done()
-
-    if options.mysql:
-        sqlWriter.write()
-
-    print "nb sequences used: %d" % (nbUsed)
-    print "nb elements used: %d" % (nbWritten)
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getExons.py
--- a/SMART/Java/Python/getExons.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,128 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from optparse import OptionParser
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.writer.TranscriptWriter import TranscriptWriter
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.misc.Progress import Progress
-
-zeroBaseToOneBaseConvertor = (lambda x: x - 1 if x > 0 else x)
-
-class GetExons(object):
-
-    def __init__(self, verbosity):
-        self.verbosity = verbosity
-        self.selection = False
-
-    def setInputFile(self, fileName, format):
-        chooser = ParserChooser(self.verbosity)
-        chooser.findFormat(format)
-        self.parser = chooser.getParser(fileName)
-
-    def setSelection(self, selection):
-        if selection == None:
-            return
-        self.selection = True
-        self.selectionItems = []
-        self.selectionIntervals = []
-        for part in selection.split(","):
-            try:
-                splittedPart = map(int, part.split(".."))
-            except Exception:
-                raise Exception("Elements '" + splittedPart + "' of selection '" + selection + "' do no seem to be integers!")
-            if len(splittedPart) == 1:
-                self.selectionItems.append(splittedPart[0])
-            elif len(splittedPart) == 2:
-                self.selectionIntervals.append((splittedPart[0], splittedPart[1]))
-            else:
-                raise Exception("Cannot parse elements '" + splittedPart + "' of selection '" + selection + "'!")
-
-    def getSelectionExonIndices(self, nbExons):
-        if not self.selection:
-            return range(nbExons)
-        indices = []
-        for item in self.selectionItems:
-            indices.append(range(nbExons)[zeroBaseToOneBaseConvertor(item)])
-        for start, end in self.selectionIntervals:
-            start, end = map(zeroBaseToOneBaseConvertor, (start, end))
-            if end > 0:
-                end += 1
-            indices.extend(range(nbExons)[start:end])
-        return indices
-
-    def setOutputFile(self, fileName):
-        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
-        
-    def run(self):
-        progress = Progress(self.parser.getNbTranscripts(), "Reading input file", self.verbosity)
-        nbExons = 0
-        for cpt1, transcript in enumerate(self.parser.getIterator()):
-            selectedExons = self.getSelectionExonIndices(transcript.getNbExons())
-            transcript.sortExons()
-            for cpt2, exon in enumerate(transcript.getExons()):
-                if cpt2 not in selectedExons:
-                    continue
-                exonTranscript = Transcript()
-                exonTranscript.copy(exon)
-                if "Parent" in exonTranscript.tags:
-                    del exonTranscript.tags["Parent"]
-                exonTranscript.tags["feature"] = "transcript"
-                if "ID" not in exonTranscript.tags or exonTranscript.tags["ID"] == "unnamed transcript":
-                    exonTranscript.tags["ID"] = "exon_%d-%d" % (cpt1+1, cpt2+1)
-                if exonTranscript.getName() == "unnamed transcript":
-                    exonTranscript.setName("exon_%d-%d" % (cpt1+1, cpt2+1))
-                self.writer.addTranscript(exonTranscript)
-                nbExons += 1
-            progress.inc()
-        self.writer.write()
-        self.writer.close()
-        progress.done()
-        if self.verbosity > 1:
-            print "%d transcripts read" % (self.parser.getNbTranscripts())
-            print "%d exons written" % (nbExons)
-
-if __name__ == "__main__":
-    
-    description = "Get Exons v1.0.1: Get the exons of a set of transcripts. [Category: Data Modification]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of file [compulsory] [format: transcript file format]")
-    parser.add_option("-s", "--select",    dest="select",         action="store", default=None, type="string", help="select some of the exons (like '1,2,5..-3,-1') [format: string]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",               type="string", help="output file [format: output file in GFF3 format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    ge = GetExons(options.verbosity)
-    ge.setInputFile(options.inputFileName, options.format)
-    ge.setSelection(options.select)
-    ge.setOutputFile(options.outputFileName)
-    ge.run()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getInfoPerCoverage.py
--- a/SMART/Java/Python/getInfoPerCoverage.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,167 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""Compare overlap of a transcript list and list of read, and get some info depending on the coverage"""\n-\n-import os\n-from optparse import OptionParser\n-from commons.core.parsing.SequenceListParser import *\n-from commons.core.writer.Gff3Writer import *\n-from SMART.Java.Python.mySql.MySqlConnection import *\n-from SMART.Java.Python.structure.TranscriptListsComparator import *\n-from SMART.Java.Python.misc.RPlotter import *\n-from SMART.Java.Python.misc.Progress import *\n-\n-\n-if __name__ == "__main__":\n-    \n-    # parse command line\n-    description = "Get Info per Coverage v1.0.1: Get a list of information clustered by the density of the coverage on a genome. 
[Category: Personnal]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input1",                     dest="inputFileName1", action="store",                                         type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--format1",                    dest="format1",                action="store",                                         type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n-    parser.add_option("-j", "--input2",                     dest="inputFileName2", action="store",                                         type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n-    parser.add_option("-g", "--format2",                    dest="format2",                action="store",                                         type="string", help="format of file 2 [compulsory] [format: transcript file format]")\n-    parser.add_option("-o", "--output",                     dest="output",                 action="store",            default=None,    type="string", help="output file [compulsory] [format: output file in TXT format]")\n-    parser.add_option("-v", "--verbosity",                dest="verbosity",            action="store",            default=1,         type="int",        help="trace level [format: int]")\n-    parser.add_option("-l", "--log",                            dest="log",                        action="store",            default=None,    type="string", help="write a log file [format: bool] [default: false]")\n-    (options, args) = parser.parse_args()\n-\n-    logHandle = None\n-    if options.log != None:\n-        logHandle = open(options.log, "w")\n-  '..b'script.name]\n-            else:\n-                averageSizesWithoutIntrons[transcript.getSize()] += coverages[transcript.name]\n-            if transcript.getSize() not in 
sumSizesWithoutIntrons:\n-                sumSizesWithoutIntrons[transcript.getSize()] = 1\n-            else:\n-                sumSizesWithoutIntrons[transcript.getSize()] += 1\n-            if transcript.getNbExons() not in averageNbExons:\n-                averageNbExons[transcript.getNbExons()] = coverages[transcript.name]\n-            else:\n-                averageNbExons[transcript.getNbExons()] += coverages[transcript.name]\n-            if transcript.getNbExons() not in sumSizesNbExons:\n-                sumSizesNbExons[transcript.getNbExons()] = 1\n-            else:\n-                sumSizesNbExons[transcript.getNbExons()] += 1\n-            sizesWithIntrons[transcript.name]        = (transcript.getSizeWithIntrons(), coverages[transcript.name])\n-            sizesWithoutIntrons[transcript.name] = (transcript.getSize(), coverages[transcript.name])\n-            nbExons[transcript.name]                         = (transcript.getNbExons(), coverages[transcript.name])\n-        progress.inc()\n-    progress.done()\n-        \n-    plotterSizeWithIntrons = RPlotter("%sWithIntrons.png" % (options.output), options.verbosity)\n-    plotterSizeWithIntrons.setPoints(True)\n-    plotterSizeWithIntrons.setMaximumX(10000)\n-    plotterSizeWithIntrons.setMaximumY(1000)    \n-    plotterSizeWithIntrons.setLog("y")\n-    plotterSizeWithIntrons.addLine(sizesWithIntrons)\n-    plotterSizeWithIntrons.plot()\n-    \n-    plotterSizeWithoutIntrons = RPlotter("%sWithoutIntrons.png" % (options.output), options.verbosity)\n-    plotterSizeWithoutIntrons.setPoints(True)\n-    plotterSizeWithoutIntrons.setMaximumX(10000)    \n-    plotterSizeWithoutIntrons.setMaximumY(1000)\n-    plotterSizeWithoutIntrons.setLog("y")\n-    plotterSizeWithoutIntrons.addLine(sizesWithoutIntrons)\n-    plotterSizeWithoutIntrons.plot()\n-    \n-    plotterNbExons = RPlotter("%sNbExons.png" % (options.output), options.verbosity)\n-    plotterNbExons.setPoints(True)\n-    
plotterNbExons.addLine(nbExons)\n-    plotterNbExons.plot()\n-    \n-    for element in averageSizesWithIntrons:\n-        averageSizesWithIntrons[element] = int(float(averageSizesWithIntrons[element]) / sumSizesWithIntrons[element])\n-    plotterAverageSizeWithIntrons = RPlotter("%sAverageWithIntrons.png" % (options.output), options.verbosity)\n-    plotterAverageSizeWithIntrons.setMaximumX(10000)\n-    plotterAverageSizeWithIntrons.setMaximumY(1000)    \n-    plotterAverageSizeWithIntrons.setLog("y")\n-    plotterAverageSizeWithIntrons.addLine(averageSizesWithIntrons)\n-    plotterAverageSizeWithIntrons.plot()\n-    print "min/avg/med/max sizes with introns: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(averageSizesWithIntrons)\n-\n-    for element in averageSizesWithoutIntrons:\n-        averageSizesWithoutIntrons[element] = int(float(averageSizesWithoutIntrons[element]) / sumSizesWithoutIntrons[element])\n-    plotterAverageSizeWithoutIntrons = RPlotter("%sAverageWithoutIntrons.png" % (options.output), options.verbosity)\n-    plotterAverageSizeWithoutIntrons.setMaximumX(10000)\n-    plotterAverageSizeWithoutIntrons.setMaximumY(1000)    \n-    plotterAverageSizeWithoutIntrons.setLog("y")\n-    plotterAverageSizeWithoutIntrons.addLine(averageSizesWithoutIntrons)\n-    plotterAverageSizeWithoutIntrons.plot()\n-    print "min/avg/med/max sizes without introns: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(averageSizesWithoutIntrons)\n-\n-    for element in averageNbExons:\n-        averageNbExons[element] = int(float(averageNbExons[element]) / sumSizesNbExons[element])\n-    plotterAverageNbExons = RPlotter("%sAverageNbExons.png" % (options.output), options.verbosity)\n-    plotterAverageNbExons.addLine(averageNbExons)\n-    plotterAverageNbExons.plot()\n-    print "min/avg/med/max # exons: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(averageNbExons)\n-\n-    if options.log:\n-        logHandle.close()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getIntrons.py
--- a/SMART/Java/Python/getIntrons.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,89 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from optparse import OptionParser
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.writer.TranscriptWriter import TranscriptWriter
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.misc.Progress import Progress
-
-class GetIntrons(object):
-
-    def __init__(self, verbosity):
-        self.verbosity = verbosity
-
-    def setInputFile(self, fileName, format):
-        chooser = ParserChooser(self.verbosity)
-        chooser.findFormat(format)
-        self.parser = chooser.getParser(fileName)
-
-    def setOutputFile(self, fileName):
-        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
-        
-    def run(self):
-        progress  = Progress(self.parser.getNbTranscripts(), "Reading input file", self.verbosity)
-        nbIntrons = 0
-        for cpt1, transcript in enumerate(self.parser.getIterator()):
-            for cpt2, intron in enumerate(transcript.getIntrons()):
-                intronTranscript = Transcript()
-                intronTranscript.copy(intron)
-                if "Parent" in intronTranscript.tags:
-                    del intronTranscript.tags["Parent"]
-                intronTranscript.tags["feature"] = "transcript"
-                if "ID" not in intronTranscript.tags or intronTranscript.tags["ID"] == "unnamed transcript":
-                    intronTranscript.tags["ID"] = "intron_%d-%d" % (cpt1+1, cpt2+1)
-                if intronTranscript.getName() == "unnamed transcript":
-                    intronTranscript.setName("intron_%d-%d" % (cpt1+1, cpt2+1))
-                self.writer.addTranscript(intronTranscript)
-                nbIntrons += 1
-            progress.inc()
-        self.writer.write()
-        self.writer.close()
-        progress.done()
-        if self.verbosity > 1:
-            print "%d transcripts read" % (self.parser.getNbTranscripts())
-            print "%d introns written" % (nbIntrons)
-
-
-if __name__ == "__main__":
-    
-    description = "Get Introns v1.0.1: Get the introns of a set of transcripts. [Category: Data Modification]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",             type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",             type="string", help="format of file [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",             type="string", help="output file [format: output file in GFF3 format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",  default=1, type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    gi = GetIntrons(options.verbosity)
-    gi.setInputFile(options.inputFileName, options.format)
-    gi.setOutputFile(options.outputFileName)
-    gi.run()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getLetterDistribution.py
--- a/SMART/Java/Python/getLetterDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,153 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Get the size distribution of a Fasta / BED file"""
-
-import os
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import *
-from SMART.Java.Python.misc.Progress import *
-from SMART.Java.Python.misc.RPlotter import *
-from commons.core.parsing.ParserChooser import ParserChooser
-
-
-def writeCVSfile(outHandler):
-    for pos in range(len(letters)):
-        posTrue = pos +1
-        outHandler.write( "%s;" % (posTrue))
-        for letter in lettersRate:
-            if positionRate[letter].has_key(pos):
-                outHandler.write("%s=%.2f%s;" %(letter, positionRate[letter][pos], "%"))
-            else:
-                outHandler.write("%s=0%s;" % (letter, "%"))
-        outHandler.write("\n")
-
-if __name__ == "__main__":
-
-    # parse command line
-    description = "Get Letter Distribution v1.0.1: Compute the distribution of nucleotides of a set of genomic coordinates. [Category: Visualization]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file to be analyzed [compulsory] [format: file in sequence format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of file [format: sequence file format]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in PNG format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
-    parser.add_option("-c", "--csv",       dest="csv",            action="store_true", default=False,                help="write a .csv file [format: bool] [default: false]")
-    parser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
-    (options, args) = parser.parse_args()
-
-    chooser = ParserChooser()
-    chooser.findFormat(options.format)    
-    parser      = chooser.getParser(options.inputFileName)
-    nbSequences = parser.getNbSequences()
-    print "%i sequences read" % (nbSequences)
-
-    # treat items
-    progress       = Progress(nbSequences, "Analyzing sequences of " + options.inputFileName, options.verbosity)
-    nbLettersTotal = 0
-    nbLetters      = {}
-    lettersRate    = {}
-    nbPositions    = {}
-    positionCount  = {}
-    positionRate   = {}
-    nbPositionRate = {}
-    for sequence in parser.getIterator():
-        letters            = sequence.getSequence()
-        thisNbLettersTotal = sequence.getSize()
-        nbLettersTotal    += thisNbLettersTotal
-        thisNbLetters      = {}
-        
-        for pos in range(len(letters)):
-            letter = letters[pos]
-            if letter not in thisNbLetters:
-                thisNbLetters[letter] = 1
-            else:
-                thisNbLetters[letter] += 1
-            if pos+1 not in nbPositions:
-                nbPositions[pos+1] = 1
-            else:
-                nbPositions[pos+1] += 1
-            if letter not in positionCount:
-                positionCount[letter] = {}
-            if pos+1 not in positionCount[letter]:
-                positionCount[letter][pos+1] = 1
-            else:
-                positionCount[letter][pos+1] += 1
-
-        for letter in thisNbLetters:
-            if letter not in nbLetters:
-                nbLetters[letter] = thisNbLetters[letter]
-            else:
-                nbLetters[letter] += thisNbLetters[letter]
-            if letter not in lettersRate:
-                lettersRate[letter] = {}
-            rate = int(float(thisNbLetters[letter]) / thisNbLettersTotal * 100)
-            if rate not in lettersRate[letter]:
-                lettersRate[letter][rate] = 1
-            else:
-                lettersRate[letter][rate] += 1
-        progress.inc()
-    progress.done()
-    
-    for letter in positionCount:
-        positionRate[letter] = {}
-        for pos in positionCount[letter]:
-            positionRate[letter][pos] = positionCount[letter][pos] / float(nbPositions[pos]) * 100
-    for pos in nbPositions:
-        nbPositionRate[pos] = nbPositions[pos] / float(nbPositions[1]) * 100
-
-    # plot content distributions
-    plotter = RPlotter("%s.png" % (options.outputFileName), options.verbosity, True)
-    plotter.setFill(0)
-    plotter.setLegend(True)
-    for letter in lettersRate:
-        plotter.addLine(lettersRate[letter], letter)
-    plotter.plot()
-    
-    # plot distribution per position
-    plotter = RPlotter("%sPerNt.png" % (options.outputFileName), options.verbosity, True)
-    plotter.setFill(0)
-    plotter.setLegend(True)
-    plotter.setXLabel("Position on the read")
-    plotter.setYLabel("Percentage")
-    for letter in positionRate:
-        plotter.addLine(positionRate[letter], letter)
-    plotter.addLine(nbPositionRate, "#")
-    plotter.plot()
-
-    if options.csv:
-        outHandler = open("%s.csv" % (options.outputFileName), "w")
-        writeCVSfile(outHandler)
-        outHandler.close() 

-    print "%d sequences" % (nbSequences)
-    print "%d letters" % (nbLettersTotal)
-    for letter in nbLetters:
-        print "%s: %d (%.2f%%)" % (letter, nbLetters[letter], float(nbLetters[letter]) / nbLettersTotal * 100)
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getNb.py
--- a/SMART/Java/Python/getNb.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,99 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Get the repartition of some elements (# exons per transcripts, # of repetitions of a mapping or # of transcripts in a cluster)"""
-
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.misc.RPlotter import RPlotter
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc import Utils
-from math import *
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Get Nb v1.0.1: Get the distribution of exons per transcripts, or mapping per read, or transcript per cluster. [Category: Visualization]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in png format]")
-    parser.add_option("-q", "--query",     dest="query",          action="store",                     type="string", help="query  [compulsory] (# exons, # transcripts) [format: choice (exon, transcript, cluster)]")    
-    parser.add_option("-b", "--barplot",   dest="barplot",        action="store_true", default=False,                help="use barplot representation [format: bool] [default: false]")
-    parser.add_option("-x", "--xMax",      dest="xMax",           action="store",      default=None,  type="int",    help="maximum value on the x-axis to plot [format: int]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [default: 1] [format: int]")
-    parser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
-    (options, args) = parser.parse_args()
-
-    if options.query != "exon" and options.query != "transcript" and options.query != "cluster":
-        raise Exception("Do not understand query %s" % (options.query))
-
-    exonDistribution       = {}
-    transcriptDistribution = {}
-    clusterDistribution    = {}
-    
-    transcriptContainer = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
-        
-    progress = Progress(transcriptContainer.getNbTranscripts(), "Parsing %s" % (options.inputFileName), options.verbosity)
-    # count the number of reads
-    for element in transcriptContainer.getIterator():
-        if options.query == "exon":
-            nbExons = element.getNbExons()
-            exonDistribution[nbExons] = exonDistribution.get(nbExons, 0) + 1
-        elif options.query == "transcript":
-            name = element.getName()
-            transcriptDistribution[name] = transcriptDistribution.get(name, 0) + 1
-        elif options.query == "cluster":
-            nbElements = 1 if "nbElements" not in element.getTagNames() else element.getTagValue("nbElements")
-            clusterDistribution[nbElements] = clusterDistribution.get(nbElements, 0) + 1
-        progress.inc()
-    progress.done()
-    
-    if options.query == "exon":
-        distribution = exonDistribution
-    elif options.query == "transcript":
-        distribution = {}
-        for name in transcriptDistribution:
-            distribution[transcriptDistribution[name]] = distribution.get(transcriptDistribution[name], 0) + 1
-    elif options.query == "cluster":
-        distribution = clusterDistribution
-    
-    outputFileName = options.outputFileName
-    plotter = RPlotter(outputFileName, options.verbosity)
-    plotter.setImageSize(1000, 300)
-    plotter.setFill(0)
-    plotter.setMaximumX(options.xMax)
-    plotter.setBarplot(options.barplot)
-    plotter.addLine(distribution)
-    plotter.plot()
-             
-    print "min/avg/med/max: %d/%.2f/%.1f/%d" % (Utils.getMinAvgMedMax(distribution))
-            
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getRandomRegions.py
--- a/SMART/Java/Python/getRandomRegions.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,267 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""Find random regions in a genome"""\n-\n-import random, math\n-from optparse import OptionParser\n-from commons.core.parsing.FastaParser import *\n-from commons.core.writer.Gff3Writer import *\n-from commons.core.writer.MySqlTranscriptWriter import *\n-from SMART.Java.Python.misc.Progress import *\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-\n-repetitions = 100\n-\n-\n-class RandomRegionsGenerator(object):\n-\n-    def __init__(self, verbosity):\n-        self.verbosity      = verbosity\n-        self.strands        = False\n-        self.distribution   = "uniform"\n-        self.transcripts    = None\n-        self.sequenceParser = None\n-        random.seed()\n-\n-\n-    def setInput(self, fileName):\n-        self.sequenceParser = FastaParser(fileName, self.verbosity)\n-\n-\n-    def setGenomeSize(self, size):\n-        self.genomeSize = size\n-\n-\n-    def setChromosomeName(self, name):\n-        self.chromosomeName = name\n-\n-\n-    def setAnnotation(self, fileName, format):\n-        parser           = TranscriptContainer(fileName, format, self.verbosity)\n-        self.transcripts = []\n-        for transcript in parser.getIterator():\n-            self.transcripts.append(transcript)\n-        self.setNumber(len(self.transcripts))\n-        self.setSize(0)\n-\n-\n-    def setOutputFile(self, fileName):\n-        self.outputFileName = fileName\n-\n-\n-    def setSize(self, size):\n-        self.minSize = size\n-        
self.maxSize = size\n-\n-\n-    def setMinSize(self, size):\n-        self.minSize = size\n-\n-\n-    def setMaxSize(self, size):\n-        self.maxSize = size\n-\n-\n-    def setNumber(self, number):\n-        self.number = number\n-\n-\n-    def setStrands(self, strands):\n-        self.strands = strands\n-\n-\n-    def setMaxDistribution(self, maxElements):\n-        if maxElements == None:\n-            return\n-        self.maxElements = maxElements\n-        self.distribution = "gaussian"\n-\n-\n-    def setDeviationDistribution(self, deviation):\n-        if deviation == None:\n-            return\n-        self.deviation = deviation\n-        self.distribution = "gaussian"\n-\n-\n-    def getSizes(self):\n-        if self.sequenceParser == None:\n-            self.chromosomes    = [self.chromosomeName]\n-            self.sizes          = {self.chromosomeName: self.genomeSize}\n-            self.cumulatedSize  = self.genomeSize\n-            self.cumulatedSizes = {'..b'n(self):\n-        self.getSizes()\n-        self.writeRegions()\n-\n-\n-if __name__ == "__main__":\n-    \n-    # parse command line\n-    description = "Get Random Regions v1.0.2: Get some random coordinates on a genome. May use uniform or gaussian distribution (in gaussion distribution, # of element per cluster follows a power law). 
[Category: Other]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-r", "--reference",     dest="reference",      action="store",      default=None,  type="string", help="file that contains the sequences [format: file in FASTA format]")\n-    parser.add_option("-S", "--referenceSize", dest="referenceSize",  action="store",      default=None,  type="int",    help="size of the chromosome (when no reference is given) [format: int]")\n-    parser.add_option("-c", "--chromosome",    dest="chromosome",     action="store",      default=None,  type="string", help="name of the chromosome (when no reference is given) [format: string]")\n-    parser.add_option("-o", "--output",        dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in FASTA format]")\n-    parser.add_option("-i", "--input",         dest="inputFileName",  action="store",      default=None,  type="string", help="optional file containing regions to shuffle [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--format",        dest="format",         action="store",      default=None,  type="string", help="format of the previous file [format: transcript file format]")\n-    parser.add_option("-s", "--size",          dest="size",           action="store",      default=None,  type="int",    help="size of the regions (if no region set is provided) [format: int]")\n-    parser.add_option("-z", "--minSize",       dest="minSize",        action="store",      default=None,  type="int",    help="minimum size of the regions (if no region set nor a fixed size are provided) [format: int]")\n-    parser.add_option("-Z", "--maxSize",       dest="maxSize",        action="store",      default=None,  type="int",    help="maximum size of the regions (if no region set nor a fixed size are provided) [format: int]")\n-    parser.add_option("-n", "--number",        dest="number",         
action="store",      default=None,  type="int",    help="number of regions (if no region set is provided) [format: int]")\n-    parser.add_option("-t", "--strands",       dest="strands",        action="store_true", default=False,                help="use both strands (if no region set is provided) [format: boolean]")\n-    parser.add_option("-m", "--max",           dest="max",            action="store",      default=None,  type="int",    help="max. # reads in a cluster (for Gaussian dist.) [format: int]")\n-    parser.add_option("-d", "--deviation",     dest="deviation",      action="store",      default=None,  type="int",    help="deviation around the center of the cluster (for Gaussian dist.) [format: int]")\n-    parser.add_option("-v", "--verbosity",     dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")\n-    (options, args) = parser.parse_args()\n-\n-    rrg = RandomRegionsGenerator(options.verbosity)\n-    if options.reference == None:\n-        rrg.setGenomeSize(options.referenceSize)\n-        rrg.setChromosomeName(options.chromosome)\n-    else:\n-        rrg.setInput(options.reference)\n-    rrg.setOutputFile(options.outputFileName)\n-    if options.inputFileName == None:\n-        if options.size != None:\n-            rrg.setSize(options.size)\n-        else:\n-            rrg.setMinSize(options.minSize)\n-            rrg.setMaxSize(options.maxSize)\n-        rrg.setNumber(options.number)\n-        rrg.setStrands(options.strands)\n-    else:\n-        rrg.setAnnotation(options.inputFileName, options.format)\n-    rrg.setMaxDistribution(options.max)\n-    rrg.setDeviationDistribution(options.deviation)\n-    rrg.run()\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getReadDistribution.py
--- a/SMART/Java/Python/getReadDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,129 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Plot the data from the data files
-"""
-import os
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import FastaParser
-from commons.core.parsing.FastqParser import FastqParser
-from SMART.Java.Python.misc.RPlotter import RPlotter
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc import Utils
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Get Read Distribution v1.0.1: Plot the number of identical reads and give the most represented. [Category: Visualization]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file sequence [compulsory] [format: file in sequence format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of the file [compulsory] [format: sequence file format]")
-    parser.add_option("-n", "--number",    dest="number",         action="store", default=None, type="int",    help="keep the best n    [format: int]")
-    parser.add_option("-p", "--percent",   dest="percent",        action="store", default=None, type="float",  help="keep the best n\% [format: float]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",               type="string", help="output file [compulsory] [format: output files in PNG format and txt format]")
-    parser.add_option("-x", "--xMax",      dest="xMax",           action="store", default=None, type="int",    help="maximum value on the x-axis to plot [format: int]")
-    parser.add_option("-D", "--directory", dest="working_Dir",    action="store", default=os.getcwd(), type="string", help="the directory to store the results [format: directory]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    if options.working_Dir[-1] != '/':
-        options.outputFileName = options.working_Dir + '/' + options.outputFileName
-        
-    if options.format == "fasta":
-        parser = FastaParser(options.inputFileName, options.verbosity)
-    elif options.format == "fastq":
-        parser = FastqParser(options.inputFileName, options.verbosity)
-    else:
-        raise Exception("Do not understand '%s' file format." % (options.format))
-
-    progress  = Progress(parser.getNbSequences(), "Reading %s" % (options.inputFileName), options.verbosity)
-    sequences = {}
-    for sequence in parser.getIterator():
-        sequence = sequence.sequence
-        if sequence not in sequences:
-            sequences[sequence] = 1
-        else:
-            sequences[sequence] += 1
-        progress.inc()
-    progress.done()
-
-    values = sequences.values()
-    values.sort()
-    if options.percent != None:
-        threshold = values[int(float(options.percent) / 100 * len(values))]
-    elif options.number != None:
-        threshold = values[-options.number]
-    else:
-        threshold = 0
-
-    # sort by value
-    progress     = Progress(parser.getNbSequences(), "Sorting values", options.verbosity)
-    sortedValues = dict([(value, []) for value in sequences.values()])
-    for sequence, value in sequences.iteritems():
-        sortedValues[value].append(sequence)
-        progress.inc()
-    progress.done()
-
-    outputFileName = "%s.txt" % (options.outputFileName)
-    handle         = open(outputFileName, "w")
-    progress       = Progress(parser.getNbSequences(), "Writing into %s" % (outputFileName), options.verbosity)
-    for value in reversed(sorted(sortedValues.keys())):
-        if value >= threshold:
-            for sequence in sortedValues[value]:
-                handle.write("%s\t%d\n" % (sequence, value))
-        progress.inc()
-    progress.done()
-    handle.close()
-
-    line     = {}
-    progress = Progress(len(values), "Preparing plot", options.verbosity)
-    for value in values:
-        if value not in line:
-            line[value] = 1
-        else:
-            line[value] += 1
-        progress.inc()
-    progress.done()
-
-    plot = RPlotter("%s.png" % (options.outputFileName), options.verbosity)
-    plot.setFill(0)
-    plot.setMaximumX(options.xMax)
-    plot.setXLabel("# occurrences")
-    plot.setYLabel("# reads")
-    plot.addLine(line)
-    plot.plot()
-
-    if options.verbosity > 0:
-        print "%d/%.2f/%.1f/%d occurrences" % (Utils.getMinAvgMedMax(line))
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getSequence.py
--- a/SMART/Java/Python/getSequence.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,60 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Get a given sequence in a multi-Fasta file"""
-import sys
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import FastaParser
-from SMART.Java.Python.misc.Progress import Progress
-from commons.core.writer.FastaWriter import FastaWriter
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Get Sequence v1.0.1: Get a single sequence in a FASTA file. [Category: Data Selection]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input", dest="inputFileName",action="store",type="string", help="multi-FASTA file [compulsory] [format: file in FASTA format]")
-    parser.add_option("-n", "--name",dest="name",action="store",type="string", help="name of the sequence [compulsory] [format: string]")
-    parser.add_option("-o", "--output",dest="outputFileName",action="store",type="string", help="output sequence file (FASTA) [compulsory] [format: file in FASTA format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",action="store",default=1,type="int",help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    # read Fasta file
-    sequenceListParser = FastaParser(options.inputFileName, options.verbosity)
-    for sequence in sequenceListParser.getIterator():
-        name = sequence.name.split(" ")[0]
-        if name == options.name:
-            writer = FastaWriter(options.outputFileName, options.verbosity)
-            writer.addSequence(sequence)
-            print sequence.printFasta(),
-            sys.exit(0)
-    writer.close()
-    print "No sequence found"
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getSizes.py
--- a/SMART/Java/Python/getSizes.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,218 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import os, sys\n-from optparse import OptionParser\n-from commons.core.parsing.FastaParser import FastaParser\n-from commons.core.parsing.FastqParser import FastqParser\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from commons.core.parsing.GffParser import GffParser\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from SMART.Java.Python.misc import Utils\n-\n-from commons.core.LoggerFactory import LoggerFactory\n-from commons.core.utils.RepetOptionParser import RepetOptionParser\n-\n-LOG_DEPTH = "smart"\n-\n-class GetSizes(object):\n-\t\n-\tdef __init__(self, inFileName = None, inFormat=None, outFileName = None, query=None,xMax=None, xMin=None, verbosity = 0):\n-\t\tself.inFileName = inFileName\n-\t\tself.inFormat= inFormat\n-\t\tself.outFileName = outFileName\n-\t\tself.query = query\n-\t\tself.xMax = xMax\n-\t\tself.xMin = xMin\n-\t\tself.xLab = "Size"\n-\t\tself.yLab = "# reads"\n-\t\tself.barplot = False\n-\t\tself._verbosity = verbosity\n-\t\tself.parser = None\n-\t\tself._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)\n-\t\t\n-\tdef setAttributesFromCmdLine(self):\n-\t\tdescription = "Usage: getSizes.py [options]\\n\\nGet Sizes v1.0.2: Get the sizes of a set of genomic coordinates. 
[Category: Visualization]\\n"\n-\t\tepilog = ""\n-\t\tparser = RepetOptionParser(description = description, epilog = epilog)\n-\t\tparser.add_option("-i", "--input",\t dest="inputFileName", action="store",\t    default=None,\t   type="string", help="input file [compulsory] [format: file in transcript or sequence format given by -f]")\n-\t\tparser.add_option("-f", "--format",\tdest="format",\t\t   action="store",\t    default=None,\t   type="string", help="format of the input [compulsory] [format: transcript or sequence file format]")\n-\t\tparser.add_option("-q", "--query",\t dest="query",\t\t   action="store",\t    default=None,\t   type="string", help="type to mesure [default: size] [format: choice (size, intron size, exon size, 1st exon size)]")\t \n-\t\tparser.add_option("-o", "--output",\tdest="outputFileName", action="store",\t    default=None,\t   type="string", help="output file [format: output file in PNG format]")\n-\t\tparser.add_option("-x", "--xMax",\t  dest="xMax",\t\t   action="store",\t    default=None,\t   type="int",\t  help="maximum value on the x-axis to plot [format: int]")\n-\t\tparser.add_option("-X", "--xMin",\t  dest="xMin",\t\t   action="store",\t    default=None,\t '..b'="store_true", default=False,\t\t\t\t\t  help="use barplot representation [format: bool] [default: false]")  \n-\t\toptions = parser.parse_args()[0]\n-\t\tself._setAttributesFromOptions(options)\n-\t\t\n-\tdef _setAttributesFromOptions(self, options):\n-\t\tself.setInFileName(options.inputFileName)\n-\t\tself.setInFormat(options.format)\n-\t\tself.setQuery(options.query)\n-\t\tself.setOutFileName(options.outputFileName)\n-\t\tself.setXMax(options.xMax)\n-\t\tself.setXMin(options.xMin)\n-\t\tself.setxLab(options.xLab)\n-\t\tself.setyLab(options.yLab)\n-\t\tself.setBarplot(options.barplot)\n-\t\tself.setVerbosity(options.verbosity)\n-\t\t\n-\tdef setInFileName(self, inputFileName):\n-\t\tself.inFileName = inputFileName\n-\t\t\n-\tdef setInFormat(self, 
inFormat):\n-\t\tself.inFormat = inFormat\n-\t\n-\tdef setQuery(self, query):\n-\t\tself.query = query\n-\t\t\n-\tdef setOutFileName(self, outFileName):\n-\t\tself.outFileName = outFileName\n-\t\n-\tdef setXMax(self, xMax):\n-\t\tself.xMax = xMax\n-\t\t\n-\tdef setXMin(self, xMin):\n-\t\tself.xMin = xMin\n-\t\n-\tdef setxLab(self, xLab):\n-\t\tself.xLab = xLab\n-\t\t\n-\tdef setyLab(self, yLab):\n-\t\tself.yLab = yLab\n-\t\t\n-\tdef setBarplot(self, barplot):\n-\t\tself.barplot = barplot\n-\t\t\n-\tdef setVerbosity(self, verbosity):\n-\t\tself._verbosity = verbosity\n-\t\t\n-\tdef _checkOptions(self):\n-\t\tif self.inFileName == None:\n-\t\t\tself._logAndRaise("ERROR: Missing input file name")\n-\t\tif self.inFormat == "fasta":\n-\t\t\tself.parser = FastaParser(self.inFileName, self._verbosity)\n-\t\telif self.inFormat == "fastq":\n-\t\t\tself.parser = FastqParser(self.inFileName, self._verbosity)\n-\t\telse:\n-\t\t\tself.parser = TranscriptContainer(self.inFileName, self.inFormat, self._verbosity)\n-\t\t\t\n-\tdef _logAndRaise(self, errorMsg):\n-\t\tself._log.error(errorMsg)\n-\t\traise Exception(errorMsg)\n-\n-\tdef run(self):\n-\t\tLoggerFactory.setLevel(self._log, self._verbosity)\n-\t\tself._checkOptions()\n-\t\tself._log.info("START getsizes")\n-\t\tself._log.debug("Input file name: %s" % self.inFileName)\n-\n-\t\tnbItems = self.parser.getNbItems()\n-\t\tself._log.info( "%i items found" % (nbItems))\n-\t\t\n-\t\t# treat items\n-\t\tprogress   = Progress(nbItems, "Analyzing sequences of %s" % (self.inFileName), self._verbosity)\n-\t\tsizes      = {}\n-\t\tminimum\t   = 1000000000000\n-\t\tmaximum\t   = 0\n-\t\tsum\t\t   = 0\n-\t\tnumber     = 0\n-\t\tnbSubItems = 0\n-\t\tfor item in self.parser.getIterator():\n-\t\t\titems = []\n-\t\t\tif self.query == "exon":\n-\t\t\t\titems = item.getExons()\n-\t\t\telif self.query == "exon1":\n-\t\t\t\tif len(item.getExons()) > 1:\n-\t\t\t\t\titem.sortExons()\n-\t\t\t\t\titems = [item.getExons()[0]]\n-\t\t\telif self.query 
== "intron":\n-\t\t\t\titems = item.getIntrons()\n-\t\t\telse:\n-\t\t\t\titems = [item, ]\n-\t\n-\t\t\tfor thisItem in items:\n-\t\t\t\ttry:\n-\t\t\t\t\tnbElements = int(float(thisItem.getTagValue("nbElements")))\n-\t\t\t\t\tif nbElements == None:\n-\t\t\t\t\t\tnbElements = 1\n-\t\t\t\texcept:\n-\t\t\t\t\tnbElements = 1\n-\t\t\t\tsize\t= thisItem.getSize()\n-\t\t\t\tminimum = min(minimum, size)\n-\t\t\t\tmaximum = max(maximum, size)\n-\t\t\t\t\n-\t\t\t\tif size not in sizes:\n-\t\t\t\t\tsizes[size] = nbElements\n-\t\t\t\telse:\n-\t\t\t\t\tsizes[size] += nbElements\n-\t\t\t\tsum\t\t+= size\n-\t\t\t\tnbSubItems += nbElements\n-\t\t\tnumber += 1\n-\t\t\tprogress.inc()\n-\t\tprogress.done()\n-\n-\t\tif self.outFileName != None:\n-\t\t\tplotter = RPlotter(self.outFileName, self._verbosity)\n-\t\t\tplotter.setFill(0)\n-\t\t\tplotter.setMinimumX(self.xMin)\n-\t\t\tplotter.setMaximumX(self.xMax)\n-\t\t\tplotter.setXLabel(self.xLab)\n-\t\t\tplotter.setYLabel(self.yLab)\n-\t\t\tplotter.setBarplot(self.barplot)\n-\t\t\tplotter.addLine(sizes)\n-\t\t\tplotter.plot()\n-\t\t\t\n-\t\tif nbSubItems == 0:\n-\t\t\tself._logAndRaise("No item found")\n-\t\t\t\n-\t\tself.items = number\t  \n-\t\tself.subItems = nbSubItems\n-\t\tself.nucleotides = sum\n-\t\tself.minAvgMedMax = Utils.getMinAvgMedMax(sizes)\n-\t\t\t\t  \n-\t\tprint "%d items" % (number)\n-\t\tprint "%d sub-items" % (nbSubItems)\n-\t\tprint "%d nucleotides" % (sum)\n-\t\tprint "min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(sizes)\n-\n-\t\tself._log.info("END getsizes")\n-\n-\n-if __name__ == "__main__":\n-\tiGetSizes = GetSizes()\n-\tiGetSizes.setAttributesFromCmdLine()\n-\tiGetSizes.run()\n-\t\n-#TODO: add two more options!!!!!!\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getWigData.py
--- a/SMART/Java/Python/getWigData.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,67 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from commons.core.parsing.WigParser import WigParser
-from commons.core.writer.Gff3Writer import Gff3Writer
-from SMART.Java.Python.misc.Progress import Progress
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Get WIG Data v1.0.1: Compute the average data for some genomic coordinates using WIG files (thus covering a large proportion of the genome) and update a tag. [Category: WIG Tools]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")
-    parser.add_option("-w", "--wig",         dest="wig",            action="store",                     type="string", help="wig file name [compulsory] [format: file in WIG format]")    
-    parser.add_option("-t", "--tag",         dest="tag",            action="store",                     type="string", help="choose a tag name to write the wig information to output file [compulsory] [format: file in WIG format]")    
-    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-s", "--strands",     dest="strands",        action="store_true", default=False,                help="consider both strands separately [format: boolean] [default: False]")    
-    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    # create parsers and writers
-    transcriptParser = TranscriptContainer(options.inputFileName, options.inputFormat, options.verbosity)
-    wigParser        = WigParser(options.wig)
-    writer           = Gff3Writer(options.outputFileName, options.verbosity)
-    wigParser.setStrands(options.strands)
-    
-    progress = Progress(transcriptParser.getNbTranscripts(), "Parsing %s" % (options.inputFileName), options.verbosity)
-    for transcript in transcriptParser.getIterator():
-        values = transcript.extractWigData(wigParser)
-        if options.strands:
-            values = values[transcript.getDirection()]
-        transcript.setTagValue(options.tag, str(float(sum(values)) / len(values)))
-        writer.addTranscript(transcript)
-        progress.inc()
-    progress.done()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getWigDistance.py
--- a/SMART/Java/Python/getWigDistance.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,105 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.
-"""
-
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from commons.core.parsing.WigParser import WigParser
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.RPlotter import RPlotter
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Get WIG Data v1.0.2: Compute the average data around some genomic coordinates using WIG files (thus covering a large proportion of the genome). [Category: WIG Tools]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")
-    parser.add_option("-w", "--wig",         dest="wig",            action="store",                     type="string", help="wig file name [compulsory] [format: file in WIG format]")    
-    parser.add_option("-d", "--distance",    dest="distance",       action="store",      default=1000,  type="int",    help="distance around position [compulsory] [format: int] [default: 1000]")    
-    parser.add_option("-s", "--strands",     dest="strands",        action="store_true", default=False,                help="consider both strands separately [format: boolean] [default: False]")    
-    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in PNG format]")
-    parser.add_option("-a", "--default",     dest="defaultValue",   action="store",      default=0.0,   type="float",  help="default value (when value is NA) [default: 0.0] [format: float]")
-    parser.add_option("-l", "--log",         dest="log",            action="store_true", default=False,                help="use log scale for y-axis [format: boolean] [default: False]")
-    parser.add_option("-k", "--keep",        dest="keep",           action="store_true", default=False,                help="keep temporary files [format: boolean] [default: False]")
-    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    # create parsers and writers
-    transcriptParser = TranscriptContainer(options.inputFileName, options.inputFormat, options.verbosity)
-    wigParser        = WigParser(options.wig)
-    wigParser.setStrands(options.strands)
-    wigParser.setDefaultValue(options.defaultValue)
-    
-    # allocate data
-    strands = (1, -1) if options.strands else (1, )
-    values    = {}
-    for strand in strands:
-        values[strand] = dict([(i, 0.0) for i in range(-options.distance, options.distance+1)])
-
-    # read transcripts
-    progress = Progress(transcriptParser.getNbTranscripts(), "Parsing %s" % (options.inputFileName), options.verbosity)
-    for transcript in transcriptParser.getIterator():
-        transcript.removeExons()
-        transcript.restrictStart(2)
-        transcript.extendStart(options.distance)
-        transcript.extendEnd(options.distance-1)
-        theseValues = transcript.extractWigData(wigParser)
-        if len(strands) == 1:
-            theseValues = {1: theseValues}
-        for strand in strands:
-            if len(theseValues[strand]) < 2 * options.distance + 1:
-                theseValues[strand] = [options.defaultValue] * (2 * options.distance + 1 - len(theseValues[strand])) + theseValues[strand]
-            if len(theseValues[strand]) != 2 * options.distance + 1:
- raise Exception("Got something wrong with the size of the WIG data concerning %s: %d found instead of %d" % (transcript, len(theseValues[strand]), 2 * options.distance + 1))
-            for i in range(-options.distance, options.distance+1):
-                values[strand][i] += theseValues[strand][i + options.distance]
-        progress.inc()
-    progress.done()
-
-    for strand in strands:
-        for i in range(-options.distance, options.distance+1):
-            values[strand][i] /= transcriptParser.getNbTranscripts() * strand
-
-    # draw plot
-    plotter = RPlotter(options.outputFileName, options.verbosity, options.keep)
-    plotter.setXLabel("Distance")
-    plotter.setYLabel("WigValue")
-    for strand in strands:
-        plotter.addLine(values[strand])
-    if options.log:
-        plotter.setLog("y")
-    plotter.plot()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/getWigProfile.py
--- a/SMART/Java/Python/getWigProfile.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,160 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""\n-Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.\n-"""\n-\n-import math\n-from optparse import OptionParser\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from commons.core.parsing.WigParser import WigParser\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-\n-class GetWigProfile(object):\n-\n-\tdef __init__(self, verbosity):\n-\t\tself.verbosity\t= verbosity\n-\t\tself.values\t\t = {}\n-\t\tself.defaultValue = 0.0\n-\n-\tdef _iToJ(self, i, size):\n-\t\treturn min(self.nbPoints+1, int(math.floor(float(i - self.distance) / (size) * (self.nbPoints))))\n-\n-\tdef readTranscripts(self):\n-\t\tself.strandNames = (1, -1) if self.strands else (1, )\n-\t\tself.values\t\t= dict([(strand, dict([(i, 0.0) for i in range(self.nbPoints + 2 * self.distance)])) for strand in self.strandNames])\n-\t\ttranscriptParser = TranscriptContainer(self.inputFileName, self.inputFormat, self.verbosity)\n-\t\twigParser\t\t= WigParser(self.wig)\n-\t\tnbValues\t\t = dict([(strand, dict([(i, 0.0) for i in range(self.nbPoints + 2 * self.distance)])) for strand in self.strandNames])\n-\t\twigParser.setStrands(self.strands)\n-\t\twigParser.setDefaultValue(self.defaultValue)\n-\n-\t\tprogress = Progress(transcriptParser.getNbTranscripts(), "Parsing %s" % (self.inputFileName), self.verbosity)\n-\t\tfor transcript in transcriptParser.getIterator():\n-\t\t\ttranscriptSize = 
transcript.getSize()\n-\t\t\texpectedSize   = transcriptSize + 2 * self.distance\n-\t\t\ttranscript.extendStart(self.distance)\n-\t\t\ttranscript.extendEnd(self.distance)\n-\t\t\ttheseValues = transcript.extractWigData(wigParser)\n-\n-\t\t\tif len(self.strandNames) == 1:\n-\t\t\t\ttheseValues = {1: theseValues}\n-\t\t\tfor strand in self.strandNames:\n-\t\t\t\tif len(theseValues[strand]) < expectedSize:\n-\t\t\t\t\ttheseValues[strand] = [self.defaultValue] * (expectedSize - len(theseValues[strand])) + theseValues[strand]\n-\t\t\t\tif len(theseValues[strand]) != expectedSize:\n-\t\t\t\t\traise Exception("Got something wrong with the size of the WIG data concerning %s [%s]: %d found instead of %d" % (transcript, ",".join(["%d-%d" % (exon.getStart(), exon.getEnd()) for exon in transcript.getExons()]), len(theseValues[strand]), expectedSize))\n-\t\t\t\tfivePValues = theseValues[strand][: self.distance]\n-\t\t\t\tnbValues         = [0.0] * (self.nbPoints)\n-\t\t\t\ttranscriptValues = [0.0] * (self.nbPoints)\n-\t\t\t\tfor i in range(self.distance, len(theseValues[stra'..b'-\t\t\t\tstrand = 1\n-\t\t\tfor i in range(self.nbPoints + 2 * self.distance):\n-\t\t\t\tself.values[strand][i] /= transcriptParser.getNbTranscripts() * strand\n-\n-\n-\tdef smoothen(self):\n-\t\tif self.smoothenForce == None:\n-\t\t\treturn\n-\t\tfor strand in self.strandNames:\n-\t\t\taverageValues = {}\n-\t\t\tfor center in range(self.distance, self.distance + self.nbPoints):\n-\t\t\t\tsum\t\t= 0.0\n-\t\t\t\tnbValues = 0.0\n-\t\t\t\tfor i in range(center - self.smoothenForce + 1, center + self.smoothenForce):\n-\t\t\t\t\tif i > self.distance and i < self.distance + self.nbPoints:\n-\t\t\t\t\t\tnbValues += 1\n-\t\t\t\t\t\tsum\t\t+= self.values[strand][i]\n-\t\t\t\taverageValues[center] = sum / nbValues\n-\t\t\tfor position in range(self.distance, self.distance + self.nbPoints):\n-\t\t\t\tself.values[strand][position] = averageValues[position]\n-\t\t\n-\n-\tdef plot(self):\n-\t\tplotter = 
RPlotter(self.outputFileName, self.verbosity)\n-\t\tfor strand in self.strandNames:\n-\t\t\tplotter.addLine(self.values[strand])\n-\t\tif self.log:\n-\t\t\tplotter.setLog("y")\n-\t\tplotter.setAxisLabel("x", {0: -self.distance, self.distance: "start", self.distance+self.nbPoints-1: "end", 2*self.distance+self.nbPoints-1: self.distance})\n-\t\tplotter.plot()\n-\n-\n-\n-if __name__ == "__main__":\n-\t\n-\t# parse command line\n-\tdescription = "Get WIG Profile v1.0.1: Compute the average profile of some genomic coordinates using WIG files (thus covering a large proportion of the genome). [Category: WIG Tools]"\n-\n-\tparser = OptionParser(description = description)\n-\tparser.add_option("-i", "--input",\t\t\t dest="inputFileName",\taction="store",\t\t\t\t\t\t\t\t\t\t\ttype="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n-\tparser.add_option("-f", "--inputFormat", dest="inputFormat",\t\taction="store",\t\t\t\t\t\t\t\t\t\t\ttype="string", help="format of the input file [compulsory] [format: transcript file format]")\n-\tparser.add_option("-w", "--wig",\t\t\t\t dest="wig",\t\t\t\t\t\taction="store",\t\t\t\t\t\t\t\t\t\t\ttype="string", help="wig file name [compulsory] [format: file in WIG format]")\t\n-\tparser.add_option("-p", "--nbPoints",\t\t dest="nbPoints",\t\t\t\taction="store",\t\t\t default=1000,\ttype="int",\t\t help="number of points on the x-axis [compulsory] [format: int] [default: 1000]")\t\n-\tparser.add_option("-d", "--distance",\t\t dest="distance",\t\t\t\taction="store",\t\t\t default=0,\t\t\ttype="int",\t\t help="distance around genomic coordinates [compulsory] [format: int] [default: 0]")\t\n-\tparser.add_option("-s", "--strands",\t\t dest="strands",\t\t\t\taction="store_true", default=False,\t\t\t\t\t\t\t\t help="consider both strands separately [format: boolean] [default: False]")\t\n-\tparser.add_option("-m", "--smoothen",\t\t dest="smoothen",\t\t\t\taction="store",\t\t\t default=None,\ttype="int",\t\t 
help="smoothen the curve [format: int] [default: None]")\t\n-\tparser.add_option("-a", "--default",\t\t dest="defaultValue",\t action="store",\t\t\t default=0.0,\t type="float",\thelp="default value (when value is NA) [default: 0.0] [format: float]")\n-\tparser.add_option("-o", "--output",\t\t\t dest="outputFileName", action="store",\t\t\t\t\t\t\t\t\t\t\ttype="string", help="output file [compulsory] [format: output file in PNG format]")\n-\tparser.add_option("-l", "--log",\t\t\t\t dest="log",\t\t\t\t\t\taction="store_true", default=False,\t\t\t\t\t\t\t\t help="use log scale for y-axis\t[format: boolean] [default: False]")\n-\tparser.add_option("-v", "--verbosity",\t dest="verbosity",\t\t\taction="store",\t\t\t default=1,\t\t\ttype="int",\t\t help="trace level [format: int]")\n-\t(options, args) = parser.parse_args()\n-\n-\twigProfile\t\t\t\t\t\t\t\t= GetWigProfile(options.verbosity)\n-\twigProfile.strands\t\t\t \t= options.strands\n-\twigProfile.inputFileName\t= options.inputFileName\n-\twigProfile.inputFormat\t\t= options.inputFormat\n-\twigProfile.wig\t\t\t\t\t\t= options.wig\n-\twigProfile.nbPoints\t\t\t\t= options.nbPoints\n-\twigProfile.distance\t\t\t\t= options.distance\n-\twigProfile.smoothenForce\t= options.smoothen\n-\twigProfile.defaultValue\t  = options.defaultValue\n-\twigProfile.outputFileName = options.outputFileName\n-\twigProfile.log\t\t\t\t\t\t= options.log\n-\n-\twigProfile.readTranscripts()\n-\twigProfile.smoothen()\n-\twigProfile.plot()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/gf.py
--- a/SMART/Java/Python/gf.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,231 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2011\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.TranscriptWriter import TranscriptWriter\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.misc.Progress import Progress\n-\n-QUERY        = 0\n-REFERENCE    = 1\n-INPUTS       = (QUERY, REFERENCE)\n-STRANDS      = (-1, 1)\n-TAG_DISTANCE = "distance_"\n-TAG_SENSE    = "_sense"\n-TAG_REGION   = "_region"\n-TAGS_REGION  = {-1: "_upstream", 0: "", 1: "_downstream"}\n-TAGS_RREGION = {-1: "upstream", 0: "overlapping", 1: "downstream"}\n-TAGS_SENSE   = {-1: "antisense", 0: "", 1: "colinear"}\n-STRANDSTOSTR = {-1: "(-)", 0: "", 1: "(+)"}\n-\n-\n-def getOrderKey(transcript, direction):\n-    if direction == 1:\n-        return transcript.getEnd()\n-    return - transcript.getStart()\n-\n-def isInGoodRegion(transcriptRef, transcriptQuery, direction):\n-    if direction == 1:\n-        return transcriptQuery.getEnd() > transcriptRef.getEnd()\n-    return transcriptQuery.getStart() < transcriptRef.getStart()\n-\n-\n-class GetFlanking(object):\n-\n-    def __init__(self, verbosity):\n-        self.verbosity   = verbosity\n-        self.transcripts = dict([id, {}] for id in INPUTS)\n-        self.directions  = []\n-        self.noOverlap   = False\n-        self.colinear    = False\n-        self.antisense   = False\n-        self.distance    = None\n-        self.minDistance = None\n-        self.maxDistance = None\n-        
self.tagName     = "flanking"\n-\n-    def setInputFile(self, fileName, format, id):\n-        chooser = ParserChooser(self.verbosity)\n-        chooser.findFormat(format)\n-        parser = chooser.getParser(fileName)\n-        for transcript in parser.getIterator():\n-            chromosome = transcript.getChromosome()\n-            if chromosome not in self.transcripts[id]:\n-                self.transcripts[id][chromosome] = []\n-            self.transcripts[id][chromosome].append(transcript)\n-\n-    def setOutputFile(self, fileName):\n-        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n-\n-    def addUpstreamDirection(self, upstream):\n-        if upstream:\n-            self.directions.append(-1)\n-\n-    def addDownstreamDirection(self, downstream):\n-        if downstream:\n-            self.directions.append(1)\n-\n-    def setColinear(self, colinear):\n-        self.colinear = colinear\n-\n-    def setAntisense(self,'..b'    progress.inc()\n-        for transcript in sorted(list(outputs), key = lambda flanking: (flanking.getChromosome(), flanking.getStart(), flanking.getEnd())):\n-            self.writer.addTranscript(transcript)\n-        self.writer.close()\n-        progress.done()\n-\n-    def run(self):\n-        self.flankings = {}\n-        for direction in STRANDS:\n-            self.getFlanking(direction)\n-        self.write()\n-\n-if __name__ == "__main__":\n-    \n-    description = "Get Flanking v1.0.1: Get the flanking regions of a set of reference. 
[Category: Data Selection]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input1",      dest="inputFileName1", action="store",                          type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--format1",     dest="format1",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")\n-    parser.add_option("-j", "--input2",      dest="inputFileName2", action="store",                          type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")\n-    parser.add_option("-g", "--format2",     dest="format2",        action="store",                          type="string", help="format of previous file [compulsory] [format: transcript file format]")\n-    parser.add_option("-5", "--upstream",    dest="upstream",       action="store_true", default=False,                     help="output upstream elements [format: boolean] [default: False]")\n-    parser.add_option("-3", "--downstream",  dest="downstream",     action="store_true", default=False,                     help="output downstream elements [format: boolean] [default: False]")\n-    parser.add_option("-c", "--colinear",    dest="colinear",       action="store_true", default=False,                     help="find first colinear element [format: boolean] [default: False]")\n-    parser.add_option("-a", "--antisense",   dest="antisense",      action="store_true", default=False,                     help="find first anti-sense element [format: boolean] [default: False]")\n-    parser.add_option("-e", "--noOverlap",   dest="noOverlap",      action="store_true", default=False,                     help="do not consider elements which are overlapping reference elements [format: boolean] [default: False]")\n-    parser.add_option("-d", "--minDistance", 
dest="minDistance",    action="store",      default=None,       type="int",    help="minimum distance between 2 elements [format: int]")\n-    parser.add_option("-D", "--maxDistance", dest="maxDistance",    action="store",      default=None,       type="int",    help="maximum distance between 2 elements [format: int]")\n-    parser.add_option("-t", "--tag",         dest="tagName",        action="store",      default="flanking", type="string", help="name of the new tag [format: string] [default: flanking]")\n-    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                          type="string", help="output file [format: output file in GFF3 format]")\n-    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,          type="int",    help="trace level [format: int]")\n-    (options, args) = parser.parse_args()\n-\n-    gf = GetFlanking(options.verbosity)\n-    gf.setInputFile(options.inputFileName1, options.format1, QUERY)\n-    gf.setInputFile(options.inputFileName2, options.format2, REFERENCE)\n-    gf.setOutputFile(options.outputFileName)\n-    gf.addUpstreamDirection(options.upstream)\n-    gf.addDownstreamDirection(options.downstream)\n-    gf.setColinear(options.colinear)\n-    gf.setAntisense(options.antisense)\n-    gf.setNoOverlap(options.noOverlap)\n-    gf.setMinDistance(options.minDistance)\n-    gf.setMaxDistance(options.maxDistance)\n-    gf.setNewTagName(options.tagName)\n-    gf.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mapperAnalyzer.py
--- a/SMART/Java/Python/mapperAnalyzer.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,486 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""\n-Read a mapping file (many formats supported) and select some of them\n-Mappings should be sorted by read names\n-"""\n-import os, random, shelve\n-from optparse import OptionParser, OptionGroup\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.parsing.FastaParser import FastaParser\n-from commons.core.parsing.FastqParser import FastqParser\n-from commons.core.parsing.GffParser import GffParser\n-from commons.core.writer.BedWriter import BedWriter\n-from commons.core.writer.UcscWriter import UcscWriter\n-from commons.core.writer.GbWriter import GbWriter\n-from commons.core.writer.Gff2Writer import Gff2Writer\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from commons.core.writer.FastaWriter import FastaWriter\n-from commons.core.writer.FastqWriter import FastqWriter\n-from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter\n-from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection\n-from SMART.Java.Python.mySql.MySqlTable import MySqlTable\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-\n-\n-distanceExons = 20\n-exonSize      = 20\n-\n-\n-class MapperAnalyzer(object):\n-    """\n-    Analyse the output of a parser\n-    """\n-\n-    def __init__(self, verbosity = 0):\n-        self.verbosity                = verbosity\n-        self.mySqlConnection          = MySqlConnection(verbosity)\n-        self.tooShort             
    = 0\n-        self.tooManyMismatches        = 0\n-        self.tooManyGaps              = 0\n-        self.tooShortExons            = 0\n-        self.tooManyMappings          = 0\n-        self.nbMappings               = 0\n-        self.nbSequences              = 0\n-        self.nbAlreadyMapped          = 0\n-        self.nbAlreadyMappedSequences = 0\n-        self.nbWrittenMappings        = 0\n-        self.nbWrittenSequences       = 0\n-        self.parser                   = None\n-        self.logHandle                = None\n-        self.randomNumber             = random.randint(0, 100000)\n-        self.gff3Writer               = None\n-        self.alreadyMappedReader      = None\n-        self.unmatchedWriter          = None\n-        self.sequenceListParser       = None\n-        self.sequences                = None\n-        self.alreadyMapped            = None\n-        self.mappedNamesTable         = None\n-        s'..b'up.add_option("-v", "--verbosity",        dest="verbosity",         action="store",      default=1,        type="int",    help="trace level [default: 1] [format: int]")\n-    otheGroup.add_option("-l", "--log",              dest="log",               action="store_true", default=False,                   help="write a log file [format: bool] [default: false]")\n-    parser.add_option_group(compGroup)\n-    parser.add_option_group(filtGroup)\n-    parser.add_option_group(tranGroup)\n-    parser.add_option_group(outpGroup)\n-    parser.add_option_group(otheGroup)\n-    (options, args) = parser.parse_args()\n-\n-    \n-    analyzer = MapperAnalyzer(options.verbosity)\n-    analyzer.setMappingFile(options.inputFileName, options.format)\n-    analyzer.setSequenceFile(options.sequencesFileName, options.sequenceFormat)\n-    analyzer.setOutputFile(options.outputFileName, options.title)\n-    if options.appendFileName != None:\n-        analyzer.setAlreadyMatched(options.appendFileName)\n-    if options.remaining:\n-        
analyzer.setRemainingFile(options.outputFileName, options.sequenceFormat)\n-    if options.number != None:\n-        analyzer.setMaxMappings(options.number)\n-    if options.size != None:\n-        analyzer.setMinSize(options.size)\n-    if options.identity != None:\n-        analyzer.setMinId(options.identity)\n-    if options.mismatch != None:\n-        analyzer.setMaxMismatches(options.mismatch)\n-    if options.gap != None:\n-        analyzer.setMaxGaps(options.gap)\n-    if options.mergeExons:\n-        analyzer.mergeExons(True)\n-    if options.removeExons:\n-        analyzer.acceptShortExons(False)\n-    if options.log:\n-        analyzer.setLog("%s.log" % (options.outputFileName))\n-    analyzer.analyze()\n-    \n-    if options.verbosity > 0:\n-        print "kept %i sequences over %s (%f%%)" % (analyzer.nbWrittenSequences, analyzer.nbSequences, float(analyzer.nbWrittenSequences) / analyzer.nbSequences * 100)\n-        if options.appendFileName != None:\n-            print "kept %i sequences over %s (%f%%) including already mapped sequences" % (analyzer.nbWrittenSequences + analyzer.nbAlreadyMappedSequences, analyzer.nbSequences, float(analyzer.nbWrittenSequences + analyzer.nbAlreadyMappedSequences) / analyzer.nbSequences * 100)\n-        print "kept %i mappings over %i (%f%%)" % (analyzer.nbWrittenMappings, analyzer.nbMappings, float(analyzer.nbWrittenMappings) / analyzer.nbMappings * 100)\n-        if options.appendFileName != None:\n-            print "kept %i mappings over %i (%f%%) including already mapped" % (analyzer.nbWrittenMappings + analyzer.nbAlreadyMapped, analyzer.nbMappings, float(analyzer.nbWrittenMappings + analyzer.nbAlreadyMapped) / analyzer.nbMappings * 100)\n-        print "removed %i too short mappings (%f%%)" % (analyzer.tooShort, float(analyzer.tooShort) / analyzer.nbMappings * 100)\n-        print "removed %i mappings with too many mismatches (%f%%)" % (analyzer.tooManyMismatches, float(analyzer.tooManyMismatches) / 
analyzer.nbMappings * 100)\n-        print "removed %i mappings with too many gaps (%f%%)" % (analyzer.tooManyGaps, float(analyzer.tooManyGaps) / analyzer.nbMappings * 100)\n-        print "removed %i mappings with too short exons (%f%%)" % (analyzer.tooShortExons, float(analyzer.tooShortExons) / analyzer.nbMappings * 100)\n-        print "removed %i sequences with too many hits (%f%%)" % (analyzer.tooManyMappings, float(analyzer.tooManyMappings) / analyzer.nbSequences * 100)\n-        print "%i sequences have no mapping (%f%%)" % (analyzer.nbSequences - analyzer.nbWrittenSequences, float(analyzer.nbSequences - analyzer.nbWrittenSequences) / analyzer.nbSequences * 100)\n-        if options.appendFileName != None:\n-            print "%i sequences have no mapping (%f%%) excluding already mapped sequences" % (analyzer.nbSequences - analyzer.nbWrittenSequences - analyzer.nbAlreadyMappedSequences, float(analyzer.nbSequences - analyzer.nbWrittenSequences - analyzer.nbAlreadyMappedSequences) / analyzer.nbSequences * 100)\n-\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mappingToCoordinates.py
--- a/SMART/Java/Python/mappingToCoordinates.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,91 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-
-"""Convert files with some mapping format to coordinates format"""
-
-import os
-from optparse import OptionParser
-from commons.core.parsing.PslParser import PslParser
-from commons.core.parsing.AxtParser import AxtParser
-from commons.core.writer.Gff3Writer import Gff3Writer
-from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
-from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.misc.Progress import Progress
-
-
-class MappingToCoordinates(object):
-    def __init__(self,verbosity=1, inputFileName=None, format = None, output=None,galaxy = False, title="S-MART"):
-        self.verbosity = verbosity
-        self.inputFileName = inputFileName
-        self.format = format
-        self.output = output
-        self.galaxy = galaxy
-        self.title = title
-    
-    def setAttributesFromCmdLine(self):
-        description = "Mapping To Coordinates v1.0.1: Convert a set of mappings (given by a mapping tool) to a set of transcripts. [Category: Conversion]"
-        parser = OptionParser(description = description)
-        parser.add_option("-i", "--input",     dest="inputFileName", action="store",                     type="string", help="input file [compulsory] [format: file in mapping format given by -f]")
-        parser.add_option("-f", "--format",    dest="format",        action="store",                     type="string", help="format of file [compulsory] [format: mapping file format]")
-        parser.add_option("-o", "--output",    dest="output",        action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-        parser.add_option("-v", "--verbosity", dest="verbosity",     action="store",      default=1,     type="int",    help="trace level [format: int]")
-        parser.add_option("-G", "--galaxy",    dest="galaxy",        action="store_true", default=False,                help="used for galaxy [format: bool] [default: False]")    
-        (options, args) = parser.parse_args()
-    
-        self.verbosity = options.verbosity
-        self.inputFileName = options.inputFileName
-        self.format = options.format
-        self.output = options.output
-        self.galaxy = options.galaxy
-
-    def run(self):  
-        if self.verbosity > 0:
-            print "Reading input file..."
-        parser = TranscriptContainer(self.inputFileName, self.format, self.verbosity)
-        if self.verbosity > 0:
-            print "... done"
-        writer = Gff3Writer(self.output, self.verbosity, self.title)
-            
-        progress = Progress(parser.getNbTranscripts(), "Reading %s" % (self.inputFileName), self.verbosity)
-        for transcript in parser.getIterator():
-            writer.addTranscript(transcript)
-            progress.inc()
-        progress.done()
-        
-        if self.galaxy:
-            os.rename("%s.gff3" % (self.output), self.output) 
-            
-if __name__ == '__main__':
-    launcher = MappingToCoordinates()
-    launcher.setAttributesFromCmdLine()
-    launcher.run()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mergeSlidingWindowsClusters.py
--- a/SMART/Java/Python/mergeSlidingWindowsClusters.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,144 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Merge sliding windows of two different clusterings
-"""
-
-import sys
-import re
-import os
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from commons.core.writer.Gff3Writer import Gff3Writer
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.structure.Transcript import Transcript
-
-class MergeSlidingWindowsClusters(object):
-    """
-    Merge the ouptput of several sets of sliding windows
-    """
-
-    def __init__(self, verbosity = 0):
-        self.verbosity     = verbosity
-        self.inputs        = []
-        self.outputData    = {}
-        self.nbData        = 0
-        self.nbWrittenData = 0
-        self.chromosomes   = []
-        self.writer        = None
-
-    def __del__(self):
-        if self.writer != None:
-            self.writer.close()
-
-    def addInput(self, fileName, fileFormat):
-        self.inputs.append(TranscriptContainer(fileName, fileFormat, self.verbosity))
-        self.chromosomes = list(set(self.chromosomes).union(set(self.inputs[-1].getChromosomes())))
-
-    def setOutput(self, fileName):
-        self.writer = Gff3Writer(fileName, self.verbosity)
-
-    def readInput(self, i, chromosome):
-        progress = Progress(self.inputs[i].getNbTranscripts(), "Reading file #%d -- chromosome %s" % (i+1, chromosome), self.verbosity)
-        for transcript in self.inputs[i].getIterator():
-            progress.inc()
-            if chromosome != transcript.getChromosome(): continue
-            start     = transcript.getStart()
-            end       = transcript.getEnd()
-            direction = transcript.getDirection()
-            tags      = transcript.tags
-            if chromosome not in self.outputData:
-                self.outputData[chromosome] = {}
-            if direction not in self.outputData[chromosome]:
-                self.outputData[chromosome][direction] = {}
-            if start not in self.outputData[chromosome][direction]:
-                self.outputData[chromosome][direction][start] = {}
-            if end in self.outputData[chromosome][direction][start]:
-                ends = self.outputData[chromosome][direction][start].keys()
-                if ends[0] != end:
-                    sys.exit("Error! Two regions starting at %d end are not consistent (%d and %d) in %s on strand %d" % (start, end, ends[0], chromosome, direction))
-                self.outputData[chromosome][direction][start][end].update(tags)
-            else:
-                self.outputData[chromosome][direction][start][end] = tags
-                self.nbData += 1
-        progress.done()
-
-
-    def writeOutput(self, chromosome):
-        progress = Progress(self.nbData - self.nbWrittenData, "Writing output for chromosome %s" % (chromosome), self.verbosity)
-        for direction in self.outputData[chromosome]:
-            for start in self.outputData[chromosome][direction]:
-                for end in self.outputData[chromosome][direction][start]:
-                    transcript = Transcript()
-                    transcript.setChromosome(chromosome)
-                    transcript.setStart(start)
-                    transcript.setEnd(end)
-                    transcript.setDirection(direction)
-                    transcript.tags = self.outputData[chromosome][direction][start][end]
-                    transcript.setName("region_%d" % (self.nbWrittenData + 1))
-                    tags = transcript.getTagNames()
-                    for tag in tags:
-                        if tag.startswith("Name_") or tag.startswith("ID_"):
-                            del transcript.tags[tag]
-                    self.nbWrittenData += 1
-                    self.writer.addTranscript(transcript)
-                    progress.inc()
-        self.writer.write()
-        progress.done()
-        self.outputData = {}
-
-    def merge(self):
-        for chromosome in self.chromosomes:
-            for i, input in enumerate(self.inputs):
-                self.readInput(i, chromosome)
-            self.writeOutput(chromosome)
-        self.writer.close()
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Merge Sliding Windows Clusters v1.0.2: Merge two files containing the results of a sliding windows clustering. [Category: Sliding Windows]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input1",       dest="inputFileName1", action="store",                     type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--inputFormat1", dest="inputFormat1",   action="store",                     type="string", help="format of the input file 1 [compulsory] [format: transcript file format]")
-    parser.add_option("-j", "--input2",       dest="inputFileName2", action="store",                     type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
-    parser.add_option("-g", "--inputFormat2", dest="inputFormat2",   action="store",                     type="string", help="format of the input file 2 [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",       dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-v", "--verbosity",    dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    merger = MergeSlidingWindowsClusters(options.verbosity)
-    merger.addInput(options.inputFileName1, options.inputFormat1)
-    merger.addInput(options.inputFileName2, options.inputFormat2)
-    merger.setOutput(options.outputFileName)
-    merger.merge()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mergeTranscriptLists.py
--- a/SMART/Java/Python/mergeTranscriptLists.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,174 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""Merge elements of two transcript lists with some condition"""\n-\n-import os, random, shutil, glob\n-from optparse import OptionParser\n-from commons.core.parsing.SequenceListParser import SequenceListParser\n-from commons.core.parsing.BedParser import BedParser\n-from commons.core.parsing.GffParser import GffParser\n-from commons.core.writer.TranscriptWriter import TranscriptWriter\n-from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from SMART.Java.Python.misc.Progress import Progress\n-\n-\n-\n-class MergeLists(object):\n-\n-    def __init__(self, verbosity):\n-        self.verbosity     = verbosity\n-        self.seed          = random.randint(0, 100000)\n-        self.aggregation   = False\n-        self.normalization = False\n-        self.distance      = False\n-        self.antisense     = False\n-        self.colinear      = False\n-        self.fileNames     = {}\n-        self.formats       = {}\n-        self.tmpFileNames  = []\n-        self.logHandle     = None\n-\n-#    def __del__(self):\n-#        for fileNameRoot in self.tmpFileNames:\n-#            for fileName in glob.glob("%s*" % (fileNameRoot)):\n-#                os.remove(fileName)\n-#        if self.logHandle != None:\n-#            self.logHandle.close()\n-#            self.logHandle = None\n-\n-    def setLogFileName(self, fileName):\n-        self.logHandle = open(fileName, 
"w")\n-\n-    def setInputFileName(self, fileName, format, id):\n-        self.fileNames[id] = fileName\n-        self.formats[id]   = format\n-\n-    def setOutputFileName(self, fileName):\n-        self.outputFileName = fileName\n-\n-    def setAggregate(self, aggregation):\n-        self.aggregation = aggregation\n-\n-    def setNormalization(self, normalization):\n-        self.normalization = normalization\n-\n-    def setDistance(self, distance):\n-        self.distance = distance\n-\n-    def setAntisense(self, antisense):\n-        self.antisense = antisense\n-\n-    def setColinear(self, colinear):\n-        self.colinear = colinear\n-\n-    def createTmpFileName(self, root):\n-        fileName = "tmp_%s_%d.gff3" % (root, self.seed)\n-        self.tmpFileNames.append(fileName)\n-        return fileName\n-\n-    def selfMerge(self, fileName, format, outputFileName):\n-        transcriptListComparator = TranscriptListsComparator(self.logHandle,'..b'nscriptListComparator.compareTranscriptList()\n-\n-    def mergeFiles(self, fileName1, fileName2, outputFileName):\n-        outputFile = open(outputFileName, "w")\n-        shutil.copyfileobj(open(fileName1, "r"), outputFile)\n-        shutil.copyfileobj(open(fileName2, "r"), outputFile)\n-        outputFile.close()\n-\n-    def run(self):\n-        selectedFileQuery = self.createTmpFileName("query")\n-        self.keepOverlapping({0: self.fileNames[0], 1: self.fileNames[0]}, {0: "gff3", 1: "gff3"}, selectedFileQuery)\n-        mergeFileTarget = self.createTmpFileName("target")\n-        self.selfMerge(self.fileNames[1], self.formats[1], mergeFileTarget)\n-        if not self.aggregation:\n-            overlapFile = self.createTmpFileName("overlap")\n-            self.keepOverlapping({0: mergeFileTarget, 1: selectedFileQuery}, {0: "gff3", 1: "gff3"}, overlapFile)\n-            mergeFileTarget = overlapFile\n-        mergeFileMerged = self.createTmpFileName("merged")\n-        self.mergeFiles(mergeFileTarget, 
selectedFileQuery, mergeFileMerged)\n-        self.selfMerge(mergeFileMerged, "gff3", self.outputFileName)\n-\n-\n-\n-if __name__ == "__main__":\n-    \n-    # parse command line\n-    description = "Merge Lists v1.0.3: Merge the elements of two lists of genomic coordinates. [Category: Merge]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input1",    dest="inputFileName1", action="store",                       type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--format1",   dest="format1",        action="store",                       type="string", help="format of file 1 [compulsory] [format: transcript file format]")\n-    parser.add_option("-j", "--input2",    dest="inputFileName2", action="store",      default=None,    type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n-    parser.add_option("-g", "--format2",   dest="format2",        action="store",      default=None,    type="string", help="format of file 2 [compulsory] [format: file in transcript format]")\n-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",      default=None,    type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n-    parser.add_option("-k", "--all",       dest="all",            action="store_true", default=False,                  help="print all the transcripts, not only those overlapping [format: bool] [default: false]")\n-    parser.add_option("-d", "--distance",  dest="distance",       action="store",      default=0,       type="int",    help="max. 
distance between two transcripts [format: int] [default: 0]")\n-    parser.add_option("-a", "--antisense", dest="antisense",      action="store_true", default=False,                  help="antisense only [format: bool] [default: false]")\n-    parser.add_option("-c", "--colinear",  dest="colinear",       action="store_true", default=False,                  help="colinear only [format: bool] [default: false]")\n-    parser.add_option("-n", "--normalize", dest="normalize",      action="store_true", default=False,                  help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")\n-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,       type="int",    help="trace level [format: int]")\n-    (options, args) = parser.parse_args()\n-\n-#    ml = MergeLists(logHandle, options.verbosity)\n-    \n-    ml = MergeLists(0)\n-    ml.setInputFileName(options.inputFileName1, options.format1, 0)\n-    ml.setInputFileName(options.inputFileName2, options.format2, 1)\n-    ml.setOutputFileName(options.outputFileName)\n-    ml.setAntisense(options.antisense)\n-    ml.setColinear(options.colinear)\n-    ml.setAggregate(options.all)\n-    ml.setNormalization(options.normalize)\n-    ml.setDistance(options.distance)\n-    ml.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/MultipleRPlotter.py
--- a/SMART/Java/Python/misc/MultipleRPlotter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,160 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2012
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-import os
-import subprocess
-import random
-import math
-from SMART.Java.Python.misc.RPlotter import RPlotter
-
-NBCOLORS = 9
-
-"""
-Plot multiple curves with RPlotter
-"""
-
-class MultipleRPlotter(object):
- """
- Plot some curves
- @ivar fileName: name of the file
- @type fileName: string
- @ivar height: height of the file
- @type height: int
- @ivar width: width of the file
- @type width: int
- @ivar plots: plots to be included
- @type plots: list of L{RPlotter{RPlotter}}
- @ivar keep: keep script lines
- @type keep: boolean
- @ivar format: format of the file
- @type format: string
- """
-
- def __init__(self, fileName, verbosity = 0, keep = False):
- """
- Constructor
- @param fileName: name of the file to produce
- @type  fileName: string
- @param verbosity: verbosity
- @type  verbosity: int
- @param keep: keep temporary files
- @type  keep: boolean
- """
- self.fileName = fileName
- self.verbosity = verbosity
- self.keep = keep
- self.format = "png"
- self.width = 1000
- self.height = 500
- self.plots = []
- self.scriptFileName = "tmpScript-%d.R" % (os.getpid())
-
- def __del__(self):
- """
- Destructor
- Remove script files
- """
- if not self.keep:
- if os.path.exists(self.scriptFileName):
- os.remove(self.scriptFileName)
- outputFileName = "%sout" % (self.scriptFileName)
- if os.path.exists(outputFileName):
- os.remove(outputFileName)
-
- def setFormat(self, format):
- """
- Set the format of the picture
- @param format: the format
- @type format: string
- """
- if format not in ("png", "pdf", "jpeg", "bmp", "tiff"):
- raise Exception("Format '%s' is not supported by RPlotter" % (format))
- self.format = format
-
-
- def setWidth(self, width):
- """
- Set the dimensions of the image produced
- @param width: width of the image
- @type width: int
- """
- self.width = width
-
-
- def setHeight(self, height):
- """
- Set the dimensions of the image produced
- @param height: heigth of the image
- @type height: int
- """
- self.height = height
-
-
- def setImageSize(self, width, height):
- """
- Set the dimensions of the image produced
- @param width: width of the image
- @type width: int
- @param height: heigth of the image
- @type height: int
- """
- self.width = width
- self.height = height
-
- def addPlot(self, plot):
- """
- Add a plot
- @param plots: plot to be included
- @type  plots: L{RPlotter{RPlotter}}
- """
- self.plots.append(plot)
-
- def plot(self):
- """
- Plot the figures
- """
- scriptHandle = open(self.scriptFileName, "w")
- scriptHandle.write("library(RColorBrewer)\n")
- scriptHandle.write("colorPanel = brewer.pal(n=%d, name=\"Set1\")\n" % (NBCOLORS))
- scriptHandle.write("%s(%s = \"%s\", width = %d, height = %d, bg = \"white\")\n" % (self.format, "filename" if self.format != "pdf" else "file", self.fileName, self.width, self.height))
- scriptHandle.write("par(mfrow=c(%d, 1))\n" % (len(self.plots)))
- for plot in self.plots:
- scriptHandle.write(plot.getScript())
- scriptHandle.write("dev.off()\n")
- scriptHandle.close()
- rCommand = "R"
- if "SMARTRPATH" in os.environ:
- rCommand = os.environ["SMARTRPATH"]
- command = "\"%s\" CMD BATCH %s" % (rCommand, self.scriptFileName)
- status = subprocess.call(command, shell=True)
- if status != 0:
- self.keep = True
- raise Exception("Problem with the execution of script file %s, status is: %s" % (self.scriptFileName, status))
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/MultipleRPlotter.pyc
b
Binary file SMART/Java/Python/misc/MultipleRPlotter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/Progress.py
--- a/SMART/Java/Python/misc/Progress.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,93 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-import time
-
-class Progress(object):
-    """Show the progress of a process"""
-
-    def __init__(self, aim, message = "Progress", verbosity = 0):
-        self.aim            = aim
-        self.progress       = 0
-        self.message        = message
-        self.length         = -1
-        self.verbosity      = verbosity
-        self.maxMessageSize = 50
-        self.barSize        = 80
-        self.startTime      = time.time()
-        self.elapsed        = 0
-        if len(self.message) > self.maxMessageSize:
-            self.message = self.message[0:self.maxMessageSize-3] + "..."
-        self.show()
-
-
-    def inc(self):
-        self.progress += 1
-        self.show()
-        
-        
-    def getPrintableElapsedTime(self, time):
-        timeHou = int(time) / 3600
-        timeMin = int(time) / 60 - 60 * timeHou
-        timeSec = int(time) % 60
-        if timeHou > 0:
-            return "%3dh %2dm" % (timeHou, timeMin)
-        if timeMin > 0:
-            return "%2dm %2ds" % (timeMin, timeSec)
-        return "%2ds   " % (timeSec)
-
-
-    def show(self):
-        if self.verbosity <= 0:
-            return
-        if self.aim == 0:
-            return
-        messageSize = len(self.message)
-        length = int(self.progress / float(self.aim) * self.barSize)
-        elapsed = int(time.time() - self.startTime)
-        if (length > self.length) or (elapsed > self.elapsed + 10):
-            self.length = length
-            self.elapsed = elapsed            
-            string = "%s%s[%s%s] %d/%d" % (self.message, " " * max(0, self.maxMessageSize - messageSize), "=" * self.length, " " * (self.barSize - self.length), self.progress, self.aim)
-            if elapsed > 5:
-                done = float(self.progress) / self.aim
-                total = elapsed / done
-                remaining = total - elapsed
-                string += " ETA: %s " % (self.getPrintableElapsedTime(remaining))
-            string += "\r"
-            sys.stdout.write(string)
-            sys.stdout.flush()
-
-
-    def done(self):
-        if self.verbosity > 0:
-            messageSize = len(self.message)
-            elapsed = time.time() - self.startTime
-            print "%s%s[%s] %d completed in %s " % (self.message, " " * max(0, self.maxMessageSize - messageSize), "=" * self.barSize, self.aim, self.getPrintableElapsedTime(elapsed))
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/Progress.pyc
b
Binary file SMART/Java/Python/misc/Progress.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/RPlotter.py
--- a/SMART/Java/Python/misc/RPlotter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,820 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-\n-import os\n-import subprocess\n-import random\n-import math\n-\n-minPositiveValue = 10e-6\n-\n-"""\n-Plot simple curves in R\n-"""\n-\n-class RPlotter(object):\n-    """\n-    Plot some curves\n-    @ivar nbColors: number of different colors\n-    @type nbColors: int\n-    @ivar fileName: name of the file\n-    @type fileName: string\n-    @ivar lines: lines to be plotted\n-    @type lines: array of dict\n-    @ivar names: name of the lines\n-    @type names: array of strings\n-    @ivar colors: color of the lines\n-    @type colors: array of strings\n-    @ivar types: type of the lines (plain or dashed)\n-    @type types: array of strings\n-    @ivar format: format of the picture\n-    @type format: string\n-    @ivar lineWidth: width of the line in a xy-plot\n-    @type lineWidth: int\n-    @ivar xMin: minimum value taken on the x-axis\n-    @type xMin: int\n-    @ivar xMax: maximum value taken on the x-axis\n-    @type xMax: int\n-    @ivar yMin: minimum value taken on the y-axis\n-    @type yMin: int\n-    @ivar yMax: maximum value taken on the y-axis\n-    @type yMax: int\n-    @ivar minimumX: minimum value allowed on the x-axis\n-    @type minimumX: int\n-    @ivar maximumX: maximum value allowed on the x-axis\n-    @type maximumX: int\n-    @ivar minimumY: minimum value allowed on the y-axis\n-    @type minimumY: int\n-    @ivar maximumY: maximum value allowed on the y-axis\n-    @type maximumY: int\n-    @ivar leftMargin:  add some margin in the left part of the plot\n-    @type leftMargin:  float\n-    @ivar rightMargin: 
add some margin in the right part of the plot\n-    @type rightMargin: float\n-    @ivar downMargin:  add some margin at the top of the plot\n-    @type downMargin:  float\n-    @ivar upMargin:    add some margin at the bottom of the plot\n-    @type upMargin:    float\n-    @ivar logX: use log scale on the x-axis\n-    @type logX: boolean\n-    @ivar logY: use log scale on the y-axis\n-    @type logY: boolean\n-    @ivar logZ: use log scale on the z-axis (the color)\n-    @type logZ: boolean\n-    @ival fill: if a value is not given, fill it with given value\n-    @type fill: int\n-    @ival bucket: cluster the data into buckets of given size\n-    @type bucket: int\n-    @ival seed: a random number\n-    @type seed: int\n-    @ival regression: plot a linear regression\n-    @type regression: boolean\n-    @ival legend: set the legend\n-    @type legend: boolean\n-    @ival legendBySide: set the legend outside of the plot\n-    @type legendBySde: boolean\n-    @ival xLabel: l'..b' lwd = %d, cex = 1.5, ncol = 1, bg = \\"white\\")\\n" % (self.lineWidth)\n-\n-        return script\n-            \n-\n-\n-    def plot(self):\n-        """\n-        Plot the lines\n-        """\n-        scriptFileName = "tmpScript-%d.R" % (self.seed)\n-        scriptHandle = open(scriptFileName, "w")\n-        scriptHandle.write("library(RColorBrewer)\\n")\n-        scriptHandle.write("colorPanel = brewer.pal(n=%d, name=\\"Set1\\")\\n" % (self.nbColors))\n-        scriptHandle.write("%s(%s = \\"%s\\", width = %d, height = %d, bg = \\"white\\")\\n" % (self.format, "filename" if self.format != "pdf" else "file", self.fileName, self.width, self.height))\n-        scriptHandle.write(self.getScript())\n-        scriptHandle.write("dev.off()\\n")\n-        scriptHandle.close()\n-        rCommand = "R"\n-        if "SMARTRPATH" in os.environ:\n-            rCommand = os.environ["SMARTRPATH"]\n-        command = "\\"%s\\" CMD BATCH %s" % (rCommand, scriptFileName)\n-        status = 
subprocess.call(command, shell=True)\n-\n-        if status != 0:\n-            self.keep = True\n-            raise Exception("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))\n-            \n-\n-    def getCorrelationData(self):\n-        if not self.regression:\n-            return ""\n-        scriptFileName = "tmpScript-%d.R" % (self.seed)\n-        rScript = open(scriptFileName, "w")\n-        rScript.write("data = scan(\\"tmpData-%d-0.dat\\", list(x = -0.000000, y = -0.000000))\\n" % (self.seed))\n-        x = "log10(data$x)" if self.logX else "data$x"\n-        y = "log10(data$y)" if self.logY else "data$y"\n-        rScript.write("summary(lm(%s ~ %s))\\n" % (y, x))\n-        rScript.close()\n-        rCommand = "R"\n-        if "SMARTRPATH" in os.environ:\n-            rCommand = os.environ["SMARTRPATH"]\n-        command = "\\"%s\\" CMD BATCH %s" % (rCommand, scriptFileName)\n-        status = subprocess.call(command, shell=True)\n-        if status != 0:\n-            self.keep = True\n-            raise Exception("Problem with the execution of script file %s computing the correlation, status is: %s" % (scriptFileName, status))\n-        outputRFile = open("%sout" % (scriptFileName))\n-        output      = ""\n-        start       = False\n-        end         = False\n-        for line in outputRFile:\n-            if start and "> " in line:\n-                end = True\n-            if start and not end:\n-                output += line\n-            if "summary" in line:\n-                start = True\n-        return output\n-\n-\n-    def getSpearmanRho(self):\n-        """\n-        Get the Spearman rho correlation using R\n-        """\n-        return None\n-        if not self.points and not self.barplot and not self.heatPoints:\n-            raise Exception("Cannot compute Spearman rho correlation whereas not in \'points\' or \'bar\' mode.")\n-        \n-        scriptFileName = "tmpScript-%d.R" % 
(self.seed)\n-        rScript = open(scriptFileName, "w")\n-        rScript.write("library(Hmisc)\\n")\n-        rScript.write("data = scan(\\"tmpData-%d-0.dat\\", list(x = -0.000000, y = -0.000000))\\n" % (self.seed))\n-        rScript.write("spearman(data$x, data$y)\\n")\n-        rScript.close()\n-\n-        rCommand = "R"\n-        if "SMARTRPATH" in os.environ:\n-            rCommand = os.environ["SMARTRPATH"]\n-        command = "\\"%s\\" CMD BATCH %s" % (rCommand, scriptFileName)\n-        status = subprocess.call(command, shell=True)\n-\n-        if status != 0:\n-            self.keep = True\n-            raise Exception("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))\n-\n-        outputRFile = open("%sout" % (scriptFileName))\n-        nextLine = False\n-        for line in outputRFile:\n-            line = line.strip()\n-            if nextLine:\n-                if line == "NA":\n-                    return None\n-                return float(line)\n-                nextLine = False\n-            if line == "rho":\n-                nextLine = True\n-\n-        return None\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/RPlotter.pyc
b
Binary file SMART/Java/Python/misc/RPlotter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/UnlimitedProgress.py
--- a/SMART/Java/Python/misc/UnlimitedProgress.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,81 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-import time
-
-class UnlimitedProgress(object):
-    """Show the progress of a process when no upper bound is known"""
-
-    def __init__(self, step = 1000, message = "Progress", verbosity = 0):
-        self.step           = step
-        self.progress       = 0
-        self.message        = message
-        self.verbosity      = verbosity
-        self.maxMessageSize = 50
-        self.startTime      = time.time()
-        self.elapsed        = 0
-        if len(self.message) > self.maxMessageSize:
-            self.message = self.message[0:self.maxMessageSize-3] + "..."
-        self.show()
-
-
-    def inc(self):
-        self.progress += 1
-        self.show()
-        
-        
-    def getPrintableElapsedTime(self, time):
-        timeHou = int(time) / 3600
-        timeMin = int(time) / 60 - 60 * timeHou
-        timeSec = int(time) % 60
-        if timeHou > 0:
-            return "%3dh %2dm" % (timeHou, timeMin)
-        if timeMin > 0:
-            return "%2dm %2ds" % (timeMin, timeSec)
-        return "%2ds" % (timeSec)
-
-
-    def show(self):
-        if self.verbosity <= 0:
-            return
-        elapsed = int(time.time() - self.startTime)
-        if (self.progress % self.step == 0) or (elapsed > self.elapsed + 10):
-            self.elapsed = elapsed            
-            string = "%s %d -- time spent: %s\r" % (self.message, self.progress, self.getPrintableElapsedTime(elapsed))
-            sys.stdout.write(string)
-            sys.stdout.flush()
-
-
-    def done(self):
-        if self.verbosity > 0:
-            elapsed = time.time() - self.startTime
-            string = "%s %d -- time spent: %s\r" % (self.message, self.progress, self.getPrintableElapsedTime(elapsed))
-            print string
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/UnlimitedProgress.pyc
b
Binary file SMART/Java/Python/misc/UnlimitedProgress.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/Utils.py
--- a/SMART/Java/Python/misc/Utils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,271 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""Some useful functions"""\n-\n-import sys, os\n-import random\n-import subprocess\n-\n-\n-def writeFile(fileName, content):\n-    """\n-    Write the content of a file\n-    """\n-    handle = open(fileName, "w")\n-    handle.write(content)\n-    handle.close()\n-\n-def sumOfLists(list1, list2):\n-    """\n-    Element by element sum\n-    """\n-    if len(list1) != len(list2):\n-        sys.exit("Cannot sum list whose sizes are different!")\n-    return [list1[i] + list2[i] for i in range(len(list1))]\n-\n-\n-def protectBackslashes(string):\n-    """\n-    Protect the backslashes in a path by adding another backslash\n-    """\n-    return string.replace("\\\\", "\\\\\\\\")\n-    \n-\n-def getHammingDistance(string1, string2):\n-    """\n-    Compute Hamming distance between two strings\n-    """\n-    if len(string1) != len(string2):\n-        raise Exception("Error, size of %s and %s differ" % (string1, string2))\n-    return sum(ch1 != ch2 for ch1, ch2 in zip(string1, string2))\n-\n-\n-def getLevenshteinDistance(string1, string2):\n-    """\n-    Compute Levenshtein distance between two strings\n-    """\n-    if len(string1) < len(string2):\n-        return getLevenshteinDistance(string2, string1)\n-    if not string1:\n-        return len(string2)\n-    previousRow = xrange(len(string2) + 1)\n-    for i, c1 in enumerate(string1):\n-        currentRow = [i + 1]\n-        for j, c2 in enumerate(string2):\n-            insertions    = previousRow[j + 1] + 1\n-            deletions     = currentRow[j] + 1\n-            
substitutions = previousRow[j] + (c1 != c2)\n-            currentRow.append(min(insertions, deletions, substitutions))\n-        previousRow = currentRow\n-    return previousRow[-1]\n-\n-\n-def getMinAvgMedMax(values):\n-    """\n-    Get some stats about a dict\n-    @param values: a distribution (the value being the number of occurrences of the key)\n-    @type values: dict int to int\n-    @return: a tuple\n-    """\n-    minValues = min(values.keys())\n-    maxValues = max(values.keys())\n-    sumValues = sum([value * values[value] for value in values])\n-    nbValues = sum(values.values())\n-    allValues = []\n-    for key in values:\n-        for i in range(values[key]):\n-            allValues.append(key)\n-    sortedValues = sorted(allValues)\n-    sorted(values.values())\n-    if (nbValues % 2 == 0):\n-        medValues = (sortedValues[nbValues / 2 - 1] + sortedValues[nbValues / 2]) / 2.0\n-    else:\n-        medValues = sortedValues[(nbValues + 1) / 2 - 1]\n-    return (minValues, float('..b'les differ (%d != %d)" % (len(lines1), len(lines2))\n-        return False\n-    for i in xrange(len(lines1)):\n-        if lines1[i] != lines2[i]:\n-            print "Line %d differ (\'%s\' != \'%s\')" % (i, lines1[i].strip(), lines2[i].strip())\n-            return False\n-    return True\n-\n-\n-def binomialCoefficient(a, b):\n-    """\n-    Compute cumulated product from a to b\n-    @param a: a value\n-    @type    a: int\n-    @param b: a value\n-    @type    b: int\n-    """\n-    if a > b / 2:\n-        a = b-a\n-    p = 1.0\n-    for i in range(b-a+1, b+1):\n-        p *= i\n-    q = 1.0\n-    for i in range(1, a+1):\n-        q *= i\n-    return p / q\n-\n-\n-memory = {}\n-\n-# def fisherExactPValue(a, b, c, d):\n-#     """\n-#     P-value of Fisher exact test for 2x2 contingency table\n-#     """\n-#     if (a, b, c, d) in memory:\n-#         return memory[(a, b, c, d)]\n-\n-#     n = a + b + c + d\n-#     i1 = binomialCoefficient(a, a+b)\n-#     i2 = 
binomialCoefficient(c, a+c)\n-#     i3 = binomialCoefficient(c+d, n)\n-#     pValue = i1 * i2 / i3\n-\n-#     memory[(a, b, c, d)] = pValue\n-\n-#     return pValue\n-    \n-\n-def fisherExactPValue(a, b, c, d):\n-    if (a, b, c, d) in memory:\n-        return memory[(a, b, c, d)]\n-\n-    scriptFileName = "tmpScript-%d.R" % (random.randint(0, 10000))\n-    rScript = open(scriptFileName, "w")\n-    rScript.write("data = matrix(c(%d, %d, %d, %d), nr=2)\\n" % (a, b, c, d))\n-    rScript.write("fisher.test(data)\\n")\n-    #rScript.write("chisq.test(data)\\n")\n-    rScript.close()\n-\n-    rCommand = "R"\n-    if "SMARTRPATH" in os.environ:\n-        rCommand = os.environ["SMARTRPATH"]\n-    command = "\\"%s\\" CMD BATCH %s" % (rCommand, scriptFileName)\n-    status = subprocess.call(command, shell=True)\n-\n-    if status != 0:\n-        sys.exit("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))\n-\n-    outputRFileName = "%sout" % (scriptFileName)\n-    outputRFile = open(outputRFileName)\n-    pValue = None\n-    pValueTag = "p-value "\n-    for line in outputRFile:\n-        line = line.strip()\n-        if line == "": continue\n-        for splittedLine in line.split(","):\n-            splittedLine = splittedLine.strip()\n-            if splittedLine.startswith(pValueTag):\n-                pValue = float(splittedLine.split()[-1])\n-                break\n-\n-    if pValue == None:\n-        sys.exit("Problem with the cannot find p-value! 
File %s, values are: %d, %d, %d, %d" % (scriptFileName, a, b, c, d))\n-\n-    os.remove(scriptFileName)\n-    os.remove(outputRFileName)\n-\n-    memory[(a, b, c, d)] = pValue\n-\n-    return pValue\n-\n-\n-def fisherExactPValueBulk(list):\n-\n-    scriptFileName = "tmpScript-%d.R" % (random.randint(0, 10000))\n-    rScript = open(scriptFileName, "w")\n-    for element in list:\n-        rScript.write("fisher.test(matrix(c(%d, %d, %d, %d), nr=2))$p.value\\n" % (int(element[0]), int(element[1]), int(element[2]), int(element[3])))\n-    rScript.close()\n-\n-    rCommand = "R"\n-    if "SMARTRPATH" in os.environ:\n-        rCommand = os.environ["SMARTRPATH"]\n-    command = "\\"%s\\" CMD BATCH %s" % (rCommand, scriptFileName)\n-    status = subprocess.call(command, shell=True)\n-\n-    if status != 0:\n-        sys.exit("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))\n-\n-    outputRFileName = "%sout" % (scriptFileName)\n-    outputRFile = open(outputRFileName)\n-    pValue = None\n-    pValueTag = "[1] "\n-    results = {}\n-    cpt = 0\n-    for line in outputRFile:\n-        line = line.strip()\n-        if line == "": continue\n-        if line.startswith(pValueTag):\n-            pValue = float(line.split()[-1])\n-            results[list[cpt][0:2]] = pValue\n-            cpt += 1\n-\n-    if pValue == None:\n-        sys.exit("Problem with the cannot find p-value!")\n-    if cpt != len(list):\n-        sys.exit("Error in the number of p-values computed by R in file \'%s\'!" % (scriptFileName))\n-\n-    os.remove(scriptFileName)\n-    os.remove(outputRFileName)\n-\n-    return results\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/Utils.pyc
b
Binary file SMART/Java/Python/misc/Utils.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/__init__.pyc
b
Binary file SMART/Java/Python/misc/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/misc/test/Test_Utils.py
--- a/SMART/Java/Python/misc/test/Test_Utils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,13 +0,0 @@
-import unittest
-from SMART.Java.Python.misc import Utils
-
-
-class Test_Utils(unittest.TestCase):
-
-    def testFisherExactPValue(self):
-        self.assertAlmostEqual(Utils.fisherExactPValue(3, 1, 1, 3), 0.4857142857142842, 3)
-
-
-if __name__ == '__main__':
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/modifyFasta.py
--- a/SMART/Java/Python/modifyFasta.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,62 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Modify the content of a FASTA file"""
-
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import FastaParser
-from commons.core.writer.FastaWriter import FastaWriter
-from SMART.Java.Python.misc.Progress import Progress
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Modify Sequence List v1.0.1: Extend or shring a list of sequences. [Category: Data Modification]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",                     dest="inputFileName",    action="store",                                         type="string", help="input file [compulsory] [format: file in FASTA format]")
-    parser.add_option("-o", "--output",                    dest="outputFileName", action="store",            default=None,    type="string", help="output file [compulsory] [format: output file in FASTA format]")
-    parser.add_option("-s", "--start",                     dest="start",                    action="store",            default=None,    type="int",        help="keep first nucleotides [format: int]")
-    parser.add_option("-e", "--end",                         dest="end",                        action="store",            default=None,    type="int",        help="keep last nucleotides [format: int]")
-    parser.add_option("-v", "--verbosity",             dest="verbosity",            action="store",            default=1,         type="int",        help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    parser     = FastaParser(options.inputFileName, options.verbosity)
-    writer     = FastaWriter(options.outputFileName, options.verbosity)
-    progress = Progress(parser.getNbSequences(), "Reading file %s" % (options.inputFileName), options.verbosity)
-    for sequence in parser.getIterator():
-        if options.start != None:
-            sequence.shrinkToFirstNucleotides(options.start)
-        if options.end != None:
-            sequence.shrinkToLastNucleotides(options.end)
-        writer.addSequence(sequence)
-        progress.inc()
-    progress.done()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/modifyGenomicCoordinates.py
--- a/SMART/Java/Python/modifyGenomicCoordinates.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,80 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Modify the genomic coordinates of a file"""
-
-from optparse import OptionParser
-from commons.core.writer.TranscriptWriter import TranscriptWriter
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.misc.Progress import Progress
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Modify Genomic Coordinates v1.0.1: Extend or shrink a list of genomic coordinates. [Category: Data Modification]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",      dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",     dest="format",         action="store",               type="string", help="format of the input [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",     dest="outputFileName", action="store",               type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-s", "--start",      dest="start",          action="store", default=None, type="int",    help="restrict to the start of the transcript [format: int]")
-    parser.add_option("-e", "--end",        dest="end",            action="store", default=None, type="int",    help="restrict to the end of the transcript [format: int]")
-    parser.add_option("-5", "--fivePrime",  dest="fivePrime",      action="store", default=None, type="int",    help="extend to the 5' direction [format: int]")
-    parser.add_option("-3", "--threePrime", dest="threePrime",     action="store", default=None, type="int",    help="extend to the 3' direction [format: int]")
-    parser.add_option("-v", "--verbosity",  dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
-
-    (options, args) = parser.parse_args()
-
-    parser = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
-        
-    writer = TranscriptWriter(options.outputFileName, "gff3", options.verbosity)
-
-    nbItems = 0
-    nbItems = parser.getNbItems()
-    print "%i items found" % (nbItems)
-
-    progress = Progress(nbItems, "Analyzing sequences of " + options.inputFileName, options.verbosity)
-    for transcript in parser.getIterator():
-        if options.start != None:
-            transcript.restrictStart(options.start)
-        if options.end != None:
-            transcript.restrictEnd(options.end)
-        if options.fivePrime != None:
-            transcript.extendStart(options.fivePrime)
-        if options.threePrime != None:
-            transcript.extendEnd(options.threePrime)
-
-        writer.addTranscript(transcript)
-
-        progress.inc()
-    progress.done()
-
-    writer.write()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/modifySequenceList.py
--- a/SMART/Java/Python/modifySequenceList.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,72 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Modify the content of a FASTA file"""
-import sys
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import FastaParser
-from commons.core.parsing.FastqParser import FastqParser
-from commons.core.writer.FastaWriter import FastaWriter
-from commons.core.writer.FastqWriter import FastqWriter
-from SMART.Java.Python.misc.Progress import Progress
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Modify Sequence List v1.0.1: Extend or shring a list of sequences. [Category: Data Modification]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input", dest="inputFileName",action="store", type="string", help="input file [compulsory] [format: file in format given by -f]")
-    parser.add_option("-o", "--output", dest="outputFileName", action="store",default=None,    type="string", help="output file [compulsory] [format: output file in format given by -f]")
-    parser.add_option("-f", "--format", dest="format",action="store",type="string", help="format of the file [compulsory] [format: sequence file format]")
-    parser.add_option("-s", "--start", dest="start", action="store", default=None,type="int",help="keep first nucleotides [format: int]")
-    parser.add_option("-e", "--end",  dest="end", action="store",default=None,type="int",help="keep last nucleotides [format: int]")
-    parser.add_option("-v", "--verbosity",dest="verbosity",action="store",default=1,type="int",help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    if options.format == "fasta":
-        parser = FastaParser(options.inputFileName, options.verbosity)
-        writer = FastaWriter(options.outputFileName, options.verbosity)
-    elif options.format == "fastq":
-        parser = FastqParser(options.inputFileName, options.verbosity)
-        writer = FastqWriter(options.outputFileName, options.verbosity)
-    else:
-        sys.exit("Do not understand '%s' file format." % (options.format))
-
-    progress = Progress(parser.getNbSequences(), "Reading file %s" % (options.inputFileName), options.verbosity)
-    for sequence in parser.getIterator():
-        if options.start != None:
-            sequence.shrinkToFirstNucleotides(options.start)
-        if options.end != None:
-            sequence.shrinkToLastNucleotides(options.end)
-        writer.addSequence(sequence)
-        progress.inc()
-    progress.done()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/MySqlConnection.py
--- a/SMART/Java/Python/mySql/MySqlConnection.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,137 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-#! /usr/bin/env python
-import os
-import random
-import sqlite3
-from SMART.Java.Python.mySql.MySqlQuery import MySqlQuery
-
-
-class MySqlConnection(object):
-    """Connection to a database"""
-
-    def __init__(self, verbosity = 0):
-        self.verbosity = verbosity
-        self.databaseName = os.path.join(os.environ.get("SMARTTMPPATH", "."), "smartdb%d" % random.randint(0, 100000))
-        self.connection = sqlite3.connect(self.databaseName)
-        self.executeQuery("PRAGMA journal_mode = OFF")
-        self.executeQuery("PRAGMA synchronous = 0")
-        self.executeQuery("PRAGMA locking_mode = EXCLUSIVE")
-        self.executeQuery("PRAGMA count_change = OFF")
-        self.executeQuery("PRAGMA temp_store = 2")
-
-    def __del__(self):
-        self.connection.close()
-
-
-    def createDatabase(self):
-        pass
-
-
-    def deleteDatabase(self):
-        if os.path.exists(self.databaseName):
-            os.remove(self.databaseName)
-
-
-    def executeQuery(self, command, insertion = False):
-        cursor = self.connection.cursor()
-        query = MySqlQuery(cursor, self.verbosity)
-        try:
-            result = query.execute(command, insertion)
-            self.connection.commit()
-        except:
-            result = query.execute(command, insertion)
-            self.connection.commit()
-        if insertion:
-            return result
-        else:
-            return query
-
-
-    def executeManyQueries(self, commands):
-        cursor = self.connection.cursor()
-        query = MySqlQuery(cursor, self.verbosity)
-        try:
-            for cpt, command in enumerate(commands):
-                query.execute(command)
-            self.connection.commit()
-        except:
-            for cpt, command in enumerate(commands):
-                query.execute(command)
-            self.connection.commit()
-
-
-    def executeManyFormattedQueries(self, command, lines, insertion = False):
-        cursor = self.connection.cursor()
-        query = MySqlQuery(cursor, self.verbosity)
-        for line in lines:
-            result = query.executeFormat(command, line)
-        self.connection.commit()
-        if insertion:
-            return result
-        else:
-            return query
-
-
-    def executeManyQueriesIterator(self, table):
-        cursor = self.connection.cursor()
-        query = MySqlQuery(cursor, self.verbosity)
-        try:
-            for command in table.getIterator():
-                query.execute(command)
-            self.connection.commit()
-        except:
-            for command in table.getIterator():
-                query.execute(command)
-            self.connection.commit()
-
-
-    def executeManyFormattedQueriesIterator(self, table):
-        cursor = self.connection.cursor()
-        query = MySqlQuery(cursor, self.verbosity)
-        try:
-            for command, values in table.getIterator():
-                query.executeFormat(command, values)
-            self.connection.commit()
-        except:
-            for command, values in table.getIterator():
-                query.execute(command, values)
-            self.connection.commit()
-
-
-    def executeFormattedQuery(self, command, parameters, insertion = False):
-        cursor = self.connection.cursor()
-        query = MySqlQuery(cursor, self.verbosity)
-        result = query.executeFormat(command, parameters)
-        self.connection.commit()
-        if insertion:
-            return result
-        else:
-            return query
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/MySqlConnection.pyc
b
Binary file SMART/Java/Python/mySql/MySqlConnection.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/MySqlExonTable.py
--- a/SMART/Java/Python/mySql/MySqlExonTable.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,97 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import random
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.mySql.MySqlTable import MySqlTable
-
-
-class MySqlExonTable(MySqlTable):
-    """A table of exon in a mySQL database"""
-
-    def __init__(self, connection, name = None, chromosome = None, verbosity = 0):
-        if chromosome == None:
-            chromosome = ""
-        else:
-            chromosome = "_%s" % chromosome
-        if name == None:
-            name = "TmpTable_%d" % (random.randint(0, 100000))
-        name = "%s%s_exons" % (name, chromosome)
-        super(MySqlExonTable, self).__init__(connection, name, verbosity)
-
-
-    def createExonTable(self):
-        variables = Interval.getSqlVariables()
-        variables.append("transcriptId")
-        types = Interval.getSqlTypes()
-        types["transcriptId"] = "int"
-        sizes = Interval.getSqlSizes()
-        sizes["transcriptId"] = 11
-        self.create(variables, types, sizes)
-
-
-    def rename(self, name):
-        super(MySqlExonTable, self).rename("%s_exons" % name)
-    
-        
-    def addExon(self, exon, transcriptId):
-        values = exon.getSqlValues()
-        values["transcriptId"] = transcriptId
-        id = self.addLine(values)
-        exon.id = id
-
-
-    def retrieveExonsFromTranscriptId(self, transcriptId):
-        if not self.created:
-            return []
-        query = self.mySqlConnection.executeQuery("SELECT * FROM %s WHERE transcriptId = %d" % (self.name, transcriptId))
-        exons = []
-        for exonLine in query.getIterator():
-            exon = Interval()
-            exon.setSqlValues(exonLine)
-            exons.append(exon)
-        return exons
-            
-
-    def retrieveExonsFromBulkTranscriptIds(self, transcriptIds):
-        if not transcriptIds:
-            return {}
-        if not self.created:
-            return {}
-        exons = dict([(transcriptId, []) for transcriptId in transcriptIds])
-        query = self.mySqlConnection.executeQuery("SELECT * FROM %s WHERE transcriptId IN (%s)" % (self.name, ", ".join(["%s" % (transcriptId) for transcriptId in transcriptIds])))
-        for exonLine in query.getIterator():
-            exon = Interval()
-            exon.setSqlValues(exonLine)
-            exons[exonLine[-1]].append(exon)
-        return exons
-            
-
-    def removeFromTranscriptId(self, transcriptId):
-        self.mySqlConnection.executeQuery("DELETE FROM %s WHERE transcriptId = %d" % (self.name, transcriptId))
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/MySqlExonTable.pyc
b
Binary file SMART/Java/Python/mySql/MySqlExonTable.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/MySqlQuery.py
--- a/SMART/Java/Python/mySql/MySqlQuery.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,94 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-class MySqlQuery(object):
-    """Query to a database"""
-
-    def __init__(self, cursor, verbosity = 0):
-        self.verbosity = verbosity
-        self.cursor = cursor
-        self.insertedId = None
-
-
-    def __del__(self):
-        self.cursor.close()
-        
-        
-    def execute(self, query, insertion = False):
-        if self.verbosity > 99:
-            print "Querying %s" % (query)
-        try:
-            results = self.cursor.execute(query)
-        except Exception:
-            raise Exception("Error! Command \"%s\" failed!" % (query))
-        if insertion:
-            return self.cursor.lastrowid
-        return results
-
-
-    def executeFormat(self, query, parameters):
-        if self.verbosity > 99:
-            print "Querying %s |" % (query),
-            for parameter in parameters:
-                print parameter,
-            print
-        results = self.cursor.execute(query, parameters)
-        return results
-
-
-    def getLine(self):
-        return self.cursor.fetchone()
-
-
-    def getLines(self, lines = None):
-        if lines == None:
-            return self.cursor.fetchall()
-        return self.cursor.fetchmany(lines)
-
-
-    def isEmpty(self):
-        self.getLines()
-        return self.cursor.rowcount == None or self.cursor.rowcount == 0
-    
-
-    def getInsertedId(self):
-        return self.insertedId
-
-
-    def getIterator(self):
-        line = self.getLine()
-        while line != None:
-            yield line
-            line = self.getLine()
-         
-
-    def show(self):
-        for line in self.getIterator():
-            print line
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/MySqlQuery.pyc
b
Binary file SMART/Java/Python/mySql/MySqlQuery.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/MySqlTable.py
--- a/SMART/Java/Python/mySql/MySqlTable.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,349 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import re\n-import sys\n-\n-class MySqlTable(object):\n-    """\n-    Store a table of a mySQL database, used for transcripts or exons\n-    Record a a name and a type (int, float, double) for each column\n-    @ivar name:            name of the table\n-    @type name:            string\n-    @ivar variables:       name of the columns\n-    @type variables:       list of string\n-    @ivar types:           type of the columns\n-    @type types:           dict of string\n-    @ivar mySqlConnection: connection to a database\n-    @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}\n-    @ivar nbLines:         number of rows\n-    @type nbLines:         int\n-    @ivar verbosity:       verbosity\n-    @type verbosity:       int\n-    """\n-\n-    def __init__(self, connection, name, verbosity = 0):\n-        """\n-        Constructor\n-        Possibly retrieve column names and types if table exists\n-        @param mySqlConnection: connection to a databas\n-        @type  mySqlConnection: class L{MySqlConnection<MySqlConnection>}\n-        @param name:            name of the table\n-        @type  name:            string\n-        @param verbosity:       verbosity\n-        @type  verbosity:       int\n-        """\n-        self.name      = name\n-        self.variables = []\n-        self.types     = {}\n-        self.sizes     = {}\n-        self.nbLines   = None\n-        self.verbosity = verbosity\n-        self.mySqlConnection = connection\n-        queryTables = self.mySqlConnection.executeQuery("SELECT name FROM 
sqlite_master WHERE type LIKE \'table\' AND name LIKE \'%s\'" % (self.name))\n-        self.created = not queryTables.isEmpty()\n-        if self.created:\n-            queryFields = self.mySqlConnection.executeQuery("PRAGMA table_info(\'%s\')" % (name))\n-            for field in queryFields.getIterator():\n-                if field[1] != "id":\n-                    self.variables.append(field[1])\n-                    self.types[field[1]] = field[2]\n-                    self.sizes[field[1]] = field[3]\n-                    \n-                    \n-    def getName(self):\n-        return self.name\n-\n-\n-    def create(self, variables, types, sizes):\n-        """\n-        Create a table using give column names and types\n-        @param variables: names of the columns\n-        @type  variables: list of string\n-        @param types:     types of the columns\n-        @type  types:     dict of string\n-        @param sizes:     sizes of the types\n-        @type  size'..b', id):\n-        """\n-        Retrieve a row from its id\n-        @param id: the id of the row\n-        @type  id: int\n-        @return:   the row\n-        """\n-        query = self.mySqlConnection.executeQuery("SELECT * FROM \'%s\' WHERE id = %d" % (self.name, id))\n-        result = query.getLine()\n-        if result == None:\n-            raise Exception("Error! Id %d is not in the table %s!" 
% (id, self.name))\n-        return result\n-\n-\n-    def retrieveBulkFromId(self, ids):\n-        """\n-        Retrieve a row from its id\n-        @param id: the ids of the row\n-        @type  id: list of int\n-        @return:   the row\n-        """\n-        if not ids:\n-            return []\n-        MAXSIZE = 1000\n-        results = []\n-        for batch in range(len(ids) / MAXSIZE + 1):\n-            theseIds = ids[batch * MAXSIZE : (batch+1) * MAXSIZE]\n-            if theseIds:\n-                query = self.mySqlConnection.executeQuery("SELECT * FROM \'%s\' WHERE id IN (%s)" % (self.name, ", ".join(["%d" % (id) for id in theseIds])))\n-                lines = query.getLines()\n-                if len(lines) != len(theseIds):\n-                    raise Exception("Error! Some Ids of (%s) is are missing in the table \'%s\' (got %d instead of %d)!" % (", ".join(["%d" % (id) for id in theseIds]), self.name, len(lines)), len(theseIds))\n-                results.extend(lines)\n-        return results\n-\n-\n-    def removeFromId(self, id):\n-        """\n-        Remove a row from its id\n-        @param id: the id of the row\n-        @type  id: int\n-        """\n-        self.mySqlConnection.executeQuery("DELETE FROM \'%s\' WHERE id = %d" % (self.name, id))\n-    \n-    \n-    def getIterator(self):\n-        """\n-        Iterate on the content of table\n-        @return: iterator to the rows of the table\n-        """\n-        if not self.created:\n-            return\n-        MAXSIZE = 1000\n-        query = self.mySqlConnection.executeQuery("SELECT count(id) FROM \'%s\'" % (self.name))\n-        nbRows = int(query.getLine()[0])\n-        for chunk in range((nbRows / MAXSIZE) + 1):\n-            query = self.mySqlConnection.executeQuery("SELECT * FROM \'%s\' LIMIT %d, %d" % (self.name, chunk * MAXSIZE, MAXSIZE))\n-            for line in query.getIterator():\n-                yield line\n-\n-\n-    def createIndex(self, indexName, values, unique 
= False, fullText = False):\n-        """\n-        Add an index on the table\n-        @param indexName: name of the index\n-        @type  indexName: string\n-        @param values:    values to be indexed\n-        @type  values:    string\n-        @param unique:    if the index is unique\n-        @type  unique:    boolean\n-        @param fullText:  whether full text should be indexed\n-        @type  fullText:  boolean\n-        """\n-        self.mySqlConnection.executeQuery("CREATE %s%sINDEX \'%s\' ON \'%s\' (%s)" % ("UNIQUE " if unique else "", "FULLTEXT " if fullText else "", indexName, self.name, ", ".join(values)))\n-\n-\n-    def setDefaultTagValue(self, field, name, value):\n-        """\n-        Add a tag value\n-        @param name:  name of the tag\n-        @type  name:  string\n-        @param value: value of the tag\n-        @type  value: string or int\n-        """\n-        newData = {}\n-        for line in MySqlTable.getIterator(self):\n-            id = line[0]\n-            tags = line[field]\n-            if tags == \'\':\n-                newTag = "%s=%s" % (name, value)\n-            else:\n-                newTag = "%s;%s=%s" % (tags, name, value)\n-            if name not in [tag.split("=")[0] for tag in tags.split(";")]:\n-                newData[id] = newTag\n-        for id, tag in newData.iteritems():\n-            query = self.mySqlConnection.executeQuery("UPDATE \'%s\' SET tags = \'%s\' WHERE id = %i" % (self.name, tag, id))\n-\n-\n-\n-    def show(self):\n-        """\n-        Drop the content of the current table\n-        """\n-        query = self.mySqlConnection.executeQuery("SELECT * FROM \'%s\'" % (self.name))\n-        print query.getLines()\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/MySqlTable.pyc
b
Binary file SMART/Java/Python/mySql/MySqlTable.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/MySqlTranscriptTable.py
--- a/SMART/Java/Python/mySql/MySqlTranscriptTable.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,149 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import random
-import sys
-from SMART.Java.Python.structure.TranscriptList import TranscriptList
-from SMART.Java.Python.mySql.MySqlExonTable import MySqlExonTable
-from SMART.Java.Python.mySql.MySqlTable import MySqlTable
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.misc.Progress import Progress
-
-class MySqlTranscriptTable(MySqlTable):
-    """A table of transcripts in a mySQL database"""
-
-    def __init__(self, connection, name = None, chromosome = None, verbosity = 0):
-        if chromosome == None:
-            chromosome = ""
-        else:
-            chromosome = "_%s" % chromosome
-        if name == None:
-            name = "TmpTable_%d" % (random.randint(0, 100000))
-        name = "%s%s" % (name, chromosome)
-        super(MySqlTranscriptTable, self).__init__(connection, "%s_transcripts" % name, verbosity)
-
-
-    def createTranscriptTable(self):
-        self.create(Transcript.getSqlVariables(), Transcript.getSqlTypes(), Transcript.getSqlSizes())
-
-
-    def rename(self, name):
-        super(MySqlTranscriptTable, self).rename("%s_transcripts" % name)
-
-
-    def remove(self):
-        super(MySqlTranscriptTable, self).remove()
-        
-        
-    def clear(self):
-        super(MySqlTranscriptTable, self).clear()
-
-        
-    def copy(self, transcriptTable):
-        self.remove()
-        super(MySqlTranscriptTable, self).copy(transcriptTable)
-
-
-    def add(self, transcriptTable):
-        super(MySqlTranscriptTable, self).add(transcriptTable)
-
-
-    def addTranscript(self, transcript):
-        id = self.addLine(transcript.getSqlValues())
-        transcript.id = id
-            
-            
-    def addTranscriptList(self, transcriptList):
-        progress = Progress(transcriptList.getNbTranscript(), "Storing list to %s" % (self.name), self.verbosity)
-        for transcript in transcriptList.getIterator():
-            self.addTranscript(transcript)
-            progress.inc()
-        progress.done()
-
-            
-    def removeTranscript(self, transcript):
-        self.removeFromId(transcript.id)
-            
-            
-    def retrieveTranscriptFromId(self, id):
-        transcript = Transcript()
-        transcript.setSqlValues(self.retrieveFromId(id))
-        return transcript
-    
-    
-    def retrieveBulkTranscriptFromId(self, ids):
-        if not ids:
-            return []
-        transcripts = self.retrieveBulkFromId(ids)
-        idsToTranscripts = {}
-        for values in transcripts:
-            transcript = Transcript()
-            transcript.setSqlValues(values)
-            idsToTranscripts[values[0]] = transcript
-        return idsToTranscripts.values()
-    
-    
-    def selectTranscripts(self, command, simple = False):
-        MAXSIZE = 100000
-        found   = True
-        cpt     = 0
-        while found:
-            found = False
-            if simple:
-                thisCommand = command
-            else:
-                thisCommand = "%s LIMIT %d OFFSET %d" % (command, MAXSIZE, MAXSIZE * cpt)
-            query = self.mySqlConnection.executeQuery(thisCommand)
-            for line in query.getIterator():
-                found      = True
-                id         = int(line[0])
-                transcript = Transcript()
-                transcript.setSqlValues(line)
-                yield (id, transcript)
-            cpt += 1
-            if simple:
-                return
-
-    
-    def getIterator(self):
-        for id, transcript in self.selectTranscripts("SELECT * FROM '%s'" % (self.name)):
-            yield transcript
-
-
-    def retrieveTranscriptList(self):
-        transcriptList = TranscriptList()
-        for transcriptLine in self.getLines():
-            transcript = Transcript()
-            transcript.setSqlValues(transcriptLine)
-            transcriptList.addTranscript(transcript)
-        return transcriptList
-            
-
-    def setDefaultTagValue(self, name, value):
-        super(MySqlTranscriptTable, self).setDefaultTagValue(Transcript.getSqlVariables().index("tags")+1, name, value)
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/MySqlTranscriptTable.pyc
b
Binary file SMART/Java/Python/mySql/MySqlTranscriptTable.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/__init__.pyc
b
Binary file SMART/Java/Python/mySql/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/mySql/test/Test_MySqlTranscriptTable.py
--- a/SMART/Java/Python/mySql/test/Test_MySqlTranscriptTable.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,158 +0,0 @@
-from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
-from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable
-import unittest
-
-
-class Test_MySqlTranscriptTable(unittest.TestCase):
-  
-    def test_getRange(self):
-        transcript = Transcript()
-        transcript.setName("test1.1")
-        transcript.setChromosome("arm_X")
-        transcript.setStart(1000)
-        transcript.setEnd(4000)
-        transcript.setSize(2000)
-        transcript.setDirection("+")
-        
-        exon1 = Interval()
-        exon1.setName("test1.1")
-        exon1.setChromosome("arm_X")
-        exon1.setStart(1000)
-        exon1.setEnd(2000)
-        exon1.setSize(1000)
-        
-        exon2 = Interval()
-        exon2.setName("test1.1")
-        exon2.setChromosome("arm_X")
-        exon2.setStart(3000)
-        exon2.setEnd(4000)
-        exon2.setSize(1000)
-        
-        transcript.addExon(exon1)
-        transcript.addExon(exon2)
-        
-        connection = MySqlConnection()
-        writer = MySqlTranscriptWriter(connection, "testMySqlTranscriptTableGetRange")
-        writer.addTranscript(transcript)
-        writer.write()
-        
-        transcriptContainer = TranscriptContainer("testMySqlTranscriptTableGetRange", "sql")
-        transcriptContainer.mySqlConnection = connection
-        self.assertEqual(transcriptContainer.getNbTranscripts(), 1)
-        for transcript in transcriptContainer.getIterator():
-            self.assertEqual(transcript.getName(), "test1.1")
-            self.assertEqual(transcript.getChromosome(), "arm_X")
-            self.assertEqual(transcript.getStart(), 1000)
-            self.assertEqual(transcript.getEnd(), 4000)
-            self.assertEqual(transcript.getSize(), 2002)
-            self.assertEqual(transcript.getNbExons(), 2)
-            exons = transcript.getExons()
-            self.assertEqual(exons[0].getStart(), 1000)
-            self.assertEqual(exons[0].getEnd(), 2000)
-            self.assertEqual(exons[1].getStart(), 3000)
-            self.assertEqual(exons[1].getEnd(), 4000)
-    
-    
-    def test_setDefaultTagValue(self):
-        transcript1 = Transcript()
-        transcript1.setName("test1.1")
-        transcript1.setChromosome("arm_X")
-        transcript1.setStart(1000)
-        transcript1.setEnd(2000)
-        transcript1.setDirection("+")
-        
-        exon1 = Interval()
-        exon1.setName("test1.1")
-        exon1.setChromosome("arm_X")
-        exon1.setStart(1000)
-        exon1.setEnd(2000)
-        
-        transcript1.addExon(exon1)
-        
-        transcript2 = Transcript()
-        transcript2.setName("test2.1")
-        transcript2.setChromosome("arm_X")
-        transcript2.setStart(1000)
-        transcript2.setEnd(2000)
-        transcript2.setDirection("+")
-        transcript2.setTagValue("nbOccurrences", "2")
-        
-        exon2 = Interval()
-        exon2.setName("test2.1")
-        exon2.setChromosome("arm_X")
-        exon2.setStart(1000)
-        exon2.setEnd(2000)
-        
-        transcript2.addExon(exon2)
-        
-        transcript3 = Transcript()
-        transcript3.setName("test3.1")
-        transcript3.setChromosome("arm_X")
-        transcript3.setStart(1000)
-        transcript3.setEnd(2000)
-        transcript3.setDirection("+")
-        transcript3.setTagValue("occurrences", "2")
-        
-        exon3 = Interval()
-        exon3.setName("test3.1")
-        exon3.setChromosome("arm_X")
-        exon3.setStart(1000)
-        exon3.setEnd(2000)
-        
-        transcript3.addExon(exon3)
-        
-        connection = MySqlConnection()
-        table      = MySqlTranscriptTable(connection, "testMySqlTranscriptTableSetDefaultTagValue")
-        table.createTranscriptTable()
-        table.addTranscript(transcript1)
-        table.addTranscript(transcript2)
-        table.addTranscript(transcript3)
-        table.setDefaultTagValue("occurrence", "1")
-        
-        cpt = 0
-        for transcript in table.getIterator():
-            cpt += 1
-            self.assert_(cpt != 4)
-            if cpt == 1:
-                self.assertEqual(transcript.name, "test1.1")
-                self.assertEqual(transcript.getChromosome(), "arm_X")
-                self.assertEqual(transcript.getStart(), 1000)
-                self.assertEqual(transcript.getEnd(), 2000)
-                self.assertEqual(transcript.getSize(), 1001)
-                self.assertEqual(transcript.getNbExons(), 1)
-                exons = transcript.getExons()
-                self.assertEqual(exons[0].getStart(), 1000)
-                self.assertEqual(exons[0].getEnd(), 2000)
-                self.assertEqual(transcript.getTagValue("occurrence"), 1)
-            elif cpt == 2:
-                self.assertEqual(transcript.name, "test2.1")
-                self.assertEqual(transcript.getChromosome(), "arm_X")
-                self.assertEqual(transcript.getStart(), 1000)
-                self.assertEqual(transcript.getEnd(), 2000)
-                self.assertEqual(transcript.getSize(), 1001)
-                self.assertEqual(transcript.getNbExons(), 1)
-                exons = transcript.getExons()
-                self.assertEqual(exons[0].getStart(), 1000)
-                self.assertEqual(exons[0].getEnd(), 2000)
-                self.assertEqual(transcript.getTagValue("nbOccurrences"), 2)
-                self.assertEqual(transcript.getTagValue("occurrence"), 1)
-            elif cpt == 2:
-                self.assertEqual(transcript.name, "test3.1")
-                self.assertEqual(transcript.getChromosome(), "arm_X")
-                self.assertEqual(transcript.getStart(), 1000)
-                self.assertEqual(transcript.getEnd(), 2000)
-                self.assertEqual(transcript.getSize(), 1001)
-                self.assertEqual(transcript.getNbExons(), 1)
-                exons = transcript.getExons()
-                self.assertEqual(exons[0].getStart(), 1000)
-                self.assertEqual(exons[0].getEnd(), 2000)
-                self.assertEqual(transcript.getTagValue("occurrence"), 2)
-      
-        table.remove()
-
-if __name__ == '__main__':
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/ConvertToNCList.py
--- a/SMART/Java/Python/ncList/ConvertToNCList.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,172 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2012
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-import random, os, time, shutil
-from optparse import OptionParser
-from commons.core.parsing.ParserChooser import ParserChooser
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.ncList.NCList import NCList
-from SMART.Java.Python.ncList.NCListCursor import NCListCursor
-from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle
-from SMART.Java.Python.ncList.FileSorter import FileSorter
-from SMART.Java.Python.ncList.NCListMerger import NCListMerger
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-try:
-   import cPickle as pickle
-except:
-   import pickle
-
-class ConvertToNCList(object):
-
- def __init__(self, verbosity = 1):
- self._parsers   = {}
- self._sortedFileNames   = {}
- self._inputFileName       = None
- self._outputFileName   = None
- self._index       = False
- self._ncLists   = {}
- self._splittedFileNames       = {}
- self._nbElements   = 0
- self._nbElementsPerChromosome = {}
- self._randomNumber   = random.randint(0, 10000)
- self._sorted                  = False
- self._verbosity       = verbosity
-
- def setInputFileName(self, fileName, format):
- self._inputFileName = fileName
- chooser = ParserChooser(self._verbosity)
- chooser.findFormat(format)
- self._parser = chooser.getParser(fileName)
-
- def setOutputFileName(self, fileName):
- self._outputFileName = fileName
- fileNameNoExtension  = os.path.splitext(fileName)[0]
- baseName  = "%s_%d" % (fileNameNoExtension, self._randomNumber)
- self._directory      = "%s_files" % (baseName)
- if not os.path.exists(self._directory):
- os.makedirs(self._directory)
- self._sortedFileNames = os.path.join(self._directory, baseName)
-
- def setIndex(self, boolean):
- self._index = boolean
-
- def setSorted(self, boolean):
- self._sorted = boolean
-
- def sortFile(self):
- if self._verbosity > 2:
- print "%s file %s..." % ("Rewriting" if self._sorted else "Sorting", self._inputFileName)
- startTime = time.time()
- fs = FileSorter(self._parser, self._verbosity-4)
- fs.setPresorted(self._sorted)
- fs.perChromosome(True)
- fs.setOutputFileName(self._sortedFileNames)
- fs.sort()
- self._splittedFileNames       = fs.getOutputFileNames()
- self._nbElementsPerChromosome = fs.getNbElementsPerChromosome()
- self._nbElements   = fs.getNbElements()
- endTime = time.time()
- if self._verbosity > 2:
- print " ...done (%ds)" % (endTime - startTime)
-
- def createNCLists(self):
- self._ncLists = {}
- if self._verbosity > 2:
- print "Creating NC-list for %s..." % (self._inputFileName)
- startTime = time.time()
- for chromosome, fileName in self._splittedFileNames.iteritems():
- if self._verbosity > 3:
- print "  chromosome %s" % (chromosome)
- ncList = NCList(self._verbosity)
- if self._index:
- ncList.createIndex(True)
- ncList.setChromosome(chromosome)
- ncList.setFileName(fileName)
- ncList.setNbElements(self._nbElementsPerChromosome[chromosome])
- ncList.buildLists()
- self._ncLists[chromosome] = ncList
- endTime = time.time()
- if self._verbosity > 2:
- print " ...done (%ds)" % (endTime - startTime)
-
- def writeOutputFile(self):
- merger = NCListMerger(self._verbosity)
- merger.setFileName(self._outputFileName)
- merger.addIndex(self._index)
- merger.setNCLists(self._ncLists)
- merger.merge()
-
- def cleanFiles(self):
- shutil.rmtree(self._directory)
-
- def run(self):
- self.sortFile()
- self.createNCLists()
- self.writeOutputFile()
- self.cleanFiles()
-
- def getSortedFileNames(self):
- return self._splittedFileNames
-
- def getNbElements(self):
- return self._nbElements
-
- def getNbElementsPerChromosome(self):
- return self._nbElementsPerChromosome
-
- def getNCLists(self):
- return self._ncLists
-
- def getTmpDirectory(self):
- return self._directory
-
-
-if __name__ == "__main__":
- description = "Convert To NC-List v1.0.0: Convert a mapping or transcript file into a NC-List. [Category: NC-List]"
-
- parser = OptionParser(description = description)
- parser.add_option("-i", "--input",    dest="inputFileName",  action="store",   type="string",  help="Query input file [compulsory] [format: file in transcript format given by -f]")
- parser.add_option("-f", "--format",    dest="format",   action="store",   type="string",  help="format of previous file [compulsory] [format: transcript file format]")
- parser.add_option("-d", "--index",    dest="index",   action="store_true", default=False,   help="create an index [default: false] [format: boolean]")
- parser.add_option("-o", "--output",    dest="outputFileName", action="store",   type="string",  help="Output file [compulsory] [format: output file in NCList format]")
- parser.add_option("-s", "--sorted",    dest="sorted",       action="store_true", default=False,               help="input file is already sorted [format: boolean] [default: False]")
- parser.add_option("-v", "--verbosity", dest="verbosity",   action="store",    default=1,   type="int",   help="Trace level [format: int] [default: 1]")
- (options, args) = parser.parse_args()
-
- ctncl = ConvertToNCList(options.verbosity)
- ctncl.setInputFileName(options.inputFileName, options.format)
- ctncl.setOutputFileName(options.outputFileName)
- ctncl.setIndex(options.index)
- ctncl.setSorted(options.sorted)
- ctncl.run()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/ConvertToNCList.pyc
b
Binary file SMART/Java/Python/ncList/ConvertToNCList.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/FileSorter.py
--- a/SMART/Java/Python/ncList/FileSorter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,210 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-try:
- import cPickle as pickle
-except:
- import pickle
-import random, os
-from heapq import heapify, heappop, heappush
-from itertools import islice, cycle
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-
-BUFFER_SIZE = 100 * 1024
-
-class FileSorter(object):
-
- def __init__(self, parser, verbosity = 1):
- self._parser   = parser
- self._verbosity       = verbosity
- self._chunks   = {}
- self._nbElements   = 0
- self._nbElementsPerChromosome = {}
- self._perChromosome       = False
- self._isPreSorted             = False
- self._outputFileNames   = {}
- self._prefix   = "tmpFile_%d" % (random.randint(0, 100000))
- self._chromosome   = None
- if "SMARTTMPPATH" in os.environ:
- self._prefix = os.path.join(os.environ["SMARTTMPPATH"], self._prefix)
-
- def selectChromosome(self, chromosome):
- self._chromosome = chromosome
-
- def perChromosome(self, boolean):
- self._perChromosome = boolean
-
- def setOutputFileName(self, fileName):
- self._outputFileName = fileName
- if self._perChromosome:
- self._outputFileName = os.path.splitext(self._outputFileName)[0]
-
- def setPresorted(self, presorted):
- self._isPreSorted = presorted
-
- def sort(self):
- if not self._isPreSorted:
- self._batchSort()
- else:
- self._presorted()
-
- def _presorted(self):
- progress = UnlimitedProgress(1000, "Writing files %s" % (self._parser.fileName), self._verbosity)
- curChromosome = None
- outputHandle  = None
-
- if not self._perChromosome:
- outputHandle = open(self._outputFileName, "wb")
- for transcript in self._parser.getIterator():
- progress.inc()
- if transcript.__class__.__name__ == "Mapping":
- transcript = transcript.getTranscript()
- chromosome = transcript.getChromosome()
- if self._chromosome != None and chromosome != self._chromosome:
- continue
- self._nbElements += 1
- self._nbElementsPerChromosome[chromosome] = self._nbElementsPerChromosome.get(chromosome, 0) + 1
- if self._perChromosome:
- if chromosome != curChromosome:
- if outputHandle != None:
- outputHandle.close()
- self._outputFileNames[chromosome] = "%s_%s.pkl" % (self._outputFileName, chromosome)
- outputHandle  = open(self._outputFileNames[chromosome], "wb")
- curChromosome = chromosome
- outputHandle.writelines("%s" % pickle.dumps(transcript))  
- if outputHandle != None:
- outputHandle.close()
- progress.done() 
-
- def getNbElements(self):
- return self._nbElements
-
- def getNbElementsPerChromosome(self):
- return self._nbElementsPerChromosome
-
- def _printSorted(self, chromosome, chunk):
- chunk.sort(key = lambda transcript: (transcript.getStart(), -transcript.getEnd()))
- outputChunk = open("%s_%s_%06i.tmp" % (self._prefix, chromosome, len(self._chunks[chromosome])), "wb", 32000)
- self._chunks[chromosome].append(outputChunk)
- for transcript in chunk:
- outputChunk.write(pickle.dumps(transcript, -1))
- outputChunk.close()
-
- def _merge(self, chunks):
- values = []
- for chunk in chunks:
- chunk = open(chunk.name, "rb")
- try:
- transcript = pickle.load(chunk)
- start    = transcript.getStart()
- end    = -transcript.getEnd()
- except EOFError:
- try:
- chunk.close()
- chunks.remove(chunk)
- os.remove(chunk.name)
- except:
- pass
- else:
- heappush(values, (start, end, transcript, chunk))
- while values:
- start, end, transcript, chunk = heappop(values)
- yield transcript
- try:
- transcript = pickle.load(chunk)
- start    = transcript.getStart()
- end    = -transcript.getEnd()
- except EOFError:
- try:
- chunk.close()
- chunks.remove(chunk)
- os.remove(chunk.name)
- except:
- pass
- else:
- heappush(values, (start, end, transcript, chunk))
-
- def _batchSort(self):
- currentChunks = {}
- counts   = {}
- try:
- progress = UnlimitedProgress(1000, "Sorting file %s" % (self._parser.fileName), self._verbosity)
- for transcript in self._parser.getIterator():
- progress.inc()
- if transcript.__class__.__name__ == "Mapping":
- transcript = transcript.getTranscript()
- chromosome = transcript.getChromosome()
- if self._chromosome != None and chromosome != self._chromosome:
- continue
- if chromosome not in self._chunks:
- self._chunks[chromosome]  = []
- currentChunks[chromosome] = []
- counts[chromosome] = 0
- currentChunks[chromosome].append(transcript)
- counts[chromosome] += 1
- if counts[chromosome] == BUFFER_SIZE:
- self._printSorted(chromosome, currentChunks[chromosome])
- currentChunks[chromosome] = []
- counts[chromosome]   = 0
- self._nbElements += 1
- self._nbElementsPerChromosome[chromosome] = self._nbElementsPerChromosome.get(chromosome, 0) + 1
- for chromosome in self._chunks:
- if counts[chromosome] > 0:
- self._printSorted(chromosome, currentChunks[chromosome])
- progress.done()
- if not self._perChromosome:
- outputHandle = open(self._outputFileName, "wb")
- progress = Progress(len(self._chunks), "Writing sorted file %s" % (self._parser.fileName), self._verbosity)
- for chromosome in self._chunks:
- if self._perChromosome:
- self._outputFileNames[chromosome] = "%s_%s.pkl" % (self._outputFileName, chromosome)
- outputHandle = open(self._outputFileNames[chromosome], "wb")
- for sequence in self._merge(self._chunks[chromosome]):
- pickle.dump(sequence, outputHandle, -1)
- if self._perChromosome:
- outputHandle.close()
- progress.inc()
- if not self._perChromosome:
- outputHandle.close()
- progress.done()
- finally:
- for chunks in self._chunks.values():
- for chunk in chunks:
- try:
- chunk.close()
- os.remove(chunk.name)
- except Exception:
- pass
-
- def getOutputFileNames(self):
- return self._outputFileNames
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/FileSorter.pyc
b
Binary file SMART/Java/Python/ncList/FileSorter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/FindOverlapsWithOneInterval.py
--- a/SMART/Java/Python/ncList/FindOverlapsWithOneInterval.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,197 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-import struct
-import math
-import os
-from optparse import OptionParser
-from commons.core.writer.Gff3Writer import Gff3Writer
-from SMART.Java.Python.ncList.NCList import NCList
-from SMART.Java.Python.ncList.FileSorter import FileSorter
-from commons.core.parsing.ParserChooser import ParserChooser
-from SMART.Java.Python.ncList.NCListCursor import NCListCursor
-from SMART.Java.Python.structure.Transcript import Transcript
-
-LONGSIZE = struct.calcsize('l')
-
-class FindOverlapsWithOneInterval(object):
-
- def __init__(self, verbosity):
- self._sortedFileName   = None
- self._verbosity = verbosity
- self._overlappingNames = []
- self._nbOverlaps    = 0
- self._nbWritten = 0
-
- def __del__(self):
- if self._sortedFileName and os.path.exists(self._sortedFileName):
- os.remove(self._sortedFileName)
-
- def close(self):
- self._iWriter.close()
-
- def setOutputFileName(self, fileName):
- self._iWriter = Gff3Writer(fileName)
-
- def setFileName(self, fileName, format):
- chooser = ParserChooser(self._verbosity)
- chooser.findFormat(format)
- self._parser  = chooser.getParser(fileName)
- self._sortedFileName = "%s_sorted.pkl" % (os.path.splitext(fileName)[0])
-
- def setInterval(self, chromosome, start, end):
- self._chromosome = chromosome
- self._start   = start
- self._end = end
- self._transcript = Transcript()
- self._transcript.setChromosome(chromosome)
- self._transcript.setStart(start)
- self._transcript.setEnd(end)
- self._transcript.setDirection("+")
-
- def setTranscript(self, transcript):
- if transcript.__class__.__name__ == "Mapping":
- transcript = transcript.getTranscript()
- self._chromosome = transcript.getChromosome()
- self._start   = transcript.getStart()
- self._end = transcript.getEnd()
- self._transcript = transcript
-
- def prepareIntermediateFiles(self):
- fs = FileSorter(self._parser, self._verbosity-4)
- fs.selectChromosome(self._chromosome)
- fs.perChromosome(False)
- fs.setOutputFileName(self._sortedFileName)
- fs.sort()
- self._nbTotalLines = fs.getNbElements()
- self._nbLines   = fs.getNbElementsPerChromosome()[self._chromosome]
-
- def createNCList(self):
- if self._verbosity > 2:
- print "Creating NC-list..."
- ncList = NCList(self._verbosity)
- ncList.createIndex(True)
- ncList.setChromosome(self._chromosome)
- ncList.setFileName(self._sortedFileName)
- ncList.setNbElements(self._nbTotalLines)
- ncList.buildLists()
- self.setNCList(ncList, ncList.getIndex())
- if self._verbosity > 2:
- print " ...done (%ds)" % (endTime - startTime)
-
- def setNCList(self, ncList, index):
- self._ncList = ncList
- self._indix  = index
-
- def binarySearch(self, cursor, startL, endL):
- if startL > endL:
- return None
- middleL = (startL + endL) / 2
- cursor.moveSibling(middleL)
- overlap = self.isOverlapping(cursor)
- if overlap == 0:
- if middleL == startL:
- return cursor
- else:
- return self.binarySearch(cursor, startL, middleL)
- if overlap == -1:
- return self.binarySearch(cursor, middleL + 1, endL)
- return self.binarySearch(cursor, startL, middleL - 1)
-
- def compare(self, cursor = None):
- self._ncList.openFiles()
- if cursor == None:
- dump   = True
- cursor = NCListCursor(None, self._ncList, 0, self._verbosity)
- cursor._getSiblingData()
- cursor = self.binarySearch(cursor, cursor._firstSiblingLIndex, cursor._lastSiblingLIndex)
- if cursor == None:
- return
- while not cursor.isOut() and self.isOverlapping(cursor) == 0:
- self.write(cursor)
- newCursor = NCListCursor(cursor)
- if newCursor.hasChildren():
- newCursor.moveDown()
- self.compare(newCursor)
- if cursor.isLast():
- return
- cursor.moveRight()
-
- def isOverlapping(self, cursor):
- if self._end < cursor.getStart():
- return 1
- if self._start > cursor.getEnd():
- return -1
- return 0
-
- def write(self, cursor):
- self._nbOverlaps += 1
- refTranscript = cursor.getTranscript()
- self._overlappingNames.append(refTranscript.getName())
-
- def dumpWriter(self):
- if (not self._overlappingNames) or self._transcript == None:
- return
- self._transcript.setTagValue("nbOverlaps", len(self._overlappingNames))
- self._transcript.setTagValue("overlapsWith", "--".join(self._overlappingNames))
- self._iWriter.addTranscript(self._transcript)
- self._nbWritten    += 1
- self._overlappingNames = []
-
- def run(self):
- self.prepareIntermediateFiles()
- self.createNCList()
- self.compare()
- self.dumpWriter()
- self.close()
- if self._verbosity > 0:
- print "# refs: %d" % (self._nbLines)
- print "# written: %d (%d overlaps)" % (self._nbOverlappingQueries, self._nbOverlaps)
-
-
-if __name__ == "__main__":
- description = "FindOverlapsWithOneInterval: Finds overlaps with one query interval."
-
- parser = OptionParser(description = description)
- parser.add_option("-i", "--input",    dest="inputFileName",   action="store", type="string",  help="Input file [compulsory] [format: file in transcript format given by -f]")
- parser.add_option("-f", "--format",   dest="format",  action="store", type="string",  help="Format of previous file [compulsory] [format: transcript file format]")
- parser.add_option("-s", "--start",    dest="start",   action="store", type="int",  help="The start of the query interval [compulsory] [format: int]")
- parser.add_option("-e", "--end",  dest="end", action="store", type="int",  help="The end of the query interval [compulsory] [format: int]")
- parser.add_option("-c", "--chromosome",  dest="chromosome",  action="store", type="string",  help="Chromosome of the query interval [compulsory] [format: string]")
- parser.add_option("-o", "--output",   dest="outputFileName",  action="store", type="string",  help="Output file [compulsory] [format: output file in GFF3 format]")
- parser.add_option("-v", "--verbosity",   dest="verbosity",   action="store", default=1, type="int",  help="Trace level [format: int] [default: 1]")
- (options, args) = parser.parse_args()
-
- iFOWOI = FindOverlapsWithOneInterval(options.verbosity)
- iFOWOI.setFileName(options.inputFileName, options.format)
- iFOWOI.setInterval(options.chromosome, options.start, options.end)
- iFOWOI.setOutputFileName(options.outputFileName)
- iFOWOI.run()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervals.py
--- a/SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervals.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,182 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-\n-import os, struct, time\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.ncList.NCList import NCList\n-from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n-from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n-from SMART.Java.Python.ncList.FileSorter import FileSorter\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n-from SMART.Java.Python.ncList.FindOverlapsWithOneInterval import FindOverlapsWithOneInterval\n-\n-REFERENCE = 0\n-QUERY = 1\n-TYPETOSTRING = {0: "reference", 1: "query"}\n-\n-class FindOverlapsWithSeveralIntervals(object):\n-    \n-    def __init__(self, verbosity = 1):\n-        self._parsers            = {}\n-        self._outputFileName     = "outputOverlaps.gff3"\n-        self._iWriter            = None\n-        self._nbLines            = {REFERENCE: 0, QUERY: 0}\n-        self._verbosity          = verbosity\n-        self._ncLists            = {}\n-        self._sortedRefFileNames = None\n-        self._transQueryFileName = None\n-        self._cursors            = {}\n-        self._iFowoi             = FindOverlapsWithOneInterval(self._verbosity)\n-        \n-    def __del__(self):\n-        self.close()\n-        for fileName in (self._sortedRefFileNames, 
self._transQueryFileName):\n-            if os.path.exists(fileName):\n-                os.remove(fileName)\n-    \n-    def close(self):\n-        self._iFowoi.close()\n-        \n-    def setRefFileName(self, fileName, format):\n-        self.setFileName(fileName, format, REFERENCE)\n-        self._sortedRefFileNames = "%s_ref_sorted.pkl" % (os.path.splitext(fileName)[0])\n-        \n-    def setQueryFileName(self, fileName, format):\n-        self.setFileName(fileName, format, QUERY)\n-        self._transQueryFileName = "%s_query_trans.pkl" % (os.path.splitext(fileName)[0])\n-\n-    def setFileName(self, fileName, format, type):\n-        chooser = ParserChooser(self._verbosity)\n-        chooser.findFormat(format)\n-        self._parsers[type]   = chooser.getParser(fileName)\n-        \n-    def setOutputFileName(self, outputFileName):\n-        self._iFowoi.setOutputFileName(outputFileName)\n-\n-    def _sortRefFile(self):\n-        fs = FileSorter(self._p'..b'\n-        self._sortRefFile()\n-        self._translateQueryFile()\n-\n-    def createNCLists(self):\n-        self._ncLists = {}\n-        self._indices = {}\n-        self._cursors = {}\n-        for chromosome, fileName in self._splittedFileNames.iteritems():\n-            if self._verbosity > 3:\n-                print "  chromosome %s" % (chromosome)\n-            ncList = NCList(self._verbosity)\n-            ncList.createIndex(True)\n-            ncList.setChromosome(chromosome)\n-            ncList.setFileName(fileName)\n-            ncList.setNbElements(self._nbRefLinesPerChromosome[chromosome])\n-            ncList.buildLists()\n-            self._ncLists[chromosome] = ncList\n-            cursor = NCListCursor(None, ncList, 0, self._verbosity)\n-            self._cursors[chromosome] = cursor\n-            self._indices[chromosome] = ncList.getIndex()\n-        endTime = time.time()\n-\n-    def compare(self):\n-        progress = Progress(self._nbLines[QUERY], "Comparing data", 
self._verbosity-3)\n-        startTime = time.time()\n-        for cpt, queryTranscript in enumerate(self._parsers[QUERY].getIterator()):\n-            chromosome = queryTranscript.getChromosome()\n-            if chromosome not in self._ncLists:\n-                continue\n-            self._iFowoi.setNCList(self._ncLists[chromosome], self._indices[chromosome])\n-            self._iFowoi.setTranscript(queryTranscript)\n-            self._iFowoi.compare()\n-            self._iFowoi.dumpWriter()\n-            progress.inc()\n-        progress.done()\n-        endTime = time.time()\n-        self._timeSpent = endTime - startTime\n-\n-    def run(self):\n-        startTime = time.time()\n-        if self._verbosity > 2:\n-            print "Creating NC-list..."\n-        self.prepareIntermediateFiles()\n-        self.createNCLists()\n-        endTime = time.time()\n-        if self._verbosity > 2:\n-            print "    ...done (%.2gs)" % (endTime - startTime)\n-        self.compare()\n-        self.close()\n-        if self._verbosity > 0:\n-            print "# queries: %d" % (self._nbLines[QUERY])\n-            print "# refs:    %d" % (self._nbLines[REFERENCE])\n-            print "# written: %d (%d overlaps)" % (self._iFowoi._nbWritten, self._iFowoi._nbOverlaps)\n-            print "time:      %.2gs" % (self._timeSpent)\n-\n-\n-if __name__ == "__main__":\n-    description = "FindOverlaps With Several Intervals v1.0.0: Finds overlaps with several query intervals. 
[Category: Data comparison]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--query",       dest="inputQueryFileName", action="store",            type="string",  help="Query input file [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--queryFormat", dest="queryFormat",        action="store",            type="string",  help="format of previous file [compulsory] [format: transcript file format]")\n-    parser.add_option("-j", "--ref",         dest="inputRefFileName",   action="store",            type="string",  help="Reference input file [compulsory] [format: file in transcript format given by -g]")\n-    parser.add_option("-g", "--refFormat",   dest="refFormat",          action="store",            type="string",  help="format of previous file [compulsory] [format: transcript file format]")\n-    parser.add_option("-o", "--output",      dest="outputFileName",     action="store",            type="string",  help="Output file [compulsory] [format: output file in GFF3 format]")\n-    parser.add_option("-v", "--verbosity",   dest="verbosity",          action="store", default=1, type="int",     help="Trace level [format: int] [default: 1]")\n-    (options, args) = parser.parse_args()\n-    \n-    iFWSI = FindOverlapsWithSeveralIntervals(options.verbosity)\n-    iFWSI.setRefFileName(options.inputRefFileName, options.refFormat)\n-    iFWSI.setQueryFileName(options.inputQueryFileName, options.queryFormat)\n-    iFWSI.setOutputFileName(options.outputFileName)\n-    iFWSI.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsBin.py
--- a/SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsBin.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,204 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2011\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import random, os, os.path, time, sqlite3\n-from optparse import OptionParser\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from commons.core.writer.TranscriptWriter import TranscriptWriter\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Mapping import Mapping\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress\n-try:\n-   import cPickle as pickle\n-except:\n-   import pickle\n-\n-MINBIN = 3\n-MAXBIN = 7\n-\n-\n-def getBin(start, end):\n-\tfor i in range(MINBIN, MAXBIN + 1):\n-\t\tbinLevel = 10 ** i\n-\t\tif int(start / binLevel) == int(end / binLevel):\n-\t\t\treturn int(i * 10 ** (MAXBIN + 1) + int(start / binLevel))\n-\treturn int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n-\n-def getOverlappingBins(start, end):\n-\tarray\t= []\n-\tbigBin = int((MAXBIN + 1) * 10 ** (MAXBIN + 1))\n-\tfor i in range(MINBIN, MAXBIN + 1):\n-\t\tbinLevel = 10 ** i\n-\t\tarray.append((int(i * 10 ** (MAXBIN + 1) + int(start / binLevel)), int(i * 10 ** (MAXBIN + 1) + int(end / binLevel))))\n-\tarray.append((bigBin, bigBin))\n-\treturn array\n-\n-\n-class FindOverlapsWithSeveralIntervalsBin(object):\n-\n-\tdef __init__(self, verbosity):\n-\t\tself.verbosity\t= verbosity\n-\t\tself.randomNumber = random.randint(0, 10000)\n-\t\tself.dbName\t   = "smartdb%d" % (self.randomNumber)\n-\t\tif "SMARTTMPPATH" in os.environ:\n-\t\t\tself.dbName = 
os.join(os.environ["SMARTTMPPATH"], self.dbName)\n-\t\tself.connection = sqlite3.connect(self.dbName)\n-\t\tself.tableNames = {}\n-\t\tself.nbQueries  = 0\n-\t\tself.nbRefs\t = 0\n-\t\tself.nbWritten  = 0\n-\t\tself.nbOverlaps = 0\n-\t\tcursor = self.connection.cursor()\n-\t\tcursor.execute("PRAGMA journal_mode = OFF")\n-\t\tcursor.execute("PRAGMA synchronous = 0")\n-\t\tcursor.execute("PRAGMA locking_mode = EXCLUSIVE")\n-\t\tcursor.execute("PRAGMA count_change = OFF")\n-\t\tcursor.execute("PRAGMA temp_store = 2")\n-\n-\tdef __del__(self):\n-\t\tcursor = self.connection.cursor()\n-\t\tfor tableName in self.tableNames.values():\n-\t\t\tcursor.execute("DROP TABLE IF EXISTS %s" % (tableName))\n-\t\tif os.path.exists(self.dbName):\n-\t\t\tos.remove(self.dbName)\n-\t\t\n-\tdef createTable(self, chromosome):\n-\t\tcursor = self.connection.cursor()\n-\t\ttableName = "tmpTable_%s_%d" % (chromosome.replace("-", "_"), self.randomNumber)\n-\t\tcursor.execute("CREATE TABLE %s (start INT, end INT, transcript BLOB, bin INT)" % (tableName))\n-\t\tcursor.execute("CRE'..b'ursor\t = self.connection.cursor()\n-\t\t\tcursor.execute("INSERT INTO %s (start, end, transcript, bin) VALUES (?, ?, ?, ?)" % (self.tableNames[chromosome]), (start, end, sqlite3.Binary(transcriptString), bin))\n-\t\t\tself.nbRefs += 1\n-\t\tself.connection.commit()\n-\t\tendTime = time.time()\n-\t\tif self.verbosity > 2:\n-\t\t\tprint "\t...done (%.2gs)" % (endTime - startTime)\n-\n-\tdef setQueryFile(self, fileName, format):\n-\t\tchooser = ParserChooser(self.verbosity)\n-\t\tchooser.findFormat(format)\n-\t\tself.queryParser = chooser.getParser(fileName)\n-\t\tself.nbQueries = self.queryParser.getNbItems()\n-\n-\tdef setOutputFile(self, fileName):\n-\t\tself.writer = TranscriptWriter(fileName, "gff3", self.verbosity)\n-\n-\tdef compare(self):\n-\t\tprogress = Progress(self.nbQueries, "Reading queries", self.verbosity)\n-\t\tstartTime = time.time()\n-\t\tfor queryTranscript in 
self.queryParser.getIterator():\n-\t\t\tif queryTranscript.__class__.__name__ == "Mapping":\n-\t\t\t\tqueryTranscript = queryTranscript.getTranscript()\n-\t\t\tprogress.inc()\n-\t\t\tqueryChromosome = queryTranscript.getChromosome()\n-\t\t\tif queryChromosome not in self.tableNames:\n-\t\t\t\tcontinue\n-\t\t\tqueryStart = queryTranscript.getStart()\n-\t\t\tqueryEnd   = queryTranscript.getEnd()\n-\t\t\tbins\t   = getOverlappingBins(queryStart, queryEnd)\n-\t\t\tcommands   = []\n-\t\t\tfor bin in bins:\n-\t\t\t\tcommand = "SELECT * FROM %s WHERE bin " % (self.tableNames[queryChromosome])\n-\t\t\t\tif bin[0] == bin[1]:\n-\t\t\t\t\tcommand += "= %d" % (bin[0])\n-\t\t\t\telse:\n-\t\t\t\t\tcommand += "BETWEEN %d AND %d" % (bin[0], bin[1])\n-\t\t\t\tcommands.append(command)\n-\t\t\tcommand = " UNION ".join(commands)\n-\t\t\tcursor  = self.connection.cursor()\n-\t\t\tcursor.execute(command)\n-\t\t\toverlap = False\n-\t\t\tline\t= cursor.fetchone()\n-\t\t\twhile line:\n-\t\t\t\trefStart, refEnd, refTranscriptString, refBin = line\n-\t\t\t\tif refStart <= queryEnd and refEnd >= queryStart:\n-\t\t\t\t\trefTranscript = pickle.loads(str(refTranscriptString))\n-\t\t\t\t\tif refTranscript.overlapWith(queryTranscript):\n-\t\t\t\t\t\toverlap = True\n-\t\t\t\t\t\tself.nbOverlaps += 1\n-\t\t\t\tline = cursor.fetchone()\n-\t\t\tif overlap:\n-\t\t\t\tself.writer.addTranscript(queryTranscript)\n-\t\t\t\tself.nbWritten += 1\n-\t\tprogress.done()\n-\t\tendTime = time.time()\n-\t\tself.timeSpent = endTime - startTime\n-\n-\tdef displayResults(self):\n-\t\tprint "# queries:  %d" % (self.nbQueries)\n-\t\tprint "# refs:\t   %d" % (self.nbRefs)\n-\t\tprint "# written:  %d (%d overlaps)" % (self.nbWritten, self.nbOverlaps)\n-\t\tprint "time:\t   %.2gs" % (self.timeSpent)\n-\n-\tdef run(self):\n-\t\tself.compare()\n-\t\tself.displayResults()\n-\n-if __name__ == "__main__":\n-\t\n-\tdescription = "Find Overlaps With Several Intervals Using Bin v1.0.1: Use MySQL binning to compare intervals. 
[Category: Personal]"\n-\n-\tparser = OptionParser(description = description)\n-\tparser.add_option("-i", "--input1",\t  dest="inputFileName1", action="store",\t\t\ttype="string", help="query input file [compulsory] [format: file in transcript format given by -f]")\n-\tparser.add_option("-f", "--format1",\t dest="format1",\t\taction="store",\t\t\ttype="string", help="format of previous file [compulsory] [format: transcript file format]")\n-\tparser.add_option("-j", "--input2",\t  dest="inputFileName2", action="store",\t\t\ttype="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")\n-\tparser.add_option("-g", "--format2",\t dest="format2",\t\taction="store",\t\t\ttype="string", help="format of previous file [compulsory] [format: transcript file format]")\n-\tparser.add_option("-o", "--output",\t  dest="outputFileName", action="store",\t\t\ttype="string", help="output file [format: output file in GFF3 format]")\n-\tparser.add_option("-v", "--verbosity",   dest="verbosity",\t  action="store", default=1, type="int",\thelp="trace level [format: int]")\n-\t(options, args) = parser.parse_args()\n-\n-\tfowsib = FindOverlapsWithSeveralIntervalsBin(options.verbosity)\n-\tfowsib.setQueryFile(options.inputFileName1, options.format1)\n-\tfowsib.setReferenceFile(options.inputFileName2, options.format2)\n-\tfowsib.setOutputFile(options.outputFileName)\n-\tfowsib.run()\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsIndex.py
--- a/SMART/Java/Python/ncList/FindOverlapsWithSeveralIntervalsIndex.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,137 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2011
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import random, os, time, MySQLdb
-from optparse import OptionParser
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.writer.TranscriptWriter import TranscriptWriter
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-
-
-class FindOverlapsWithSeveralIntervalsIndex(object):
-
-    def __init__(self, verbosity):
-        self.verbosity = verbosity
-        randomNumber   = random.randint(0, 10000)
-        self.dbName    = "smartdb"
-        if "SMARTTMPPATH" in os.environ:
-            self.dbName = os.join(os.environ["SMARTTMPPATH"], self.dbName)
-        self.db         = MySQLdb.connect(db = self.dbName)
-        self.tableName  = "table_%s" % (randomNumber)
-        self.nbQueries  = 0
-        self.nbRefs     = 0
-        self.nbOverlaps = 0
-
-    def __del__(self):
-        cursor = self.db.cursor()
-        cursor.execute("DROP TABLE IF EXISTS %s" % (self.tableName))
-        
-
-    def setReferenceFile(self, fileName, format):
-        cursor = self.db.cursor()
-        cursor.execute("CREATE TABLE %s (start INT, end INT)" % (self.tableName))
-        cursor.execute("CREATE INDEX index_%s ON %s (start, end)" % (self.tableName, self.tableName))
-        chooser = ParserChooser(self.verbosity)
-        chooser.findFormat(format)
-        parser = chooser.getParser(fileName)
-        progress = UnlimitedProgress(1000, "Reading references", self.verbosity)
-        for transcript in parser.getIterator():
-            start      = transcript.getStart()
-            end        = transcript.getEnd()
-            cursor     = self.db.cursor()
-            cursor.execute("INSERT INTO %s (start, end) VALUES (%d, %d)" % (self.tableName, start, end))
-            self.nbRefs += 1
-            progress.inc()
-        self.db.commit()
-        progress.done()
-
-    def setQueryFile(self, fileName, format):
-        chooser = ParserChooser(self.verbosity)
-        chooser.findFormat(format)
-        self.queryParser = chooser.getParser(fileName)
-        self.nbQueries = self.queryParser.getNbTranscripts()
-
-    def setOutputFile(self, fileName):
-        self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
-
-    def compare(self):
-        progress = Progress(self.nbQueries, "Reading queries", self.verbosity)
-        startTime = time.time()
-        for queryTranscript in self.queryParser.getIterator():
-            queryStart = queryTranscript.getStart()
-            queryEnd   = queryTranscript.getEnd()
-            command    = "SELECT 1 FROM %s WHERE start <= %d and end >= %d" % (self.tableName, queryEnd, queryStart)
-            cursor     = self.db.cursor()
-            cursor.execute(command)
-            overlap = False
-            line = cursor.fetchone()
-            while line:
-                overlap = True
-                line    = cursor.fetchone()
-            if overlap:
-                self.writer.addTranscript(queryTranscript)
-                self.nbOverlaps += 1
-            progress.inc()
-        progress.done()
-        endTime = time.time()
-        self.timeSpent = endTime - startTime
-
-    def displayResults(self):
-        print "# queries:  %d" % (self.nbQueries)
-        print "# refs:     %d" % (self.nbRefs)
-        print "# overlaps: %d" % (self.nbOverlaps)
-        print "time:       %.2gs" % (self.timeSpent)
-
-    def run(self):
-        self.compare()
-        self.displayResults()
-
-if __name__ == "__main__":
-    
-    description = "Find Overlaps With Several Intervals Using Indices v1.0.1: Use MySQL to compare intervals. [Category: Personal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input1",      dest="inputFileName1", action="store",            type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format1",     dest="format1",        action="store",            type="string", help="format of previous file [compulsory] [format: transcript file format]")
-    parser.add_option("-j", "--input2",      dest="inputFileName2", action="store",            type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
-    parser.add_option("-g", "--format2",     dest="format2",        action="store",            type="string", help="format of previous file [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",      dest="outputFileName", action="store",            type="string", help="output file [format: output file in GFF3 format]")
-    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store", default=1, type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    fowsii = FindOverlapsWithSeveralIntervalsIndex(options.verbosity)
-    fowsii.setQueryFile(options.inputFileName1, options.format1)
-    fowsii.setReferenceFile(options.inputFileName2, options.format2)
-    fowsii.setOutputFile(options.outputFileName)
-    fowsii.run()
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/FindOverlaps_naif.py
--- a/SMART/Java/Python/ncList/FindOverlaps_naif.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,85 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-import os
-import struct
-from optparse import OptionParser
-from commons.core.parsing.GffParser import GffParser
-from commons.core.writer.Gff3Writer import Gff3Writer
-
-LONGSIZE = struct.calcsize('l')
-
-class FindOverlaps_naif(object):
-    
-    def __init__(self, inputRefGff3FileName, inputQueryGff3FileName):
-        self._inputRefGff3FileName = inputRefGff3FileName
-        self._inputQueryGff3FileName = inputQueryGff3FileName
-                
-    def close(self):
-        self._iGff3Writer.close()
-    
-    def setGff3FileName(self, fileName):
-        self._inputRefGff3FileName = fileName
-        
-    def setQueryGff3FileName(self, fileName):
-        self._inputQueryGff3FileName = fileName
-    
-    def setOutputGff3FileName(self, outputGff3FileName):
-        if outputGff3FileName != '':
-            self._outputGff3FileName = outputGff3FileName
-        self._iGff3Writer = Gff3Writer(self._outputGff3FileName)
-                
-    def run(self):
-        queryParser = GffParser(self._inputQueryGff3FileName, 0)
-        for queryTranscript in queryParser.getIterator():
-            ids       = []
-            refParser = GffParser(self._inputRefGff3FileName, 0)
-            for refTranscript in refParser.getIterator():
-                if queryTranscript.overlapWith(refTranscript):
-                    ids.append(refTranscript.getTagValue('ID'))
-            if ids:
-                queryTranscript.setTagValue("nbOverlaps", len(ids))
-                queryTranscript.setTagValue("overlapsWith", "--".join(ids))
-                self._iGff3Writer.addTranscript(queryTranscript)
-    
-if __name__ == "__main__":
-    description = "FindOverlapsWithSeveralInterval: Finds overlaps with several query intervals."
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--inputRef", dest="inputRefGff3FileName", action="store", type="string", help="Reference input file [compulsory] [format: file in gff3 format]")
-    parser.add_option("-j", "--inputQuery", dest="inputQueryGff3FileName", action="store", type="string", help="Query input file [compulsory] [format: file in gff3 format]")
-    parser.add_option("-o", "--output", dest="outputGff3FileName", action="store", type="string", help="output file [compulsory] [format: output file in gff3 format]")
-    (options, args) = parser.parse_args()
-    
-    iFON = FindOverlaps_naif(options.inputRefGff3FileName, options.inputQueryGff3FileName)
-    iFON.setOutputGff3FileName(options.outputGff3FileName)
-    iFON.run()
-    iFON.close()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCIndex.py
--- a/SMART/Java/Python/ncList/NCIndex.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,55 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-from SMART.Java.Python.structure.Transcript import Transcript
-
-class NCIndex(object):
-
-    def __init__(self, verbosity):
-        self._verbosity = verbosity
-        self._step      = 10000
-        self._indices   = []
-
-    def setStep(self, step):
-        self._step = step
-
-    def addTranscript(self, end, index):
-        binStart = len(self._indices)
-        binEnd   = int(end / self._step)
-        for bin in range(binStart, binEnd+1):
-            self._indices.append(index)
-
-    def getIndex(self, transcript):
-        bin = int(transcript.getStart() / self._step)
-        if bin >= len(self._indices):
-            return self._indices[-1]
-        return self._indices[bin]
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCIndex.pyc
b
Binary file SMART/Java/Python/ncList/NCIndex.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCList.py
--- a/SMART/Java/Python/ncList/NCList.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,337 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import os, os.path\n-import struct\n-import shelve\n-import sys\n-from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle\n-from SMART.Java.Python.ncList.NCIndex import NCIndex\n-from SMART.Java.Python.misc.Progress import Progress\n-\n-LONG_SIZE = struct.calcsize(\'l\')\n-\n-H = 0\n-L = 1\n-T = 2\n-G = 3\n-\n-H_CELL_SIZE = 2\n-L_CELL_SIZE = 5\n-T_CELL_SIZE = 6\n-\n-START   = 0\n-END\t = 1\n-ADDRESS = 2\n-LIST\t= 3\n-PARENT  = 4\n-NEW\t = 5\n-LENGTH  = 1\n-\n-def pack(input):\n-\treturn struct.pack("l", long(input))\n-def unpack(input):\n-\treturn struct.unpack("l", input)[0]\n-\n-\n-class NCList(object):\n-\n-\tdef __init__(self, verbosity):\n-\t\tself._verbosity\t\t         = verbosity\n-\t\tself._subPos\t\t\t     = 0\n-\t\tself._parentPos\t\t         = 0\n-\t\tself._nbLines\t\t\t     = 0\n-\t\tself._nbLists\t\t\t     = 0\n-\t\tself._chromosome\t\t     = None\n-\t\tself._transcriptFileName     = None\n-\t\tself._lHandle\t\t\t     = None\n-\t\tself._hHandle\t\t\t     = None\n-\t\tself._tHandle\t\t\t     = None\n-\t\tself._parser\t\t\t     = None\n-\t\tself._sizeDict\t\t         = {H: H_CELL_SIZE, L: L_CELL_SIZE, T: T_CELL_SIZE}\n-\t\tself._offsets\t\t\t     = {H: 0, L: 0, G: 0}\n-\t\tself._fileNameDict\t         = {}\n-\t\tself._handleDict\t\t     = {}\n-\t\tself._createIndex\t\t     = False\n-\t\tself._missingValues\t         = dict([table, {}] for table in self._sizeDict)\n-\t\tself._missingValues[T][LIST] = -1\n-\t\tself._missingValues[L][LIST] =  0\n-\t\tself._missingValues[T][NEW]  = 
-1\n-\n-\tdef __del__(self):\n-\t\tfor handle in (self._lHandle, self._hHandle):\n-\t\t\tif handle != None:\n-\t\t\t\thandle.close()\n-\n-\tdef createIndex(self, boolean):\n-\t\tself._createIndex = boolean\n-\n-\tdef setChromosome(self, chromosome):\n-\t\tself._chromosome = chromosome\n-\n-\tdef setFileName(self, fileName):\n-\t\tself._transcriptFileName = fileName\n-\t\tself._parser = NCListFileUnpickle(fileName, self._verbosity)\n-\t\tself._setFileNames(fileName)\n-\n-\tdef setNbElements(self, nbElements):\n-\t\tself._nbLines = nbElements\n-\n-\tdef setOffset(self, fileType, offset):\n-\t\tself._offsets[fileType] = offset\n-\n-\tdef _setFileNames(self, fileName):\n-\t\tif self._chromosome != None and fileName != None:\n-\t\t\tcoreName = os.path.splitext(fileName)[0]\n-\t\t\tif "SMARTTMPPATH" in os.environ:\n-\t\t\t\tcoreName = os.path.join(os.environ["SMARTTMPPATH"], coreName)\n-\t\t\tself._hFileName = "%s_H.bin" % (coreName)\n-\t\t\tself._lFileName = "%s_L.bin" % (coreName)\n-\t\t\tself._tFileName = "%s_T.bin" % (coreName)\n-\t\t\tself._fileNameDict = {H: self._hFileName, L: self._lFileName, T: self._tFileName'..b's, "Filling table T", self._verbosity-5)\n-\t\tfor i, transcript in enumerate(self._parser.getIterator()):\n-\t\t\tself._writeValue(T, i, START,   transcript.getStart())\n-\t\t\tself._writeValue(T, i, END,\t transcript.getEnd())\n-\t\t\tself._writeValue(T, i, ADDRESS, self._parser.getCurrentTranscriptAddress())\n-\t\t\tself._writeValue(T, i, PARENT,  -1)\n-\t\t\tself._writeValue(T, i, LIST,\t-1)\n-\t\t\tprogress.inc()\n-\t\tprogress.done()\n-\t\tprogress = Progress(self._nbLists, "Filling table H", self._verbosity-5)\n-\t\tfor i in xrange(self._nbLists):\n-\t\t\tself._writeValue(H, i, LENGTH, 0)\n-\t\t\tprogress.inc()\n-\t\tprogress.done()\n-\n-\tdef _labelLists(self):\n-\t\tprogress = Progress(self._nbLines, "Getting table structure", self._verbosity-5)\n-\t\tnextL = 0\n-\t\tfor i in xrange(self._nbLines):\n-\t\t\tp\t = i - 1\n-\t\t\tstart = 
self._readValue(T, i, START)\n-\t\t\tend   = self._readValue(T, i, END)\n-\t\t\twhile p != -1 and (start < self._readValue(T, p, START) or end > self._readValue(T, p, END)):\n-\t\t\t\tp = self._readValue(T, p, PARENT)\n-\t\t\tthisL = self._readValue(T, p, LIST)\n-\t\t\tif thisL == -1:\n-\t\t\t\t#print "entering"\n-\t\t\t\tthisL  = nextL\n-\t\t\t\tnextL += 1\n-\t\t\t\tlength = 0\n-\t\t\t\tself._writeValue(T, p, LIST, thisL)\n-\t\t\telse:\n-\t\t\t\tlength = self._readValue(H, thisL, LENGTH)\n-\t\t\tself._writeValue(T, i,\t PARENT, p)\n-\t\t\tself._writeValue(H, thisL, LENGTH, length + 1)\n-\t\t\tprogress.inc()\n-\t\tprogress.done()\n-\n-\tdef _computeSubStart(self):\n-\t\tprogress = Progress(self._nbLines, "Getting table sub-lists", self._verbosity-5)\n-\t\ttotal = 0\n-\t\tfor i in xrange(self._nbLists):\n-\t\t\tself._writeValue(H, i, START, total)\n-\t\t\ttotal += self._readValue(H, i, LENGTH)\n-\t\t\tself._writeValue(H, i, LENGTH, 0)\n-\t\t\tprogress.inc()\n-\t\tprogress.done()\n-\n-\tdef _computeAbsPosition(self):\n-\t\tprogress = Progress(self._nbLines, "Writing table", self._verbosity-5)\n-\t\tself._sizeFirstList = 0\n-\t\tfor i in xrange(self._nbLines):\n-\t\t\ts  = self._readValue(T, i,  START)\n-\t\t\te  = self._readValue(T, i,  END)\n-\t\t\ta  = self._readValue(T, i,  ADDRESS)\n-\t\t\tpt = self._readValue(T, i,  PARENT)\n-\t\t\th  = self._readValue(T, pt, LIST)\n-\t\t\tpl = self._readValue(T, pt, NEW)\n-\t\t\tnb = self._readValue(H, h,  LENGTH)\n-\t\t\tl  = self._readValue(H, h,  START) + nb\n-\t\t\tself._writeValue(T, i, NEW,\t l)\n-\t\t\tself._writeValue(L, l, START,   s)\n-\t\t\tself._writeValue(L, l, END,\t e)\n-\t\t\tself._writeValue(L, l, ADDRESS, a)\n-\t\t\tself._writeValue(L, l, LIST,\t-1)\n-\t\t\tself._writeValue(L, l, PARENT,  pl)\n-\t\t\tself._writeValue(H, h, LENGTH,  nb+1)\n-\t\t\tif nb == 0:\n-\t\t\t\t#print "adding it"\n-\t\t\t\tself._writeValue(L, pl, LIST, h)\n-\t\t\tif pl == -1:\n-\t\t\t\tself._sizeFirstList += 1\n-\t\t\t\tif 
self._createIndex:\n-\t\t\t\t\tself._index.addTranscript(e, l)\n-\t\t\tprogress.inc()\n-\t\tprogress.done()\n-\n-\tdef closeFiles(self):\n-\t\tfor handle in self._handleDict.values():\n-\t\t\thandle.close()\n-\t\tdel self._handleDict\n-\t\tself._lHandle = None\n-\t\tself._hHandle = None\n-\t\tself._tHandle = None\n-\t\tself._parser = None\n-\n-\tdef openFiles(self):\n-\t\tself._lHandle = open(self._fileNameDict[L], "rb")\n-\t\tself._hHandle = open(self._fileNameDict[H], "rb")\n-\t\tself._handleDict = {H: self._hHandle, L: self._lHandle}\n-\t\tself._parser  = NCListFileUnpickle(self._transcriptFileName, self._verbosity)\n-\n-\tdef _cleanFiles(self):\n-\t\tself.closeFiles()\n-\t\tos.remove(self._fileNameDict[T])\n-\n-\tdef _getPosition(self, table, line, key):\n-\t\thandle = self._handleDict[table]\n-\t\thandle.seek(self._sizeDict[table] * line * LONG_SIZE + key * LONG_SIZE)\n-\t\treturn handle\n-\n-\tdef _writeValue(self, table, line, key, value):\n-\t\t#print "writing", table, line, key, "<-", value\n-\t\tif line == -1:\n-\t\t\tself._missingValues[table][key] = value\n-\t\t\treturn\n-\t\thandle = self._getPosition(table, line, key)\n-\t\thandle.write(pack(value))\n-\n-\tdef _readValue(self, table, line, key):\n-\t\t#print "reading", table, line, key, "->",\n-\t\tif line == -1:\n-\t\t\t#print self._missingValues[table][key]\n-\t\t\treturn self._missingValues[table][key]\n-\t\thandle = self._getPosition(table, line, key)\n-\t\tr = unpack(handle.read(LONG_SIZE))\n-\t\t#print r\n-\t\treturn r\n-\n-\tdef getIndex(self):\n-\t\treturn self._index\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCList.pyc
b
Binary file SMART/Java/Python/ncList/NCList.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCListCursor.py
--- a/SMART/Java/Python/ncList/NCListCursor.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,325 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import os, os.path, struct\n-from commons.core.parsing.GffParser import GffParser\n-from SMART.Java.Python.misc.Progress import Progress\n-\n-\n-class Data(object):\n-    def __init__(self, hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end):\n-        self.hIndex           = hIndex\n-        self.transcript       = transcript\n-        self.firstChildLIndex = firstChildLIndex\n-        self.lastChildLIndex  = lastChildLIndex\n-        self.start            = start\n-        self.end              = end\n-\n-class NCListCursor(object):\n-\n-    def __init__(self, cursor = None, ncList = None, lIndex = 0, verbosity = 0):\n-        self._verbosity = verbosity\n-        self._mainListData = []\n-        if cursor:\n-            self.copy(cursor)\n-        else:\n-            self._ncList = ncList\n-            self.setLIndex(lIndex)\n-\n-    def setLIndex(self, lIndex):\n-        self._lIndex             = lIndex\n-        self._start              = None\n-        self._end                = None\n-        self._hIndex             = None\n-        self._gffIndex           = None\n-        self._parentGffIndex     = None\n-        self._parentLIndex       = None\n-        self._parentHIndex       = None\n-        self._parentStart        = None\n-        self._parentEnd          = None\n-        self._transcript         = None\n-        self._firstSiblingLIndex = None\n-        self._lastSiblingLIndex  = None\n-        self._firstChildLIndex   = None\n-        self._lastChildLIndex    = None\n-        self._mainListIndex      
= lIndex if lIndex < self._ncList.getSizeFirstList() else None\n-\n-    def precompute(self):\n-        self._mainListIndex = 0\n-        progress = Progress(self._ncList.getSizeFirstList(), "Precomputing data", self._verbosity)\n-        for i in range(self._ncList.getSizeFirstList()):\n-            gffIndex, hIndex, parentLIndex, start, end = self._ncList.getLLineElements(i)\n-            transcript = self._ncList.getIntervalFromAdress(gffIndex)\n-            firstChildLIndex, nbChildren = self._ncList.getHLineElements(hIndex)\n-            lastChildLIndex = -1 if firstChildLIndex == -1 else firstChildLIndex + nbChildren-1\n-            self._mainListData.append(Data(hIndex, transcript, firstChildLIndex, lastChildLIndex, start, end))\n-            progress.inc()\n-        progress.done()\n-\n-    def _updateFromMainListData(self):\n-        if not self._mainListData or self._lIndex >= self._ncList.getSizeFirstList():\n-            #p'..b'-        self._gffIndex         = None\n-        self._transcript       = None\n-        self._firstChildLIndex = None\n-        self._lastChildLIndex  = None\n-\n-    def moveSibling(self, lIndex):\n-        if self._lIndex < self._ncList.getSizeFirstList() - 1:\n-            self._mainListIndex = lIndex\n-            self._updateFromMainListData()\n-        self._lIndex           = lIndex\n-        self._hIndex           = None\n-        self._start            = None\n-        self._end              = None\n-        self._gffIndex         = None\n-        self._transcript       = None\n-        self._firstChildLIndex = None\n-        self._lastChildLIndex  = None\n-\n-    def moveLastSibling(self):\n-        if self._lIndex < self._ncList.getSizeFirstList() - 1:\n-            self._mainListIndex = self._ncList.getSizeFirstList() - 1\n-            self._updateFromMainListData()\n-        if self._lastSiblingLIndex == None:\n-            self._getSiblingData()\n-        self._lIndex           = self._lastSiblingLIndex\n-        
self._hIndex           = None\n-        self._start            = None\n-        self._end              = None\n-        self._gffIndex         = None\n-        self._transcript       = None\n-        self._firstChildLIndex = None\n-        self._lastChildLIndex  = None\n-\n-    def moveDown(self):\n-        if self._firstChildLIndex == None:\n-            self._getChildrenData()\n-        self._parentLIndex      = self._lIndex\n-        self._parentHIndex      = self._hIndex\n-        self._parentGffIndex    = self._gffIndex\n-        self._lIndex            = self._firstChildLIndex\n-        self._lastSiblingLIndex = self._lastChildLIndex\n-        self._hIndex            = None\n-        self._gffIndex          = None\n-        self._transcript        = None\n-        self._firstChildLIndex  = None\n-        self._lastChildLIndex   = None\n-        self._parentStart       = self._start\n-        self._parentEnd         = self._end\n-        self._start             = None\n-        self._end               = None\n-\n-    def isOut(self):\n-        return (self._lIndex == -1)\n-\n-    def isTop(self):\n-        if self._parentLIndex == None:\n-            self._getCurrentData()\n-        return (self._parentLIndex == -1)\n-\n-    def hasChildren(self):\n-        if self._hIndex == None:\n-            self._getCurrentData()\n-        if self._hIndex == -1:\n-            return False\n-        if self._firstChildLIndex == None:\n-            self._getChildrenData()\n-        return (self._firstChildLIndex != -1)\n-\n-    def copy(self, cursor):\n-        self._ncList             = cursor._ncList\n-        self._lIndex             = cursor._lIndex\n-        self._hIndex             = cursor._hIndex\n-        self._gffIndex           = cursor._gffIndex\n-        self._parentLIndex       = cursor._parentLIndex\n-        self._parentHIndex       = cursor._parentHIndex\n-        self._parentGffIndex     = cursor._parentGffIndex\n-        self._transcript         = 
cursor._transcript\n-        self._firstSiblingLIndex = cursor._firstSiblingLIndex\n-        self._lastSiblingLIndex  = cursor._lastSiblingLIndex\n-        self._firstChildLIndex   = cursor._firstChildLIndex\n-        self._lastChildLIndex    = cursor._lastChildLIndex\n-        self._mainListData       = cursor._mainListData\n-        self._mainListIndex      = cursor._mainListIndex\n-        self._verbosity          = cursor._verbosity\n-        self._parentStart        = cursor._parentStart\n-        self._parentEnd          = cursor._parentEnd\n-        self._start              = cursor._start\n-        self._end                = cursor._end\n-\n-    def __str__(self):\n-        return "NC-list: %s, Lindex: %s, Hindex: %s, GFFindex: %s, start: %s, end: %s, parent Lindex: %s, parent Hindex: %s, parent GFFindex: %s, transcript: %s, last sibling: %s" % (self._ncList, self._lIndex, self._hIndex, self._gffIndex, self._start, self._end, self._parentLIndex, self._parentHIndex, self._parentGffIndex, self._transcript, self._lastSiblingLIndex)\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCListCursor.pyc
b
Binary file SMART/Java/Python/ncList/NCListCursor.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCListFilePickle.py
--- a/SMART/Java/Python/ncList/NCListFilePickle.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,123 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-try:
-  import cPickle as pickle
-except:
-  import pickle
-from SMART.Java.Python.structure.Transcript import Transcript
-
-
-class NCListFilePickle(object):
-
- def __init__(self, fileName, verbosity = 1):
- self.fileName  = fileName
- self.handle = open(fileName, "wb")
- self.verbosity = verbosity
-
- def __del__(self):
- if self.handle != None:
- self.handle.close()
-
- def addTranscript(self, transcript):
- pickle.dump(transcript, self.handle, -1)
-
- def write(self):
- pass
-
- def close(self):
- self.__del__()
-
-
-class NCListFileUnpickle(object):
-
- def __init__(self, fileName, verbosity = 1):
- self.handle    = open(fileName, "rb")
- self.verbosity    = verbosity
- self.initAddress   = 0
- self.address    = self.initAddress
- self.nbTranscripts = None
- self.fileName    = fileName
- self.over    = False
- self.chromosome    = None
-
- def __del__(self):
- if self.handle != None:
- self.handle.close()
-
- def reset(self):
- self.handle.seek(0)
- self.initAddress = 0
-
- def setChromosome(self, chromosome):
- self.chromosome = chromosome
-
- def getNbTranscripts(self):
- if self.nbTranscripts != None:
- return self._nbTranscripts
- self.nbTranscripts = 0
- for transcript in self.getIterator():
- self_nbTranscripts += 1
- return self.nbTranscripts
-
- def gotoAddress(self, address):
- self.handle.seek(address)
- self.address = address
-
- def getNextTranscript(self):
- self.address = self.handle.tell()
- try:
- transcript = pickle.load(self.handle)
- if self.chromosome != None and transcript.getChromosome() != self.chromosome:
- self.over = True
- return False
- return transcript
- except EOFError:
- self.over = True
- return False
-
- def getIterator(self):
- self.gotoAddress(self.initAddress)
- while True:
- transcript = self.getNextTranscript()
- if not transcript:
- self.over = True
- return
- yield transcript
-
- def setInitAddress(self, address):
- self.initAddress = address
-
- def getCurrentTranscriptAddress(self):
- return self.address
-
- def isOver(self):
- return self.over
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCListFilePickle.pyc
b
Binary file SMART/Java/Python/ncList/NCListFilePickle.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCListHandler.py
--- a/SMART/Java/Python/ncList/NCListHandler.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,125 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-import struct
-try:
- import cPickle as pickle
-except:
- import pickle
-from SMART.Java.Python.ncList.NCList import NCList
-from SMART.Java.Python.ncList.NCIndex import NCIndex
-from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
-
-LONG_SIZE = struct.calcsize('l')
-
-INFO_PER_NCLIST = 5
-H_FILE     = 0
-L_FILE     = 1
-G_FILE     = 2
-FIRST_LIST_SIZE = 3
-INDEX     = 4
-
-H = 0
-L = 1
-T = 2
-G = 3
-
-def pack(input):
- return struct.pack("l", long(input))
-def unpack(input):
- return struct.unpack("l", input)[0]
-
-
-class NCListHandler(object):
-
- def __init__(self, verbosity):
- self._verbosity = verbosity
- self._index     = False
-
- def setFileName(self, fileName):
- self._fileName = fileName
- self._handle   = open(fileName, "rb")
-
- def loadData(self):
- self._chromosomes = pickle.load(self._handle)
- self._nbElements = 0
- self._nbElementsPerChromosome = {}
- self._ncLists = {}
- for chromosome in self._chromosomes:
- self._nbElementsPerChromosome[chromosome] = unpack(self._handle.read(LONG_SIZE))
- self._nbElements += self._nbElementsPerChromosome[chromosome]
- self._headerPos = self._handle.tell()
- for i, chromosome in enumerate(self._chromosomes):
- ncList = NCList(self._verbosity)
- ncList._hHandle = self._handle
- ncList._lHandle = self._handle
- ncList._parser  = NCListFileUnpickle(self._fileName)
- self._handle.seek(self._headerPos + i * INFO_PER_NCLIST * LONG_SIZE + H_FILE * LONG_SIZE)
- ncList.setOffset(H, unpack(self._handle.read(LONG_SIZE)))
- self._handle.seek(self._headerPos + i * INFO_PER_NCLIST * LONG_SIZE + L_FILE * LONG_SIZE)
- ncList.setOffset(L, unpack(self._handle.read(LONG_SIZE)))
- self._handle.seek(self._headerPos + i * INFO_PER_NCLIST * LONG_SIZE + G_FILE * LONG_SIZE)
- ncList.setOffset(G, unpack(self._handle.read(LONG_SIZE)))
- self._handle.seek(self._headerPos + i * INFO_PER_NCLIST * LONG_SIZE + FIRST_LIST_SIZE * LONG_SIZE)
- ncList._sizeFirstList = unpack(self._handle.read(LONG_SIZE))
- self._handle.seek(self._headerPos + i * INFO_PER_NCLIST * LONG_SIZE + INDEX * LONG_SIZE)
- indices = unpack(self._handle.read(LONG_SIZE))
- if indices != -1:
- self._handle.seek(indices)
- data = pickle.load(self._handle)
- index = NCIndex(self._verbosity)
- index._indices = data
- ncList._index = index
- self._ncLists[chromosome] = ncList
-
- def getChromosomes(self):
- return self._chromosomes
-
- def getNbElements(self):
- return self._nbElements
-
- def getNbElementsPerChromosome(self):
- return self._nbElementsPerChromosome
-
- def getNCLists(self):
- return self._ncLists
-
- def getParser(self, chromosome = None):
- parser = NCListFileUnpickle(self._fileName)
- if chromosome == None:
- parser.setInitAddress(unpack(self._handle, self._headerPos + G_FILE * LONG_SIZE))
- return parser
- i = self._chromosomes.index(chromosome)
- self._handle.seek(self._headerPos + i * INFO_PER_NCLIST * LONG_SIZE + G_FILE * LONG_SIZE)
- pos = unpack(self._handle.read(LONG_SIZE))
- parser.setInitAddress(pos)
- parser.setChromosome(chromosome)
- return parser
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCListHandler.pyc
b
Binary file SMART/Java/Python/ncList/NCListHandler.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCListMerger.py
--- a/SMART/Java/Python/ncList/NCListMerger.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,126 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-import struct, os, shutil
-try:
- import cPickle as pickle
-except:
- import pickle
-
-LONG_SIZE = struct.calcsize('l')
-
-INFO_PER_NCLIST = 5
-H_FILE     = 0
-L_FILE     = 1
-G_FILE     = 2
-FIRST_LIST_SIZE = 3
-INDEX     = 4
-
-def pack(input):
- return struct.pack("l", long(input))
-def unpack(input):
- return struct.unpack("l", input)[0]
-
-
-class NCListMerger(object):
-
- def __init__(self, verbosity):
- self._verbosity = verbosity
- self._index  = False
-
- def setFileName(self, fileName):
- self._handle = open(fileName, "wb")
-
- def setNCLists(self, ncLists):
- self._ncLists = ncLists
- self._chromosomes = sorted(self._ncLists.keys())
-
- def addIndex(self, boolean):
- self._index = boolean
-
- def merge(self):
- self._writeHeader()
- self._addNCLists()
- self._handle.close()
- self._removeInputFiles()
-
- def _writeHeader(self):
- pickle.dump(self._chromosomes, self._handle, -1)
- for chromosome in self._chromosomes:
- self._handle.write(pack(self._ncLists[chromosome]._nbLines))
- self._headerPos = self._handle.tell()
- for chromosome in self._chromosomes:
- for i in range(INFO_PER_NCLIST):
- self._handle.write(pack(-1))
-
- def _addInHeader(self, i, info, value = None):
- currentPos = self._handle.tell()
- if value == None:
- value = currentPos
- self._handle.seek(self._headerPos + i * INFO_PER_NCLIST * LONG_SIZE + info * LONG_SIZE)
- self._handle.write(pack(value))
- self._handle.seek(currentPos)
-
- def _addNCLists(self):
- self._inputFileNames = []
- for i, chromosome in enumerate(self._chromosomes):
- ncList = self._ncLists[chromosome]
- self._addInHeader(i, H_FILE)
- hFile = open(ncList._hFileName)
- shutil.copyfileobj(hFile, self._handle)
- hFile.close()
- self._inputFileNames.append(ncList._hFileName)
- for i, chromosome in enumerate(self._chromosomes):
- ncList = self._ncLists[chromosome]
- self._addInHeader(i, L_FILE)
- lFile = open(ncList._lFileName)
- shutil.copyfileobj(lFile, self._handle)
- lFile.close()
- self._inputFileNames.append(ncList._lFileName)
- for i, chromosome in enumerate(self._chromosomes):
- ncList = self._ncLists[chromosome]
- self._addInHeader(i, FIRST_LIST_SIZE, ncList.getSizeFirstList())
- if self._index:
- for i, chromosome in enumerate(self._chromosomes):
- ncList = self._ncLists[chromosome]
- self._addInHeader(i, INDEX)
- pickle.dump(ncList.getIndex()._indices, self._handle, -1)
- for i, chromosome in enumerate(self._chromosomes):
- ncList = self._ncLists[chromosome]
- self._addInHeader(i, G_FILE)
- tFile = open(ncList._transcriptFileName)
- shutil.copyfileobj(tFile, self._handle)
- tFile.close()
- self._inputFileNames.append(ncList._transcriptFileName)
-
- def _removeInputFiles(self):
- for fileName in self._inputFileNames:
- os.remove(fileName)
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCListMerger.pyc
b
Binary file SMART/Java/Python/ncList/NCListMerger.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCListParser.py
--- a/SMART/Java/Python/ncList/NCListParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,74 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2012
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-import random, os, time
-from optparse import OptionParser
-from commons.core.parsing.ParserChooser import ParserChooser
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.ncList.NCList import NCList
-from SMART.Java.Python.ncList.NCListCursor import NCListCursor
-try:
-   import cPickle as pickle
-except:
-   import pickle
-
-class NCListParser(object):
-    
-    def __init__(self, fileName, verbosity = 1):
-        self._fileName                = fileName
-        self._ncLists                 = {}
-        self._sortedFileNames         = {}
-        self._nbElements              = 0
-        self._nbElementsPerChromosome = {}
-        self._verbosity               = verbosity
-        
-    def parse(self):
-        handle                        = open(self._fileName)
-        self._sortedFileNames         = pickle.load(handle)
-        self._nbElements              = pickle.load(handle)
-        self._nbElementsPerChromosome = pickle.load(handle)
-        self._ncLists                 = pickle.load(handle)
-        for ncList in self._ncLists.values():
-            ncList._reopenFiles()
-        handle.close()
-
-    def getSortedFileNames(self):
-        return self._sortedFileNames
-
-    def getNbElements(self):
-        return self._nbElements
-
-    def getNbElementsPerChromosome(self):
-        return self._nbElementsPerChromosome
-
-    def getNCLists(self):
-        return self._ncLists
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/NCListParser.pyc
b
Binary file SMART/Java/Python/ncList/NCListParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/__init__.pyc
b
Binary file SMART/Java/Python/ncList/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/MockFindOverlapsWithSeveralIntervals.py
--- a/SMART/Java/Python/ncList/test/MockFindOverlapsWithSeveralIntervals.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,197 +0,0 @@\n-from SMART.Java.Python.misc import Utils\n-\n-class MockFindOverlapsWithOneInterval (object) :\n-  def write(self, inFileName):\n-    Utils.writeFile(inFileName, "chr1\\ttest\\ttest1.1\\t0\\t1000\\t.\\t+\\t.\\tID=test1.1;Name=test1.1\\n")\n-\n-class MockFindOverlapsWithServeralIntervals_case1 (object) :\n-\tdef write(self,inFileName):\n-\t\tf = open(inFileName, "w")\n-\t\tf.write("chr1\\ttest\\ttest1.1\\t0\\t1000\\t1001\\t+\\t.\\tID=test1.1;Name=test1.1\\n")\n-\t\tf.write("chr1\\ttest\\ttest1.2\\t50\\t350\\t301\\t+\\t.\\tID=test1.2;Name=test1.2\\n")\n-\t\tf.write("chr1\\ttest\\ttest1.3\\t100\\t600\\t501\\t+\\t.\\tID=test1.3;Name=test1.3\\n")\n-\t\tf.write("chr1\\ttest\\ttest1.4\\t200\\t450\\t251\\t+\\t.\\tID=test1.4;Name=test1.4\\n")\n-\t\tf.write("chr1\\ttest\\ttest1.5\\t700\\t950\\t251\\t+\\t.\\tID=test1.5;Name=test1.5\\n")\n-\t\tf.write("chr1\\ttest\\ttest1.6\\t800\\t900\\t101\\t+\\t.\\tID=test1.6;Name=test1.6\\n")\n-\t\tf.write("chr1\\ttest\\ttest1.7\\t1200\\t1300\\t101\\t+\\t.\\tID=test1.7;Name=test1.7\\n")\n-\t\tf.close()\n-\n-class MockFindOverlapsWithServeralIntervals_case2 (object) :\n-\tdef write(self,inFileName):\n-\t\tf = open(inFileName,\'w\')\n-\t\tf.write("chr1\\ttest\\ttest2.1\\t0\\t500\\t501\\t+\\t.\\tID=test2.1;Name=test2.1\\n")\n-\t\tf.write("chr1\\ttest\\ttest2.2\\t50\\t450\\t401\\t+\\t.\\tID=test2.2;Name=test2.2\\n")\n-\t\tf.write("chr1\\ttest\\ttest2.3\\t100\\t400\\t301\\t+\\t.\\tID=test2.3;Name=test2.3\\n")\n-\t\tf.write("chr1\\ttest\\ttest2.4\\t100\\t200\\t101\\t+\\t.\\tID=test2.4;Name=test2.4\\n")\n-\t\tf.write("chr1\\ttest\\ttest2.5\\t900\\t1200\\t301\\t+\\t.\\tID=test2.5;Name=test2.5\\n")\n-\t\tf.close()\n-\n-class MockFindOverlapsWithServeralIntervals_case3 (object) :\n-\tdef write(self,inFileName):\n-\t\tf = 
open(inFileName,\'w\')\n-\t\tf.write("chr1\\ttest\\ttest3.1\\t0\\t500\\t501\\t+\\t.\\tID=test3.1;Name=test3.1\\n")\n-\t\tf.write("chr1\\ttest\\ttest3.2\\t50\\t450\\t401\\t+\\t.\\tID=test3.2;Name=test3.2\\n")\n-\t\tf.write("chr1\\ttest\\ttest3.3\\t100\\t400\\t301\\t+\\t.\\tID=test3.3;Name=test3.3\\n")\n-\t\tf.write("chr1\\ttest\\ttest3.4\\t100\\t200\\t101\\t+\\t.\\tID=test3.4;Name=test3.4\\n")\n-\t\tf.write("chr1\\ttest\\ttest3.5\\t300\\t400\\t101\\t+\\t.\\tID=test3.5;Name=test3.5\\n")\n-\t\tf.write("chr1\\ttest\\ttest3.6\\t800\\t1000\\t201\\t+\\t.\\tID=test3.6;Name=test3.6\\n")\n-\t\tf.close()\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_case4_5 (object) :\n-\tdef write(self,inFileName):\n-\t\tf = open(inFileName,\'w\')\n-\t\tf.write("chr1\\ttest\\ttest4.1\\t0\\t1000\\t1001\\t+\\t.\\tID=test4.1;Name=test4.1\\n")\n-\t\tf.write("chr1\\ttest\\ttest4.2\\t200\\t800\\t601\\t+\\t.\\tID=test4.2;Name=test4.2\\n")\n-\t\tf.write("chr1\\ttest\\ttest4.3\\t400\\t600\\t201\\t+\\t.\\tID=test4.3;Name=test4.3\\n")\n-\t\tf.close()\t\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_case6_7 (object) :\n-\tdef write(self,inFileName):\n-\t\tf = open(inFileName,\'w\')\n-\t\tf.write("chr1\\ttest\\ttest6.1\\t0\\t1000\\t1001\\t+\\t.\\tID=test6.1;Name=test6.1\\n")\n-\t\tf.write("chr1\\ttest\\ttest6.2\\t100\\t300\\t201\\t+\\t.\\tID=test6.2;Name=test6.2\\n")\n-\t\tf.write("chr1\\ttest\\ttest6.3\\t400\\t500\\t101\\t+\\t.\\tID=test6.3;Name=test6.3\\n")\n-\t\tf.write("chr1\\ttest\\ttest6.4\\t510\\t520\\t11\\t+\\t.\\tID=test6.4;Name=test6.4\\n")\n-\t\tf.write("chr1\\ttest\\ttest6.5\\t850\\t950\\t001\\t+\\t.\\tID=test6.5;Name=test6.5\\n")\n-\t\tf.close()\t\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_case8 (object) :\n-\tdef write(self,inFileName):\n-\t\tf = 
open(inFileName,\'w\')\n-\t\tf.write("chr1\\ttest\\ttest8.1\\t0\\t1000\\t1001\\t+\\t.\\tID=test8.1;Name=test8.1\\n")\n-\t\tf.write("chr1\\ttest\\ttest8.2\\t100\\t200\\t101\\t+\\t.\\tID=test8.2;Name=test8.2\\n")\n-\t\tf.write("chr1\\ttest\\ttest8.3\\t300\\t400\\t101\\t+\\t.\\tID=test8.3;Name=test8.3\\n")\n-\t\tf.close()\t\t\n-\n-class MockFindOverlapsWithServeralIntervals_case9 (object) :\n-\tdef write(self,inFileName):\n-\t\tf = open(inFileName,\'w\')\n-\t\tf.write("chr1\\ttest\\ttest9.1\\t0\\t1000\\t1001\\t+\\t.\\tID=test9.1;Name=test9.1\\n")\n-\t\tf.write("chr1\\ttest\\ttest9.2\\t600\\t700\\t101\\t+\\t.\\tID=test9.2;Name=test9.2\\n")\n-\t\tf.write("chr1\\ttest\\ttest9.3\\t800\\t1200\\t401\\t+\\t.\\tID=test9.3;Name=test9.3\\n")\n-\t\tf.close()\n-\n-class MockFindOverlapsWithServeralIntervals_case10 (object) :\n-\tdef write(self,inFileName):\n-\t\tf = open(inFileName,\'w\')\n-\t\tf.write("chr1\\ttest\\ttest10.1\\t0\\t1000\\t1001\\t+'..b'ose()\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_query_case2 (object):\n-\tdef write(self, fileName):\n-\t\tf = open(fileName, \'w\')\n-\t\tf.write("chr1\\tquery\\tquery2.1\\t150\\t300\\t151\\t+\\t.\\tID=query_1;Name=query2.1\\n")\n-\t\tf.write("chr1\\tquery\\tquery2.2\\t300\\t450\\t151\\t+\\t.\\tID=query_2;Name=query2.2\\n")\n-\t\tf.write("chr1\\tquery\\tquery2.3\\t480\\t800\\t321\\t+\\t.\\tID=query_3;Name=query2.3\\n")\n-\t\tf.write("chr1\\tquery\\tquery2.4\\t560\\t800\\t241\\t+\\t.\\tID=query_4;Name=query2.4\\n")\n-\t\tf.write("chr1\\tquery\\tquery2.5\\t850\\t1000\\t151\\t+\\t.\\tID=query_5;Name=query2.5\\n")\n-\t\tf.close()\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_query_case3 (object):\n-\tdef write(self, fileName):\n-\t\tf = open(fileName, 
\'w\')\n-\t\tf.write("chr1\\tquery\\tquery3.1\\t150\\t250\\t101\\t+\\t.\\tID=query_1;Name=query3.1\\n")\n-\t\tf.write("chr1\\tquery\\tquery3.2\\t380\\t400\\t21\\t+\\t.\\tID=query_2;Name=query3.2\\n")\n-\t\tf.write("chr1\\tquery\\tquery3.3\\t480\\t520\\t41\\t+\\t.\\tID=query_3;Name=query3.3\\n")\n-\t\tf.write("chr1\\tquery\\tquery3.4\\t510\\t700\\t191\\t+\\t.\\tID=query_4;Name=query3.4\\n")\n-\t\tf.write("chr1\\tquery\\tquery3.5\\t900\\t950\\t41\\t+\\t.\\tID=query_5;Name=query3.5\\n")\n-\t\tf.close()\t\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_query_case4 (object):\n-\tdef write(self, fileName):\n-\t\tf = open(fileName, \'w\')\n-\t\tf.write("chr1\\tquery\\tquery4.1\\t400\\t500\\t101\\t+\\t.\\tID=query_1;Name=query4.1\\n")\n-\t\tf.write("chr1\\tquery\\tquery4.2\\t450\\t600\\t151\\t+\\t.\\tID=query_2;Name=query4.2\\n")\n-\t\tf.write("chr1\\tquery\\tquery4.3\\t700\\t800\\t101\\t+\\t.\\tID=query_3;Name=query4.3\\n")\n-\t\tf.close()\t\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_query_case5 (object):\n-\tdef write(self, fileName):\n-\t\tf = open(fileName, \'w\')\n-\t\tf.write("chr1\\tquery\\tquery5.1\\t850\\t950\\t101\\t+\\t.\\tID=query_1;Name=query5.1\\n")\n-\t\tf.close()\t\t\t\t\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_query_case6 (object):\n-\tdef write(self, fileName):\n-\t\tf = open(fileName, \'w\')\n-\t\tf.write("chr1\\tquery\\tquery6.1\\t200\\t300\\t101\\t+\\t.\\tID=query_1;Name=query6.1\\n")\n-\t\tf.write("chr1\\tquery\\tquery6.2\\t800\\t900\\t101\\t+\\t.\\tID=query_2;Name=query6.2\\n")\n-\t\tf.close()\t\t\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_query_case7 (object):\n-\tdef write(self, fileName):\n-\t\tf = open(fileName, 
\'w\')\n-\t\tf.write("chr1\\tquery\\tquery6.1\\t530\\t550\\t21\\t+\\t.\\tID=query_1;Name=query6.1\\n")\n-\t\tf.write("chr1\\tquery\\tquery6.2\\t600\\t700\\t101\\t+\\t.\\tID=query_2;Name=query6.2\\n")\n-\t\tf.write("chr1\\tquery\\tquery6.3\\t650\\t900\\t251\\t+\\t.\\tID=query_3;Name=query6.3\\n")\n-\t\tf.close()\t\t\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_query_case8 (object):\n-\tdef write(self, fileName):\n-\t\tf = open(fileName, \'w\')\n-\t\tf.write("chr1\\tquery\\tquery7.1\\t500\\t600\\t101\\t+\\t.\\tID=query_1;Name=query7.1\\n")\n-\t\tf.write("chr1\\tquery\\tquery7.2\\t700\\t800\\t101\\t+\\t.\\tID=query_2;Name=query7.2\\n")\n-\t\tf.write("chr1\\tquery\\tquery7.3\\t900\\t1100\\t201\\t+\\t.\\tID=query_3;Name=query7.3\\n")\n-\t\tf.write("chr1\\tquery\\tquery7.4\\t1200\\t1300\\t101\\t+\\t.\\tID=query_4;Name=query7.4\\n")\n-\t\tf.close()\t\t\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_query_case9 (object):\n-\tdef write(self, fileName):\n-\t\tf = open(fileName, \'w\')\n-\t\tf.write("chr1\\tquery\\tquery8.1\\t400\\t400\\t101\\t+\\t.\\tID=query_1;Name=query8.1\\n")\n-\t\tf.write("chr1\\tquery\\tquery8.2\\t550\\t650\\t101\\t+\\t.\\tID=query_2;Name=query8.2\\n")\n-\t\tf.close()\t\t\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_query_case10 (object):\n-\tdef write(self, fileName):\n-\t\tf = open(fileName, \'w\')\n-\t\tf.write("chr1\\tquery\\tquery10.1\\t700\\t800\\t101\\t+\\t.\\tID=query_1;Name=query10.1\\n")\n-\t\tf.write("chr1\\tquery\\tquery10.2\\t900\\t1000\\t101\\t+\\t.\\tID=query_2;Name=query10.2\\n")\n-\t\tf.write("chr1\\tquery\\tquery10.3\\t1100\\t1300\\t201\\t+\\t.\\tID=query_3;Name=query10.3\\n")\n-\t\tf.close()\t\t\t\n-\t\t\n-class MockFindOverlapsWithServeralIntervals_query_case11 (object):\n-\tdef write(self, fileName):\n-\t\tf = open(fileName, 
\'w\')\n-\t\tf.write("chr1\\tquery\\tquery11.1\\t420\\t480\\t61\\t+\\t.\\tID=query_1;Name=query11.1\\n")\n-\t\tf.write("chr1\\tquery\\tquery11.2\\t450\\t715\\t266\\t+\\t.\\tID=query_2;Name=query11.2\\n")\n-\t\tf.close()\t\t\n-\t\t\n-\t\t\t\t\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py
--- a/SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,118 +0,0 @@
-import os
-import random
-from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator
-from commons.core.writer.TranscriptWriter import TranscriptWriter
-from SMART.Java.Python.structure.Transcript import Transcript
-from commons.core.parsing.GffParser import GffParser
-
-class MockFindOverlaps_randomExample(object):
-
-    def __init__(self, fileName, ID, numberOfReads, chromSize):
-        self._fileName = fileName
-        self._ID = ID
-        self._numberOfReads = numberOfReads
-        self._chromSize = chromSize
-
-    def write(self):
-        iMFO_RE = MockFindOverlaps_randomExample_NonOrder(self._fileName, self._ID, self._numberOfReads, self._chromSize)
-        iMFO_RE.write()
-        cmd = 'sort -f -n -k4 -k5.4rn -o %s %s'%(self._fileName, self._fileName)
-        os.system(cmd)
-
-class MockFindOverlaps_randomExample_NonOrder(object):
-
- def __init__(self, fileName, ID, numberOfReads, chromSize):
- self._fileName = fileName
- self._ID = ID
- self._numberOfReads = numberOfReads
- self._chromSize = chromSize
-
- def write(self):
- iRRG = RandomRegionsGenerator(2)
- iRRG.setMinSize(36)
- iRRG.setMaxSize(100)
- iRRG.setGenomeSize(self._chromSize)
- iRRG.setChromosomeName("chr1")
- iRRG.setStrands(False)
- iRRG.setNumber(self._numberOfReads)
- iRRG.setOutputFile(self._fileName)
- iRRG.run()
-
-
-class MockFindOverlaps_randomExample_MOverlaps(object):
-
- def __init__(self, refFileName, queryFileName, overlapNumber, numberOfReads, chromSize):
- self._refFileName = refFileName
- self._queryFileName = queryFileName
- self._overlapNumber = overlapNumber
- self._numberOfReads = numberOfReads
- self._chromSize = chromSize
-
- def createRandomExample(self):
- id = 'reference'
- iRSS = MockFindOverlaps_randomExample(self._refFileName, id, self._numberOfReads, self._chromSize)
- iRSS.write()
- self.queryWriter = TranscriptWriter(self._queryFileName , 'gff3')
- totalOverlap = 0
- while totalOverlap != self._overlapNumber:
- totalOverlap = 0
- i = 0
- while i < 10:
- query = self.createRandomTranscript(i, id)
- overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
- while overlapNumber > self._overlapNumber:
- query = self.createRandomTranscript(i, id)
- overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
- totalOverlap = overlapNumber
- i += 1
- self.queryWriter.addTranscript(query)
- self.queryWriter.write()
- self.queryWriter.close()
-# os.rename("%s.gff3" % (self._queryFileName), self._queryFileName)
-
- cmd = 'sort -f -n -k4 -k5.4rn -o %s %s'%(self._refFileName, self._refFileName)
- os.system(cmd)
- cmd = 'sort -f -n -k4 -k5.4rn -o %s %s'%(self._queryFileName, self._queryFileName)
- os.system(cmd)
-
- def createRandomTranscript(self, cpt, id):
- iRRG = RandomRegionsGenerator(2)
- strand = '+'
- chromosome = 'chr1'
- size = random.randint(36, 100)
- iRRG.setSize(size)
- start = random.randint(0, 1000-size)
- transcript = iRRG.createTranscript(chromosome, start, size, strand, cpt)
- IDdetail = '%s_%d'%(id,cpt)
- transcript.setTagValue('ID', IDdetail)
- transcript.setName(IDdetail)  
- return transcript
-
- def isOverlap(self, query, ref):
- if (query.getStart() <= ref.getEnd() and query.getEnd() >= ref.getStart()):
- return True 
- else:
- return False
-
- def getIntervalFromAdress(self, fileName, address):
- iParser = GffParser(fileName)
- iParser.gotoAddress(int(address))
- iTranscrit = iParser.getNextTranscript()
- iParser.close()
- return iTranscrit
-
- def getOverlapNumber(self, query, refFileName, totalOverlap):
- count = totalOverlap
- fRef = open(refFileName, 'r')
- address = fRef.tell()
- line = fRef.readline()
- while line != '':
- ref = self.getIntervalFromAdress(refFileName, address)
- if self.isOverlap(query, ref):
- count += 1
- address = fRef.tell()
- line = fRef.readline()
- fRef.close()
- return count
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/Test_F_FileSorter.py
--- a/SMART/Java/Python/ncList/test/Test_F_FileSorter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,84 +0,0 @@
-import os
-import unittest
-import struct
-from SMART.Java.Python.misc import Utils
-from SMART.Java.Python.ncList.FileSorter import FileSorter
-from SMART.Java.Python.structure.Transcript import Transcript
-from commons.core.writer.Gff3Writer import Gff3Writer
-from commons.core.parsing.GffParser import GffParser
-from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
-
-
-class Test_F_FileSorter(unittest.TestCase):
-
-    def setUp(self):
-        self._inputGff3FileName = 'inputFile.gff3'
-        self._outputFileName    = 'outputFile.pkl'
-        
-    def tearDown(self):
-        return
-        for fileName in (self._inputGff3FileName, self._sortedFileName, self._expHFileName, self._expLFileName, self._obsHFileName, self._obsLFileName, self._addressFileName):
-            if os.path.exists(fileName):
-                os.remove(fileName)
-        
-    def test_unique(self):
-        transcript = self._createTranscript("chr1", 100, 200, "test1.1")
-        parser     = self._writeAndSortAndParse([transcript])
-        self.assertEquals(parser.getNbTranscripts(), 1)
-        for transcript in parser.getIterator():
-            self._checkTranscript(transcript, "chr1", 100, 200, "test1.1")
-            
-    def test_simple(self):
-        transcript1 = self._createTranscript("chr1", 300, 400, "test1.1")
-        transcript2 = self._createTranscript("chr1", 100, 200, "test1.2")
-        parser = self._writeAndSortAndParse([transcript1, transcript2])
-        self.assertEquals(parser.getNbTranscripts(), 2)
-        for cpt, transcript in enumerate(parser.getIterator()):
-            if cpt == 0:
-                self._checkTranscript(transcript, "chr1", 100, 200, "test1.2")
-            else:
-                self._checkTranscript(transcript, "chr1", 300, 400, "test1.1")
-
-    def test_same_start(self):
-        transcript1 = self._createTranscript("chr1", 100, 200, "test1.1")
-        transcript2 = self._createTranscript("chr1", 100, 300, "test1.2")
-        parser = self._writeAndSortAndParse([transcript1, transcript2])
-        self.assertEquals(parser.getNbTranscripts(), 2)
-        for cpt, transcript in enumerate(parser.getIterator()):
-            if cpt == 0:
-                self._checkTranscript(transcript, "chr1", 100, 300, "test1.2")
-            else:
-                self._checkTranscript(transcript, "chr1", 100, 200, "test1.1")
-
-    def _writeAndSortAndParse(self, transcripts):
-        writer = Gff3Writer(self._inputGff3FileName, 0)
-        for transcript in transcripts:
-            writer.addTranscript(transcript)
-        writer.close()
-        parser = GffParser(self._inputGff3FileName, 0)
-        fs = FileSorter(parser, 0)
-        fs.setOutputFileName(self._outputFileName)
-        fs.sort()
-        parser = NCListFileUnpickle(self._outputFileName, 0)
-        return parser
-
-    def _createTranscript(self, chromosome, start, end, name):
-        transcript = Transcript()
-        transcript.setChromosome(chromosome)
-        transcript.setStart(start)
-        transcript.setEnd(end)
-        transcript.setDirection("+")
-        transcript.setName(name)
-        return transcript
-
-    def _checkTranscript(self, transcript, chromosome, start, end, name):
-        self.assertEquals(transcript.getChromosome(), chromosome)
-        self.assertEquals(transcript.getStart(),      start)
-        self.assertEquals(transcript.getEnd(),        end)
-        self.assertEquals(transcript.getDirection(),  1)
-        self.assertEquals(transcript.getName(),       name)
-        
-            
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/Test_F_FindOverlapsWithOneInterval.py
--- a/SMART/Java/Python/ncList/test/Test_F_FindOverlapsWithOneInterval.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,121 +0,0 @@
-import unittest
-import struct
-import os
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.ncList.FindOverlapsWithOneInterval import FindOverlapsWithOneInterval
-from SMART.Java.Python.misc import Utils
-
-class Test_F_FindOverlapsWithOneInterval(unittest.TestCase):
-
-    def setUp(self):
-        self._inputGff3FileName = 'sortedFile.gff3'
-        self._writeGFF3File(self._inputGff3FileName)
-        self._obsFileName = "overlap.gff3"
-        self._expFileName = "expFile.gff3"
-        self._iFOWOI = FindOverlapsWithOneInterval(0)
-        self._iFOWOI.setFileName(self._inputGff3FileName, "gff3")
-        self._iFOWOI.setOutputFileName(self._obsFileName)
-        
-    def tearDown(self):
-        os.remove(self._inputGff3FileName)
-        os.remove(self._obsFileName)
-        os.remove(self._expFileName)
-        
-    def test_run_general(self):
-        self._iFOWOI.setInterval("chr1", 500, 850)
-        self._iFOWOI.run()
-        self._writeExpGFF3File_general(self._expFileName)
-        self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
-        
-#   def test_run_general_asScript(self):
-#       cmd = 'python ../FindOverlapsWithOneInterval.py -i %s -f gff3 -o %s -c chr1 -s 500 -e 850 -v 0' % (self._inputGff3FileName, self._obsFileName)
-#       os.system(cmd)
-#       self._writeExpGFF3File_general(self._expFileName)
-#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
-#       
-#   def test_run_one_overlap(self):
-#       self._iFOWOI.setInterval("chr1", 1250, 1450)
-#       self._iFOWOI.run()
-#       self._writeExpGFF3File_one_overlap(self._expFileName)
-#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
-#       
-#   def test_run_one_overlap_asScript(self):
-#       cmd = 'python ../FindOverlapsWithOneInterval.py -i %s -f gff3 -o %s -c chr1 -s 1250 -e 1450 -v 0' % (self._inputGff3FileName, self._obsFileName)
-#       os.system(cmd)
-#       self._writeExpGFF3File_one_overlap(self._expFileName) 
-#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))       
-#       
-#   def test_run_all_overlap(self):
-#       self._iFOWOI.setInterval("chr1", 300, 1250)
-#       self._iFOWOI.run()
-#       self._writeExpGff3File_all_overlap(self._expFileName)
-#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
-#       
-#   def test_run_all_overlap_asScript(self):
-#       cmd = 'python ../FindOverlapsWithOneInterval.py -i %s -f gff3 -o %s -c chr1 -s 300 -e 1250 -v 0' % (self._inputGff3FileName, self._obsFileName)
-#       os.system(cmd)        
-#       self._writeExpGff3File_all_overlap(self._expFileName)
-#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
-#               
-#   def test_run_no_overlap_right(self):
-#       self._iFOWOI.setInterval("chr1", 1400, 1500)
-#       self._iFOWOI.run()
-#       f = open(self._expFileName, "w")
-#       f.close()
-#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
-#       
-#   def test_run_no_overlap_right_asScript(self):
-#       cmd = 'python ../FindOverlapsWithOneInterval.py -i %s -f gff3 -o %s -c chr1 -s 1400 -e 1500 -v 0' % (self._inputGff3FileName, self._obsFileName)
-#       os.system(cmd)           
-#       f = open(self._expFileName, "w")
-#       f.close()
-#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
-#               
-#   def test_run_no_overlap_left(self):
-#       self._iFOWOI.setInterval("chr1", 0, 8)
-#       self._iFOWOI.run()
-#       f = open(self._expFileName, "w")
-#       f.close()
-#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))
-#       
-#   def test_run_no_overlap_left_asScript(self):
-#       cmd = 'python ../FindOverlapsWithOneInterval.py -i %s -f gff3 -o %s -c chr1 -s 0 -e 8 -v 0' % (self._inputGff3FileName, self._obsFileName)
-#       os.system(cmd)  
-#       f = open(self._expFileName, "w")
-#       f.close()
-#       self.assertTrue(Utils.diff(self._expFileName, self._obsFileName))                
-
-    def _writeExpGff3File_all_overlap(self, fileName):  
-        f = open(fileName, 'w')
-        f.write("chr1\tS-MART\ttest2.1\t9\t1000\t1001\t+\t.\tID=test2.1;Name=test2.1\n")
-        f.write("chr1\tS-MART\ttest2.2\t50\t350\t301\t+\t.\tID=test2.2;Name=test2.2\n")
-        f.write("chr1\tS-MART\ttest2.3\t100\t600\t501\t+\t.\tID=test2.3;Name=test2.3\n")
-        f.write("chr1\tS-MART\ttest2.4\t200\t450\t251\t+\t.\tID=test2.4;Name=test2.4\n")
-        f.write("chr1\tS-MART\ttest2.5\t700\t950\t251\t+\t.\tID=test2.5;Name=test2.5\n")
-        f.write("chr1\tS-MART\ttest2.6\t800\t900\t101\t+\t.\tID=test2.6;Name=test2.6\n")
-        f.write("chr1\tS-MART\ttest2.7\t1200\t1300\t101\t+\t.\tID=test2.7;Name=test2.7\n")
-        f.close()
-        
-    def _writeExpGFF3File_one_overlap(self, fileName):
-        f = open(fileName, "w")
-        f.write("chr1\tS-MART\ttest2.7\t1200\t1300\t101\t+\t.\tID=test2.7;Name=test2.7\n")
-        f.close()    
-        
-    def _writeExpGFF3File_general(self, fileName):
-        f = open(fileName, "w")
-        f.write("chr1\tS-MART\ttranscript\t500\t850\t.\t+\t.\tnbOverlaps=4;overlapsWith=test2.1--test2.3--test2.5--test2.6\n")
-        f.close()
-        
-    def _writeGFF3File(self, fileName):
-        f = open(fileName, "w")
-        f.write("chr1\ttest\ttest2.1\t9\t1000\t1001\t+\t.\tID=test2.1;Name=test2.1\n")
-        f.write("chr1\ttest\ttest2.2\t50\t350\t301\t+\t.\tID=test2.2;Name=test2.2\n")
-        f.write("chr1\ttest\ttest2.3\t100\t600\t501\t+\t.\tID=test2.3;Name=test2.3\n")
-        f.write("chr1\ttest\ttest2.4\t200\t450\t251\t+\t.\tID=test2.4;Name=test2.4\n")
-        f.write("chr1\ttest\ttest2.5\t700\t950\t251\t+\t.\tID=test2.5;Name=test2.5\n")
-        f.write("chr1\ttest\ttest2.6\t800\t900\t101\t+\t.\tID=test2.6;Name=test2.6\n")
-        f.write("chr1\ttest\ttest2.7\t1200\t1300\t101\t+\t.\tID=test2.7;Name=test2.7\n")
-        f.close()
-
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/Test_F_FindOverlapsWithSeveralIntervals.py
--- a/SMART/Java/Python/ncList/test/Test_F_FindOverlapsWithSeveralIntervals.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,173 +0,0 @@\n-import unittest\n-import os, os.path\n-from SMART.Java.Python.ncList.FindOverlapsWithSeveralIntervals import FindOverlapsWithSeveralIntervals\n-from SMART.Java.Python.misc import Utils\n-\n-class Test_F_FindOverlapsWithSeveralIntervals(unittest.TestCase):\n-\n-    def setUp(self):\n-        self._inputRefGff3FileName   = \'sorted_Ref.gff3\'\n-        self._inputQueryGff3FileName = \'sorted_Query.gff3\'\n-        self._outputGff3FileName     = \'output.gff3\'\n-        self._expOutputFileName      = \'expOutGff3.gff3\'\n-        self._writeQueryGff3File(self._inputQueryGff3FileName)\n-        self._writeGFF3File(self._inputRefGff3FileName)\n-        self._iFOWSI = FindOverlapsWithSeveralIntervals(0)\n-        self._iFOWSI.setRefFileName(self._inputRefGff3FileName, "gff3")\n-        self._iFOWSI.setQueryFileName(self._inputQueryGff3FileName, "gff3")\n-        self._iFOWSI.setOutputFileName(self._outputGff3FileName)\n-        self._iFOWSI.prepareIntermediateFiles()\n-        self._iFOWSI.createNCLists()\n-        \n-    def tearDown(self):\n-        for fileName in (self._inputRefGff3FileName, self._inputQueryGff3FileName, self._outputGff3FileName, self._expOutputFileName):\n-            if os.path.exists(fileName):\n-                os.remove(fileName)\n-        \n-    def test_run_general(self):\n-        self._writeQueryGff3File(self._inputQueryGff3FileName)\n-        self._writeGFF3File(self._inputRefGff3FileName)\n-        self._iFOWSI = FindOverlapsWithSeveralIntervals(0)\n-        self._iFOWSI.setRefFileName(self._inputRefGff3FileName, "gff3")\n-        self._iFOWSI.setQueryFileName(self._inputQueryGff3FileName, "gff3")\n-        self._iFOWSI.setOutputFileName(self._outputGff3FileName)\n-        self._iFOWSI.prepareIntermediateFiles()\n-        self._iFOWSI.createNCLists()\n-        self._iFOWSI.compare()\n-        self._iFOWSI.close()\n-        self._writeExpOutFile_general(self._expOutputFileName)\n-        
self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))  \n-        \n-    def test_run_general_asScript(self):\n-        cmd = \'python ../FindOverlapsWithSeveralIntervals.py -i %s -f gff3 -j %s -g gff3 -o %s -v 0\' % (self._inputQueryGff3FileName, self._inputRefGff3FileName, self._outputGff3FileName)      \n-        os.system(cmd)\n-        self._writeExpOutFile_general(self._expOutputFileName)\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))         \n-\n-      \n-    def test_run_overlap_special_case(self):\n-        inputQueryGff3FileName = \'query2.gff3\'\n-        self._writeQueryGff3File2(inputQueryGff3FileName)\n-        self._iFOWSI = FindOverlapsWithSeveralIntervals(0)\n-        self._iFOWSI.setRefFileName(self._inputRefGff3FileName, "gff3")\n-        self._iFOWSI.setQueryFileName(inputQueryGff3FileName, "gff3")\n-        self._iFOWSI.setOutputFileName(self._outputGff3FileName)\n-        self._iFOWSI.prepareIntermediateFiles()\n-        self._iFOWSI.createNCLists()\n-        self._iFOWSI.compare()\n-        self._iFOWSI.close()\n-        self._writeExpOutFile_special_case(self._expOutputFileName)        \n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))   \n-        os.remove(inputQueryGff3FileName) \n-        \n-    def test_run_overlap_special_case_asScript(self):\n-        inputQueryGff3FileName = \'query2.gff3\'\n-        self._writeQueryGff3File2(inputQueryGff3FileName)\n-        cmd = \'python ../FindOverlapsWithSeveralIntervals.py -i %s -f gff3 -j %s -g gff3 -o %s -v 0\' % (inputQueryGff3FileName, self._inputRefGff3FileName, self._outputGff3FileName)      \n-        os.system(cmd) \n-        self._writeExpOutFile_special_case(self._expOutputFileName)        \n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))         \n-        os.remove(inputQueryGff3FileName) \n-                \n-    def 
_writeExpOutFile_special_case(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\tS-MART\ttest2\t1250\t1300\t781\t+\t.\tnbOverl'..b'r1\\tS-MART\\ttest2.4\\t200\\t450\\t251\\t+\\t.\\tName=test2.4;OverlapWith=query_2;score=251;feature=test2.4;ID=test2.4\\n")\n-        f.write("chr1\\tS-MART\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tName=test2.5;OverlapWith=query_2;score=251;feature=test2.5;ID=test2.5\\n")\n-        f.write("chr1\\tS-MART\\ttest2.6\\t800\\t900\\t101\\t+\\t.\\tName=test2.6;OverlapWith=query_2;score=101;feature=test2.6;ID=test2.6\\n")\n-        f.close()           \n-\n-    def _writeExpOutFile_overlap_to_children(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tS-MART\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tName=test2.1;OverlapWith=query_3;score=1001;feature=test2.1;ID=test2.1\\n") \n-        f.write("chr1\\tS-MART\\ttest2.3\\t100\\t600\\t501\\t+\\t.\\tName=test2.3;OverlapWith=query_3;score=501;feature=test2.3;ID=test2.3\\n") \n-        f.write("chr1\\tS-MART\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tName=test2.5;OverlapWith=query_3;score=251;feature=test2.5;ID=test2.5\\n") \n-        f.write("chr1\\tS-MART\\ttest2.6\\t800\\t900\\t101\\t+\\t.\\tName=test2.6;OverlapWith=query_3;score=101;feature=test2.6;ID=test2.6\\n") \n-        f.close()         \n-\n-    def _writeExpOutFile_not_overlap_to_children(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tS-MART\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tName=test2.1;OverlapWith=query_1;score=1001;feature=test2.1;ID=test2.1\\n")\n-        f.write("chr1\\tS-MART\\ttest2.2\\t50\\t350\\t301\\t+\\t.\\tName=test2.2;OverlapWith=query_1;score=301;feature=test2.2;ID=test2.2\\n")\n-        f.write("chr1\\tS-MART\\ttest2.3\\t100\\t600\\t501\\t+\\t.\\tName=test2.3;OverlapWith=query_1;score=501;feature=test2.3;ID=test2.3\\n")\n-        
f.write("chr1\\tS-MART\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tName=test2.1;OverlapWith=query_4;score=1001;feature=test2.1;ID=test2.1\\n")\n-        f.write("chr1\\tS-MART\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tName=test2.5;OverlapWith=query_4;score=251;feature=test2.5;ID=test2.5\\n")\n-        f.close()        \n-\n-    def _writeExpOutFile_no_overlap_right(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.close()                 \n-\n-    def _writeExpOutFile_one_overlap(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tS-MART\\ttest2.7\\t1200\\t1300\\t101\\t+\\t.\\tName=test2.7;OverlapWith=query_5;score=101;feature=test2.7;ID=test2.7\\n")\n-        f.close()        \n-        \n-    def _writeQueryGff3File2(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest1\\t1100\\t1150\\t126\\t+\\t.\\tID=query_1;Name=test1.1\\n")\n-        f.write("chr1\\tquery\\ttest2\\t1250\\t1300\\t781\\t+\\t.\\tID=query_2;Name=test1.2\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest1.1\\t25\\t150\\t126\\t+\\t.\\tID=query_1;Name=test1.1\\n")\n-        f.write("chr1\\tquery\\ttest1.2\\t70\\t850\\t781\\t+\\t.\\tID=query_2;Name=test1.2\\n")\n-        f.write("chr1\\tquery\\ttest1.3\\t550\\t850\\t201\\t+\\t.\\tID=query_3;Name=test1.3\\n")\n-        f.write("chr1\\tquery\\ttest1.4\\t925\\t1025\\t101\\t+\\t.\\tID=query_4;Name=test1.4\\n")\n-        f.write("chr1\\tquery\\ttest1.5\\t1201\\t1210\\t10\\t+\\t.\\tID=query_5;Name=test1.5\\n")\n-        f.write("chr1\\tquery\\ttest1.6\\t1500\\t1600\\t101\\t+\\t.\\tID=query_6;Name=test1.6\\n")\n-        f.close()\n-        \n-    def _writeGFF3File(self, fileName):\n-        f = open(fileName, "w")\n-        f.write("chr1\\ttest\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tID=test2.1;Name=test2.1\\n")\n-        
f.write("chr1\\ttest\\ttest2.2\\t50\\t350\\t301\\t+\\t.\\tID=test2.2;Name=test2.2\\n")\n-        f.write("chr1\\ttest\\ttest2.3\\t100\\t600\\t501\\t+\\t.\\tID=test2.3;Name=test2.3\\n")\n-        f.write("chr1\\ttest\\ttest2.4\\t200\\t450\\t251\\t+\\t.\\tID=test2.4;Name=test2.4\\n")\n-        f.write("chr1\\ttest\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tID=test2.5;Name=test2.5\\n")\n-        f.write("chr1\\ttest\\ttest2.6\\t800\\t900\\t101\\t+\\t.\\tID=test2.6;Name=test2.6\\n")\n-        f.write("chr1\\ttest\\ttest2.7\\t1200\\t1300\\t101\\t+\\t.\\tID=test2.7;Name=test2.7\\n")\n-        f.close()\n-        \n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/Test_F_FindOverlaps_naif.py
--- a/SMART/Java/Python/ncList/test/Test_F_FindOverlaps_naif.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b"@@ -1,455 +0,0 @@\n-import unittest\n-import os\n-from commons.core.utils.FileUtils import FileUtils\n-from SMART.Java.Python.misc import Utils\n-from SMART.Java.Python.ncList.FindOverlaps_naif import FindOverlaps_naif\n-from SMART.Java.Python.ncList.test.MockFindOverlapsWithSeveralIntervals import *\n-\n-class Test_F_FindOverlaps_naif(unittest.TestCase):\n-\n-    def setUp(self):\n-        self._inputRefGff3FileName = 'ref.gff3'\n-        self._writeGFF3File(self._inputRefGff3FileName)\n-        self._inputQueryGff3FileName = 'query.gff3'\n-        self._writeQueryGff3File(self._inputQueryGff3FileName)\n-        self._outputGff3FileName = 'output.gff3'\n-        self._expOutputFileName = 'expOutGff3.gff3'\n-        self._iFON = FindOverlaps_naif(self._inputRefGff3FileName, self._inputQueryGff3FileName)\n-        self._iFON.setOutputGff3FileName(self._outputGff3FileName)\n-        \n-    def tearDown(self):\n-        os.remove(self._inputRefGff3FileName)\n-        os.remove(self._inputQueryGff3FileName)\n-        os.remove(self._outputGff3FileName)\n-        os.remove(self._expOutputFileName)\n-        \n-    def test_run_general(self):\n-        self._iFON.run()\n-        self._iFON.close()\n-        self._writeExpOutFile_general(self._expOutputFileName)\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))  \n-        \n-    def test_run_general_asScript(self):\n-        cmd = 'python ../FindOverlaps_naif.py -i %s -j %s -o %s' % (self._inputRefGff3FileName, self._inputQueryGff3FileName, self._outputGff3FileName)      \n-        os.system(cmd)\n-        self._writeExpOutFile_general(self._expOutputFileName)\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))         \n-\n-    def test_run_overlap_special_case(self):\n-        inputQueryGff3FileName = 'query2.gff3'\n-        self._writeQueryGff3File2(inputQueryGff3FileName)\n-        iFON = FindOverlaps_naif(self._inputRefGff3FileName, 
inputQueryGff3FileName)\n-        iFON.setOutputGff3FileName(self._outputGff3FileName)\n-        iFON.run()\n-        iFON.close()\n-        self._writeExpOutFile_special_case(self._expOutputFileName)        \n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))   \n-        os.remove(inputQueryGff3FileName) \n-        \n-    def test_run_overlap_special_case_asScript(self):\n-        inputQueryGff3FileName = 'query2.gff3'\n-        self._writeQueryGff3File2(inputQueryGff3FileName)\n-        cmd = 'python ../FindOverlaps_naif.py -i %s -j %s -o %s' % (self._inputRefGff3FileName, inputQueryGff3FileName, self._outputGff3FileName)      \n-        os.system(cmd) \n-        self._writeExpOutFile_special_case(self._expOutputFileName)        \n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))         \n-        os.remove(inputQueryGff3FileName) \n-        \n-    def test_case_2(self):\n-        inputRefGff3FileName = 'ref_case2.gff3'\n-        iMock = MockFindOverlapsWithServeralIntervals_case2()\n-        iMock.write(inputRefGff3FileName)\n-        inputQueryGff3FileName = 'query_case2.gff3'\n-        self._writeQueryGff3File_case2(inputQueryGff3FileName)  \n-        iFON = FindOverlaps_naif(inputRefGff3FileName, inputQueryGff3FileName)\n-        iFON.setOutputGff3FileName(self._outputGff3FileName)\n-        iFON.run()\n-        iFON.close()\n-        self._writeExpOutFile_case2(self._expOutputFileName)        \n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))   \n-        os.remove(inputQueryGff3FileName) \n-        os.remove(inputRefGff3FileName) \n-        \n-    def test_case_3(self):\n-        inputRefGff3FileName = 'ref_case3.gff3'\n-        iMock = MockFindOverlapsWithServeralIntervals_case3()\n-        iMock.write(inputRefGff3FileName)\n-        inputQueryGff3FileName = 'query_case3.gff3'\n-        self._writeQueryGff3File_case3(inputQueryGff3FileName)  
\n-        iFON = FindOverlaps_naif(inputRefGff3FileName, inputQueryGff3FileName)\n-     "..b'(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest3.1\\t150\\t250\\t101\\t+\\t.\\tID=query_1;Name=test3.1\\n")\n-        f.write("chr1\\tquery\\ttest3.2\\t380\\t400\\t21\\t+\\t.\\tID=query_2;Name=test3.2\\n")\n-        f.write("chr1\\tquery\\ttest3.3\\t480\\t520\\t41\\t+\\t.\\tID=query_3;Name=test3.3\\n")\n-        f.write("chr1\\tquery\\ttest3.4\\t510\\t700\\t191\\t+\\t.\\tID=query_4;Name=test3.4\\n")\n-        f.write("chr1\\tquery\\ttest3.5\\t900\\t950\\t51\\t+\\t.\\tID=query_5;Name=test3.5\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case4(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest4.1\\t400\\t500\\t101\\t+\\t.\\tID=query_1;Name=test4.1\\n")\n-        f.write("chr1\\tquery\\ttest4.2\\t450\\t600\\t151\\t+\\t.\\tID=query_2;Name=test4.2\\n")\n-        f.write("chr1\\tquery\\ttest4.3\\t700\\t800\\t101\\t+\\t.\\tID=query_3;Name=test4.3\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case5(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest5.1\\t850\\t950\\t101\\t+\\t.\\tID=query_1;Name=test5.1\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case6(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest6.1\\t200\\t300\\t101\\t+\\t.\\tID=query_1;Name=test6.1\\n")\n-        f.write("chr1\\tquery\\ttest6.2\\t800\\t900\\t101\\t+\\t.\\tID=query_2;Name=test6.2\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case7(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest7.1\\t530\\t550\\t21\\t+\\t.\\tID=query_1;Name=test7.1\\n")\n-        f.write("chr1\\tquery\\ttest7.2\\t600\\t700\\t101\\t+\\t.\\tID=query_2;Name=test7.2\\n")\n-        
f.write("chr1\\tquery\\ttest7.3\\t650\\t900\\t251\\t+\\t.\\tID=query_3;Name=test7.3\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case8(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest8.1\\t500\\t600\\t101\\t+\\t.\\tID=query_1;Name=test8.1\\n")\n-        f.write("chr1\\tquery\\ttest8.2\\t700\\t800\\t101\\t+\\t.\\tID=query_2;Name=test8.2\\n")\n-        f.write("chr1\\tquery\\ttest8.3\\t900\\t1100\\t201\\t+\\t.\\tID=query_3;Name=test8.3\\n")\n-        f.write("chr1\\tquery\\ttest8.4\\t1200\\t1300\\t101\\t+\\t.\\tID=query_4;Name=test8.4\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case9(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest9.1\\t400\\t500\\t101\\t+\\t.\\tID=query_1;Name=test9.1\\n")\n-        f.write("chr1\\tquery\\ttest9.2\\t550\\t650\\t101\\t+\\t.\\tID=query_2;Name=test9.2\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case10(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest10.1\\t700\\t800\\t101\\t+\\t.\\tID=query_1;Name=test10.1\\n")\n-        f.write("chr1\\tquery\\ttest10.2\\t900\\t1000\\t101\\t+\\t.\\tID=query_2;Name=test10.2\\n")\n-        f.write("chr1\\tquery\\ttest10.3\\t1100\\t1300\\t201\\t+\\t.\\tID=query_3;Name=test10.3\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case11(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest11.1\\t420\\t480\\t61\\t+\\t.\\tID=query_1;Name=test11.1\\n")\n-        f.write("chr1\\tquery\\ttest11.2\\t450\\t715\\t266\\t+\\t.\\tID=query_2;Name=test11.2\\n")\n-        f.close()\n-        \n-    def _writeGFF3File(self, fileName):\n-        f = open(fileName, "w")\n-        f.write("chr1\\ttest\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tID=test2.1;Name=test2.1\\n")\n-        f.write("chr1\\ttest\\ttest2.2\\t50\\t350\\t301\\t+\\t.\\tID=test2.2;Name=test2.2\\n")\n-        
f.write("chr1\\ttest\\ttest2.3\\t100\\t600\\t501\\t+\\t.\\tID=test2.3;Name=test2.3\\n")\n-        f.write("chr1\\ttest\\ttest2.4\\t200\\t450\\t251\\t+\\t.\\tID=test2.4;Name=test2.4\\n")\n-        f.write("chr1\\ttest\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tID=test2.5;Name=test2.5\\n")\n-        f.write("chr1\\ttest\\ttest2.6\\t800\\t900\\t101\\t+\\t.\\tID=test2.6;Name=test2.6\\n")\n-        f.write("chr1\\ttest\\ttest2.7\\t1200\\t1300\\t101\\t+\\t.\\tID=test2.7;Name=test2.7\\n")\n-        f.close()\n-        \n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/Test_F_FindOverlaps_randomExample.py
--- a/SMART/Java/Python/ncList/test/Test_F_FindOverlaps_randomExample.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,48 +0,0 @@
-import unittest
-import os
-import time
-from commons.core.utils.FileUtils import FileUtils
-from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import MockFindOverlaps_randomExample_NonOrder
-from SMART.Java.Python.ncList.FindOverlaps_naif import FindOverlaps_naif
-from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
-
-class Test_F_FindOverlaps_randomExample(unittest.TestCase):
-
-    def setUp(self):
-        self._output_optim = 'output_optim.gff3'
-
-    def test_FindOverlaps_NonOrder(self):
-        inputRefGff3FileName = 'refMOverlaps.gff3'
-        inputQueryGff3FileName = 'queryMOverlaps.gff3'
-        outputDataName = 'timeResult.dat'  
-        fTime = open(outputDataName, 'w')  
-        fTime.write('NbRef\tNbQuery\tNbOverlap\ttime\n')   
-        numberOfRefReads = 10
-        chromSize = 100000
-        numberOfQReads = 10
-        print 'ref size = %d,  query size = %d' %(numberOfRefReads, numberOfQReads)
-        iMFOR_ref = MockFindOverlaps_randomExample_NonOrder(inputRefGff3FileName, 'ref', numberOfRefReads, chromSize)
-        iMFOR_ref.write()
-        iMFOR_query = MockFindOverlaps_randomExample_NonOrder(inputQueryGff3FileName,'q', numberOfQReads, chromSize)
-        iMFOR_query.write()
-        iFOO = FindOverlapsOptim(0)
-        iFOO.setRefFileName(inputRefGff3FileName, "gff3")
-        iFOO.setQueryFileName(inputQueryGff3FileName, "gff3")
-        iFOO.setOutputFileName(self._output_optim)
-        startTime_optim = time.time()
-        iFOO.run()
-        iFOO.close()  
-        nbOverlap = iFOO._nbOverlaps
-        endTime_optim = time.time()    
-        totalTime_optim = endTime_optim - startTime_optim
-        print 'we take %s second.' % (totalTime_optim)
-        fTime.write('%d\t%d\t%d\t%.2f\n'%(numberOfRefReads, numberOfQReads, nbOverlap, totalTime_optim))
-        fTime.close()
-        os.remove(inputQueryGff3FileName)
-        os.remove(inputRefGff3FileName)
-        os.remove(self._output_optim)  
-        os.remove(outputDataName)      
-        
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/Test_F_NCList.py
--- a/SMART/Java/Python/ncList/test/Test_F_NCList.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,302 +0,0 @@\n-import os\n-import unittest\n-import struct\n-from SMART.Java.Python.ncList.NCList import NCList\n-from SMART.Java.Python.misc import Utils\n-from commons.core.utils.FileUtils import FileUtils\n-from SMART.Java.Python.ncList.test.MockFindOverlapsWithSeveralIntervals import *\n-from commons.core.parsing.GffParser import GffParser\n-from SMART.Java.Python.ncList.FileSorter import FileSorter\n-\n-class Test_F_NCList(unittest.TestCase):\n-\n-    def setUp(self):\n-        self._inputGff3FileName = \'sortedFile.gff3\'\n-        self._sortedFileName    = \'sortedFile.pkl\'\n-        self._expHFileName      = \'expH.bin\'\n-        self._expLFileName      = \'expL.bin\'\n-        self._obsHFileName      = \'H.bin\'\n-        self._obsLFileName      = \'L.bin\'\n-        self._addressFileName   = \'address.txt\'\n-        self._writeGFF3File(self._inputGff3FileName)\n-        self._ncList = NCList(0)\n-        self._ncList.setChromosome("chr1")\n-        \n-    def tearDown(self):\n-        return\n-        for fileName in (self._inputGff3FileName, self._sortedFileName, self._expHFileName, self._expLFileName, self._obsHFileName, self._obsLFileName, self._addressFileName):\n-            if os.path.exists(fileName):\n-                os.remove(fileName)\n-        \n-    def _sortAndBuild(self):\n-        parser = GffParser(self._inputGff3FileName)\n-        fs = FileSorter(parser, 0)\n-        fs.setOutputFileName(self._sortedFileName)\n-        fs.sort()\n-        self._ncList.setFileName(self._sortedFileName)\n-        self._ncList.setNbElements(parser.getNbTranscripts())\n-        self._ncList.buildLists()\n-\n-    def test_run_with_one_elementSubList(self):\n-        iMock = MockFindOverlapsWithOneInterval()\n-        iMock.write(self._inputGff3FileName)\n-        self._sortAndBuild()\n-        self._writeExpHFile_one_elementSubList()\n-        self._writeExpLFile_one_elementSubList()\n-        
self.assertTrue(FileUtils.are2FilesIdentical(self._expHFileName, self._ncList._hFileName))\n-        self.assertTrue(FileUtils.are2FilesIdentical(self._expLFileName, self._ncList._lFileName))    \n-        \n-    def test_case1(self):\n-        iMock = MockFindOverlapsWithServeralIntervals_case1()\n-        iMock.write(self._inputGff3FileName)\n-        self._sortAndBuild()\n-        self._writeExpHFileCase1()\n-        self._writeExpLFileCase1()\n-        self.assertTrue(FileUtils.are2FilesIdentical(self._expHFileName, self._ncList._hFileName))\n-        self.assertTrue(FileUtils.are2FilesIdentical(self._expLFileName, self._ncList._lFileName))    \n-        \n-    def test_case2(self):\n-        iMock = MockFindOverlapsWithServeralIntervals_case2()\n-        iMock.write(self._inputGff3FileName)\n-        self._sortAndBuild()\n-        self._writeExpHFileCase2()\n-        self._writeExpLFileCase2()\n-        self.assertTrue(FileUtils.are2FilesIdentical(self._expHFileName, self._ncList._hFileName))\n-        self.assertTrue(FileUtils.are2FilesIdentical(self._expLFileName, self._ncList._lFileName))    \n-        \n-    def test_case3(self):\n-        iMock = MockFindOverlapsWithServeralIntervals_case3()\n-        iMock.write(self._inputGff3FileName)\n-        self._sortAndBuild()\n-        self._writeExpHFileCase3()\n-        self._writeExpLFileCase3()\n-        self.assertTrue(FileUtils.are2FilesIdentical(self._expHFileName, self._ncList._hFileName))\n-        self.assertTrue(FileUtils.are2FilesIdentical(self._expLFileName, self._ncList._lFileName))    \n-    \n-    def test_case4_5(self):\n-        iMock = MockFindOverlapsWithServeralIntervals_case4_5()\n-        iMock.write(self._inputGff3FileName)\n-        self._sortAndBuild()\n-        self._writeExpHFileCase4_5()\n-        self._writeExpLFileCase4_5()\n-        self.assertTrue(FileUtils.are2FilesIdentical(self._expHFileName, self._ncList._hFileName))\n-        
self.assertTrue(FileUtils.are2FilesIdentical(self._expLFileName, self._ncList._lFileName))    \n-\n-    def test_case6_7(self):\n-        iMock = MockFindOverlapsWithServeralIntervals_case6_7()\n-        iMock'..b'(self._expHFileName, elements)\n-              \n-    def _writeExpHFileCase11(self):\n-        elements = [0, 2, 2, 2, 4, 2]\n-        self._writeBinFile(self._expHFileName, elements)\n-        \n-    def _writeExpHFileCase12(self):\n-        elements = [0, 1, 1, 3, 4, 1]\n-        self._writeBinFile(self._expHFileName, elements)\n-        \n-    def _writeExpLFile_one_elementSubList(self):\n-        elements = [0, 1000, 0, -1, -1]\n-        self._writeBinFile(self._expLFileName, elements)\n-\n-    def _writeExpLFileCase1(self):\n-        elements = [   0, 1000,    0,  1, -1, \\\n-                    1200, 1300, 2345, -1, -1, \\\n-                      50,  350,  391, -1,  0, \\\n-                     100,  600,  781,  2,  0, \\\n-                     700,  950, 1563,  3,  0, \\\n-                     200,  450, 1172, -1,  3, \\\n-                     800,  900, 1954, -1,  4]\n-        self._writeBinFile(self._expLFileName, elements)\n-        \n-    def _writeExpLFileCase2(self):\n-        elements = [   0,  500,    0,  1, -1, \\\n-                     900, 1200, 1561, -1, -1, \\\n-                      50,  450,  389,  2,  0, \\\n-                     100,  400,  779,  3,  2, \\\n-                     100,  200, 1170, -1,  3]\n-        self._writeBinFile(self._expLFileName, elements)\n-        \n-    def _writeExpLFileCase3(self):\n-        elements = [   0,  500,    0,  1, -1, \\\n-                     800, 1000, 1952, -1, -1, \\\n-                      50,  450,  389,  2,  0, \\\n-                     100,  400,  779,  3,  2, \\\n-                     100,  200, 1170, -1,  3, \\\n-                     300,  400, 1561, -1,  3]\n-        self._writeBinFile(self._expLFileName, elements)\n-             \n-    def _writeExpLFileCase4_5(self):\n-        
elements = [   0, 1000,    0,  1, -1, \\\n-                     200,  800,  391,  2,  0, \\\n-                     400,  600,  782, -1,  1]\n-        self._writeBinFile(self._expLFileName, elements)\n-                           \n-    def _writeExpLFileCase6_7(self):\n-        elements = [   0, 1000,    0,  1, -1, \\\n-                     100,  300,  391, -1,  0, \\\n-                     400,  500,  782, -1,  0, \\\n-                     510,  520, 1173, -1,  0, \\\n-                     850,  950, 1563, -1,  0]\n-        self._writeBinFile(self._expLFileName, elements)\n-        \n-    def _writeExpLFileCase8(self):\n-        elements = [   0, 1000,    0,  1, -1, \\\n-                     100,  200,  391, -1,  0, \\\n-                     300,  400,  782, -1,  0]\n-        self._writeBinFile(self._expLFileName, elements)\n-        \n-    def _writeExpLFileCase9(self):\n-        elements = [   0, 1000,    0,  1, -1, \\\n-                     800, 1200,  782, -1, -1, \\\n-                     600,  700,  391, -1,  0]\n-        self._writeBinFile(self._expLFileName, elements)\n-        \n-    def _writeExpLFileCase10(self):\n-        elements = [   0, 1000,    0,  1, -1, \\\n-                    1200, 1300, 1576, -1, -1, \\\n-                    1400, 1500, 1972, -1, -1, \\\n-                     100,  200,  394, -1,  0, \\\n-                     300,  400,  788, -1,  0, \\\n-                     500,  600, 1182, -1,  0]\n-        self._writeBinFile(self._expLFileName, elements)\n-        \n-    def _writeExpLFileCase11(self):\n-        elements = [   0,  500,    0,  1, -1, \\\n-                     700,  900, 1180,  2, -1, \\\n-                     100,  200,  392, -1,  0, \\\n-                     300,  400,  786, -1,  0, \\\n-                     710,  720, 1574, -1,  1, \\\n-                     740,  750, 1967, -1,  1]\n-        self._writeBinFile(self._expLFileName, elements)\n-              \n-    def _writeExpLFileCase12(self):\n-        elements = [   0, 
1400,    0,  1, -1, \\\n-                     300,  500,  368,  2,  0, \\\n-                     800, 1100, 1106, -1,  0, \\\n-                    1200, 1300, 1476, -1,  0, \\\n-                     300,  500,  737, -1,  1]\n-        self._writeBinFile(self._expLFileName, elements)\n-              \n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/Test_FindOverlapsWithOneInterval.py
--- a/SMART/Java/Python/ncList/test/Test_FindOverlapsWithOneInterval.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,81 +0,0 @@
-import unittest
-import struct
-import os
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.ncList.FindOverlapsWithOneInterval import FindOverlapsWithOneInterval
-from SMART.Java.Python.ncList.NCListCursor import NCListCursor
-
-class Test_FindOverlapsWithOneInterval(unittest.TestCase):
-
-    def setUp(self):
-        self._inputGff3FileName = 'sortedFile.gff3'
-        self._writeGFF3File(self._inputGff3FileName)
-        self._obsFileName = "overlap.gff3"
-        self._iFOWOI = FindOverlapsWithOneInterval(0)
-        self._iFOWOI.setFileName(self._inputGff3FileName, "gff3")
-        self._iFOWOI._chromosome = "chr1"
-        self._iFOWOI.prepareIntermediateFiles()
-        self._iFOWOI.createNCList()
-        self._ncList = self._iFOWOI._ncList
-        self._iFOWOI.setOutputFileName(self._obsFileName)
-        
-    def tearDown(self):
-        return
-        self._iFOWOI.close()
-        for file in (self._inputGff3FileName, self._obsFileName):
-            if os.path.exists(file):
-                os.remove(file)
-        
-    def test_binarySearch_first_element_overlap(self):
-        self._iFOWOI.setInterval("chr1", 500, 850)
-        obsReadPosition = self._iFOWOI.binarySearch(NCListCursor(None, self._ncList, 0, 0), 0, 6)
-        expReadPosition = 0
-        self._iFOWOI.dumpWriter()
-        self._iFOWOI.close()
-        self.assertEquals(expReadPosition, obsReadPosition._lIndex)
-        
-    def test_binarySearch_second_element_overlap(self):
-        self._iFOWOI.setInterval("chr1", 500, 850)
-        obsReadPosition = self._iFOWOI.binarySearch(NCListCursor(None, self._ncList, 2, 0), 2, 6)
-        expReadPosition = 3
-        self._iFOWOI.dumpWriter()
-        self._iFOWOI.close()
-        self.assertEquals(expReadPosition, obsReadPosition._lIndex)
-        
-    def test_binarySearch_empty_subList(self):
-        self._iFOWOI.setInterval("chr1", 500, 850)
-        obsReadPosition = self._iFOWOI.binarySearch(NCListCursor(None, self._ncList, 5, 0), 5, 5)
-        expReadPosition = None
-        self._iFOWOI.dumpWriter()
-        self._iFOWOI.close()
-        self.assertEquals(expReadPosition, obsReadPosition)
-        
-    def test_binarySearch_no_overlap_right(self):
-        self._iFOWOI.setInterval("chr1", 1400, 1500)
-        obsReadPosition = self._iFOWOI.binarySearch(NCListCursor(None, self._ncList, 0, 0), 0, 6)
-        expReadPosition = None
-        self._iFOWOI.dumpWriter()
-        self._iFOWOI.close()
-        self.assertEquals(expReadPosition, obsReadPosition)
-        
-    def test_binarySearch_no_overlap_left(self):
-        self._iFOWOI.setInterval("chr1", 0, 45)       
-        obsReadPosition = self._iFOWOI.binarySearch(NCListCursor(None, self._ncList, 2, 0), 2, 6)
-        expReadPosition = None
-        self._iFOWOI.dumpWriter()
-        self._iFOWOI.close()
-        self.assertEquals(expReadPosition, obsReadPosition)
-
-    def _writeGFF3File(self, fileName):
-        f = open(fileName, "w")
-        f.write("chr1\ttest\ttest2.1\t9\t1000\t1001\t+\t.\tID=test2.1;Name=test2.1\n")
-        f.write("chr1\ttest\ttest2.2\t50\t350\t301\t+\t.\tID=test2.2;Name=test2.2\n")
-        f.write("chr1\ttest\ttest2.3\t100\t600\t501\t+\t.\tID=test2.3;Name=test2.3\n")
-        f.write("chr1\ttest\ttest2.4\t200\t450\t251\t+\t.\tID=test2.4;Name=test2.4\n")
-        f.write("chr1\ttest\ttest2.5\t700\t950\t251\t+\t.\tID=test2.5;Name=test2.5\n")
-        f.write("chr1\ttest\ttest2.6\t800\t900\t101\t+\t.\tID=test2.6;Name=test2.6\n")
-        f.write("chr1\ttest\ttest2.7\t1200\t1300\t101\t+\t.\tID=test2.7;Name=test2.7\n")
-        f.close()
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/Test_FindOverlapsWithSeveralIntervals.py
--- a/SMART/Java/Python/ncList/test/Test_FindOverlapsWithSeveralIntervals.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,160 +0,0 @@
-import unittest
-import os
-from SMART.Java.Python.ncList.FindOverlapsWithSeveralIntervals import FindOverlapsWithSeveralIntervals
-
-class Test_FindOverlapsWithSeveralIntervals(unittest.TestCase):
-
-    def setUp(self):
-        self._inputRefGff3FileName = 'sortedFile.gff3'
-        self._writeGFF3File(self._inputRefGff3FileName)
-        self._inputQueryGff3FileName = 'sorted_Query.gff3'
-        self._writeQueryGff3File(self._inputQueryGff3FileName)
-        self._outputGff3FileName = 'overlaps.gff3'
-        self._iFOWSI = FindOverlapsWithSeveralIntervals(self._inputRefGff3FileName, self._inputQueryGff3FileName)
-        self._iFOWSI.setOutputGff3FileName(self._outputGff3FileName)
-        
-    def tearDown(self):
-        os.remove(self._inputRefGff3FileName)
-        os.remove(self._inputQueryGff3FileName)
-        os.remove(self._outputGff3FileName)
-        self._iFOWSI.deletIntermediateFiles()
-    
-    def test_isOverlapping_true(self):
-        queryGff3Addr = 116
-        RefGff3Addr = 231
-        obs = self._iFOWSI.isOverlapping(queryGff3Addr, RefGff3Addr)
-        exp = 0
-        self.assertEquals(exp, obs)
-        
-    def test_isOverlapping_false_left(self):
-        queryGff3Addr = 116
-        RefGff3Addr = 58
-        obs = self._iFOWSI.isOverlapping(queryGff3Addr, RefGff3Addr)
-        exp = -1
-        self.assertEquals(exp, obs)
-    
-    def test_isOverlapping_false_right(self):
-        queryGff3Addr = 116
-        RefGff3Addr = 347
-        obs = self._iFOWSI.isOverlapping(queryGff3Addr, RefGff3Addr)
-        exp = 1
-        self.assertEquals(exp, obs) 
-           
-    def test_getHisFirstChild(self):
-        firstRefLAddr = 0
-        obsFirstChildLAddr = self._iFOWSI.getHisFirstChild(firstRefLAddr)
-        expFirstChildLAddr = 48
-        self.assertEquals(expFirstChildLAddr, obsFirstChildLAddr) 
-    
-    def test_isLastElement_true(self):
-        refLAddr = 96
-        obsBool = self._iFOWSI.isLastElement(refLAddr)
-        expBool = True
-        self.assertEquals(expBool, obsBool)
-    
-    def test_isLastElement_false(self):
-        refLAddr = 72
-        obsBool = self._iFOWSI.isLastElement(refLAddr)
-        expBool = False
-        self.assertEquals(expBool, obsBool)  
-        
-    def test_isLastElement_highestLevel_true(self):
-        refLAddr = 24
-        obsBool = self._iFOWSI.isLastElement(refLAddr)
-        expBool = True
-        self.assertEquals(expBool, obsBool)
-    
-    def test_isLastElement_highestLevel_false(self):
-        refLAddr = 0
-        obsBool = self._iFOWSI.isLastElement(refLAddr)
-        expBool = False
-        self.assertEquals(expBool, obsBool)           
-
-    def test_findOverlapIter(self):
-        queryGff3Addr = 175
-        firstRefLAddr = 0 
-        obsFirstOverlapLAddr = self._iFOWSI.findOverlapIter(queryGff3Addr, firstRefLAddr)
-        expFirstOverlapLAddr = 0
-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)
-        
-    def test_not_findOverlapIter(self):
-        queryGff3Addr = 295
-        firstRefLAddr = 24 
-        obsFirstOverlapLAddr = self._iFOWSI.findOverlapIter(queryGff3Addr, firstRefLAddr)
-        expFirstOverlapLAddr = None
-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)   
-        
-    def test_findOverlapIter_not_the_first_RefOverlap(self):
-        queryGff3Addr = 235
-        firstRefLAddr = 0 
-        obsFirstOverlapLAddr = self._iFOWSI.findOverlapIter(queryGff3Addr, firstRefLAddr)
-        expFirstOverlapLAddr = 24
-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)  
-        
-    def test_changeToNewSubEndLAddr(self):
-        firstChildLAddr = 48
-        subEndLAddr = 48
-        expSubEndLAddr = 120
-        obsSubEndLAddr = self._iFOWSI.changeToNewSubEndLAddr(firstChildLAddr, subEndLAddr)
-        self.assertEquals(expSubEndLAddr, obsSubEndLAddr) 
-        
-    def test_defineSubEndLaddr(self):
-        parentLAddr = -1
-        expSubEndLAddr = 48
-        obsSubEndLAddr = self._iFOWSI.defineSubEndLaddr(parentLAddr)
-        self.assertEquals(expSubEndLAddr, obsSubEndLAddr)
-        
-    def test_getNextRefIntervalInCaseNotOverLap(self):
-        firstRefLAddr = 96
-        expRefLAddr = 24
-        obsRefLAddr = self._iFOWSI.getNextRefIntervalInCaseNotOverLap(firstRefLAddr)
-        self.assertEquals(expRefLAddr, obsRefLAddr)
-        
-    def test_getNextRefIntervalInCaseOverLap(self):
-        firstChildLAddr = -1
-        firstRefLAddr = 120
-        subEndLAddr = 144
-        expRefLAddr, expSubEndLAddr = (96, 144)
-        obsRefLAddr, obsSubEndLAddr = self._iFOWSI.getNextRefIntervalInCaseOverLap(firstChildLAddr, firstRefLAddr, subEndLAddr)
-        self.assertEquals((expRefLAddr, expSubEndLAddr), (obsRefLAddr, obsSubEndLAddr))        
-
-    def test_not_findOverlapIter_between2RefIntervals(self):
-        inputQueryGff3FileName = 'query2.gff3'
-        self._writeQueryGff3File2(inputQueryGff3FileName)
-        self._iFOWSI.setQueryGff3FileName(inputQueryGff3FileName)
-        queryGff3Addr = 0
-        firstRefLAddr = 0
-        obsFirstOverlapLAddr = self._iFOWSI.findOverlapIter(queryGff3Addr, firstRefLAddr)
-        expFirstOverlapLAddr = 24
-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr) 
-        os.remove(inputQueryGff3FileName) 
-
-    def _writeQueryGff3File2(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tquery\ttest1\t1100\t1150\t126\t+\t.\tID=test1.1;Name=test1.1\n")
-        f.write("chr1\tquery\ttest2\t1250\t1300\t781\t+\t.\tID=test1.2;Name=test1.2\n")
-        f.close()  
-        
-    def _writeQueryGff3File(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tquery\ttest1.1\t25\t150\t126\t+\t.\tID=test1.1;Name=test1.1\n")
-        f.write("chr1\tquery\ttest1.2\t70\t850\t781\t+\t.\tID=test1.2;Name=test1.2\n")
-        f.write("chr1\tquery\ttest1.3\t550\t850\t201\t+\t.\tID=test1.3;Name=test1.3\n")
-        f.write("chr1\tquery\ttest1.4\t925\t1025\t101\t+\t.\tID=test1.4;Name=test1.4\n")
-        f.write("chr1\tquery\ttest1.5\t1201\t1210\t10\t+\t.\tID=test1.5;Name=test1.5\n")
-        f.write("chr1\tquery\ttest1.6\t1500\t1600\t101\t+\t.\tID=test1.6;Name=test1.6\n")
-        f.close()
-        
-    def _writeGFF3File(self, fileName):
-        f = open(fileName, "w")
-        f.write("chr1\ttest\ttest2.1\t9\t1000\t1001\t+\t.\tID=test2.1;Name=test2.1\n")
-        f.write("chr1\ttest\ttest2.2\t50\t350\t301\t+\t.\tID=test2.2;Name=test2.2\n")
-        f.write("chr1\ttest\ttest2.3\t100\t600\t501\t+\t.\tID=test2.3;Name=test2.3\n")
-        f.write("chr1\ttest\ttest2.4\t200\t450\t251\t+\t.\tID=test2.4;Name=test2.4\n")
-        f.write("chr1\ttest\ttest2.5\t700\t950\t251\t+\t.\tID=test2.5;Name=test2.5\n")
-        f.write("chr1\ttest\ttest2.6\t800\t900\t101\t+\t.\tID=test2.6;Name=test2.6\n")
-        f.write("chr1\ttest\ttest2.7\t1200\t1300\t101\t+\t.\tID=test2.7;Name=test2.7\n")
-        f.close()
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/Test_FindOverlaps_randomExample.py
--- a/SMART/Java/Python/ncList/test/Test_FindOverlaps_randomExample.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,99 +0,0 @@
-import unittest
-import os
-import time
-from SMART.Java.Python.misc import Utils
-from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import *
-from SMART.Java.Python.ncList.FindOverlaps_naif import FindOverlaps_naif
-from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
-
-class Test_FindOverlaps_randomExample(unittest.TestCase):
-
-    def setUp(self):
-        self._output_naif = 'output_naif.gff3'
-        self._outputOptim = 'outputOptim.gff3'
-
-        
-    def tearDown(self):
-        return
-        os.remove(self._output_naif)
-        os.remove(self._outputOptim)
-        
-    def test_run_smallSize(self):
-        inputRefGff3FileName = 'ref_small.gff3'
-        numberOfReads = 10
-        chromSize = 1000
-        iMFO_rand = MockFindOverlaps_randomExample(inputRefGff3FileName, 'reference', numberOfReads, chromSize)
-        iMFO_rand.write()
-
-        inputQueryGff3FileName = 'query_small.gff3'
-        iMFO_rand = MockFindOverlaps_randomExample(inputQueryGff3FileName,'query', 10, 1000)
-        iMFO_rand.write()
-        
-        iFON = FindOverlaps_naif(inputRefGff3FileName, inputQueryGff3FileName)
-        iFON.setOutputGff3FileName(self._output_naif)
-        iFOO = FindOverlapsOptim(0)
-        iFOO.setRefFileName(inputRefGff3FileName, "gff3")
-        iFOO.setQueryFileName(inputQueryGff3FileName, "gff3")
-        iFOO.setOutputFileName(self._outputOptim)
-        iFOO.prepareIntermediateFiles()
-        iFOO.createNCLists()
-        
-        startTime_naif = time.time()
-        iFON.run()
-        iFON.close()
-        endTime_naif = time.time()
-        totalTime_naif = endTime_naif - startTime_naif
-        print 'for naive algo, we take %e second' % (totalTime_naif)
-        
-        startTimeOptim = time.time()
-        iFOO.compare()
-        endTimeOptim = time.time()
-        totalTimeOptim = endTimeOptim - startTimeOptim
-        print 'for optim algo, we take %e second' % (totalTimeOptim)
-        iFOO.close()
-        
-        self.assertTrue(Utils.diff(self._output_naif, self._outputOptim)) 
-        
-        os.remove(inputRefGff3FileName)
-        os.remove(inputQueryGff3FileName)
-      
-
-    def test_creatRandomExampleWithMOverlaps_smallSize(self):
-        inputRefGff3FileName = 'refMOverlaps_small.gff3'
-        inputQueryGff3FileName = 'queryMOverlaps_small.gff3'       
-        numberOfReads = 10
-        chromSize = 1000 
-        iRMSS = MockFindOverlaps_randomExample_MOverlaps(inputRefGff3FileName, inputQueryGff3FileName, 7, numberOfReads, chromSize)
-        iRMSS.createRandomExample()
-  
-  
-        iFON = FindOverlaps_naif(inputRefGff3FileName, inputQueryGff3FileName)
-        iFON.setOutputGff3FileName(self._output_naif)
-        iFOO = FindOverlapsOptim(0)
-        iFOO.setRefFileName(inputRefGff3FileName, "gff3")
-        iFOO.setQueryFileName(inputQueryGff3FileName, "gff3")
-        iFOO.setOutputFileName(self._outputOptim)
-        iFOO.prepareIntermediateFiles()
-        iFOO.createNCLists()
-        
-        startTime_naif = time.time()
-        iFON.run()
-        endTime_naif = time.time()
-        totalTime_naif = endTime_naif - startTime_naif
-        print 'for naive algo, we take %e second' % (totalTime_naif)
-        iFON.close()
-        
-        startTimeOptim = time.time()
-        iFOO.compare()
-        endTimeOptim = time.time()
-        totalTimeOptim = endTimeOptim - startTimeOptim
-        print 'for optim algo, we take %e second' % (totalTimeOptim)
-        iFOO.close()        
-        
-        self.assertTrue(Utils.diff(self._output_naif, self._outputOptim)) 
-        
-        os.remove(inputRefGff3FileName)
-        os.remove(inputQueryGff3FileName)        
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/ncList/test/Test_randExample.py
--- a/SMART/Java/Python/ncList/test/Test_randExample.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,51 +0,0 @@
-import unittest
-import time
-from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import *
-from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
-
-class Test_F_FindOverlaps_randomExample(unittest.TestCase):
-
-    def setUp(self):
-        self._output_optim = 'output_optim.gff3'
-        
-    def test_creatRandomExampleWithMOverlaps(self):
-        inputRefGff3FileName = 'refMOverlaps.gff3'
-        inputQueryGff3FileName = 'queryMOverlaps.gff3'
-        outputDataName = 'timeResult.dat'  
-        fTime = open(outputDataName, 'w')  
-        fTime.write('NbRef\tNbQuery\tNbOverlap\ttime\n')   
-        numberOfRefReads = 1000
-        chromSize = 100000
-        while numberOfRefReads <= 1000:
-            numberOfQReads = 1000
-            while numberOfQReads <= 1000:
-                print 'ref size = %d,  query size = %d' %(numberOfRefReads, numberOfQReads)
-                iMFOR_ref = MockFindOverlaps_randomExample(inputRefGff3FileName, 'ref', numberOfRefReads, chromSize)
-                iMFOR_ref.write()
-                iMFOR_query = MockFindOverlaps_randomExample(inputQueryGff3FileName,'q', numberOfQReads, chromSize)
-                iMFOR_query.write()
-                iFOO = FindOverlapsOptim(0)
-                iFOO.setRefFileName(inputRefGff3FileName, "gff3")
-                iFOO.setQueryFileName(inputQueryGff3FileName, "gff3")
-                iFOO.setOutputFileName(self._output_optim)
-                iFOO.prepareIntermediateFiles()
-                iFOO.createNCLists()
-                
-                startTime_optim = time.time()
-                iFOO.compare()
-                endTime_optim = time.time()
-                totalTime_optim = endTime_optim - startTime_optim
-                print 'we took %s second.' % (totalTime_optim)
-                nbOverlap = iFOO._nbOverlaps
-                iFOO.close()  
-                fTime.write('%d\t%d\t%d\t%.2f\n' % (numberOfRefReads, numberOfQReads, nbOverlap, totalTime_optim))
-                numberOfQReads *= 10
-            numberOfRefReads *= 10
-        fTime.close()
-        os.remove(inputQueryGff3FileName)
-        os.remove(inputRefGff3FileName)
-        os.remove(self._output_optim)
-        
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/plot.py
--- a/SMART/Java/Python/plot.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,227 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-\n-"""\n-Plot the data from the data files\n-"""\n-\n-import os, re, math\n-from optparse import OptionParser\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from SMART.Java.Python.misc.Progress import Progress\n-from commons.core.utils.FileUtils import FileUtils\n-\n-class Plot(object):\n-\n-    def __init__(self, verbosity):\n-        self.verbosity = verbosity\n-        self.keep      = False\n-\n-    def keepTmpFiles(self, boolean):\n-        self.keep = boolean\n-\n-    def setShape(self, shape):\n-        self.shape = shape\n-\n-    def setInputFileName(self, fileName, format):\n-        self.parser = TranscriptContainer(fileName, format, self.verbosity)\n-\n-    def setXData(self, tag, default):\n-        self.x        = tag\n-        self.xDefault = default\n-\n-    def setYData(self, tag, default):\n-        self.y        = tag\n-        self.yDefault = default\n-\n-    def setZData(self, tag, default):\n-        self.z        = tag\n-        self.zDefault = default\n-\n-    def setNbBars(self, nbBars):\n-        self.nbBars = nbBars\n-\n-    def setOutputFileName(self, fileName):\n-        self.outputFileName = fileName\n-\n-    def setRegression(self, regression):\n-        self.regression = regression\n-\n-    def setLog(self, log):\n-        self.log = log\n-\n-    def createPlotter(self):\n-        self.plotter = RPlotter(self.outputFileName, self.verbosity, self.keep)\n-        if self.shape == "barplot":\n-            
self.plotter.setBarplot(True)\n-        elif self.shape == "line":\n-            pass\n-        elif self.shape == "points":\n-            self.plotter.setPoints(True)\n-        elif self.shape == "heatPoints":\n-            self.plotter.setHeatPoints(True)\n-        else:\n-            raise Exception("Do not understand shape \'%s\'\\n" % (self.shape))\n-            \n-        self.plotter.setLog(self.log)\n-        self.plotter.setRegression(self.regression)\n-\n-    def getValues(self, transcript):\n-        x = transcript.getTagValue(self.x)\n-        y = None\n-        z = None\n-        if self.y != None:\n-            y = transcript.getTagValue(self.y)\n-        if self.z != None:\n-            z = transcript.getTagValue(self.z)\n-        if x == None:\n-            if self.xDefault != None:\n-                x = self.xDefault\n-            else:\n-                raise Exception("Error! Transcript %s do not have the x-tag %s\\n" % (transcript, self.x))\n-        if '..b'= "heatPoints":\n-            self.plotter.addHeatLine(heatLine)\n-        self.plotter.plot()\n-\n-    def close(self):\n-        if self.regression:\n-            print self.plotter.getCorrelationData()\n-        if self.shape == "points":\n-            rho = self.plotter.getSpearmanRho()\n-            if rho == None:\n-                print "Cannot compute Spearman rho."\n-            else:\n-                print "Spearman rho: %f" % (rho)    \n-\n-    def run(self):\n-        self.createPlotter()\n-        self.parseFile() \n-        self.close()\n-\n-\n-if __name__ == "__main__":\n-    \n-    # parse command line\n-    description = "Plot v1.0.2: Plot some information from a list of transcripts. 
[Category: Visualization]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input",      dest="inputFileName",  action="store",                      type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--format",     dest="format",         action="store",                      type="string", help="format of the input [compulsory] [format: transcript file format]")\n-    parser.add_option("-x", "--x",          dest="x",              action="store",                      type="string", help="tag for the x value [format: string]")\n-    parser.add_option("-y", "--y",          dest="y",              action="store",                      type="string", help="tag for the y value [format: string]")\n-    parser.add_option("-z", "--z",          dest="z",              action="store",      default=None,   type="string", help="tag for the z value [format: string]")\n-    parser.add_option("-X", "--xDefault",   dest="xDefault",       action="store",      default=None,   type="float",  help="value for x when tag is not present [format: float]")\n-    parser.add_option("-Y", "--yDefault",   dest="yDefault",       action="store",      default=None,   type="float",  help="value for y when tag is not present [format: float]")\n-    parser.add_option("-Z", "--zDefault",   dest="zDefault",       action="store",      default=None,   type="float",  help="value for z when tag is not present [format: float]")\n-    parser.add_option("-o", "--output",     dest="outputFileName", action="store",                      type="string", help="output file names [format: output file in PNG format]")\n-    parser.add_option("-s", "--shape",      dest="shape",          action="store", default="barplot",   type="string", help="shape of the plot [format: choice (barplot, line, points, heatPoints)]")\n-    parser.add_option("-n", "--nbBars",     dest="nbBars",         action="store",      
default=2,                type="int",    help="number of bars in barplot [format: int]")\n-    parser.add_option("-k", "--keep",       dest="keep",           action="store_true", default=False,                 help="keep temporary files [format: bool]")\n-    parser.add_option("-r", "--regression", dest="regression",     action="store_true", default=False,                 help="plot regression line (in \'points\' format) [format: bool]")\n-    parser.add_option("-l", "--log",        dest="log",            action="store",      default="y",     type="string", help="use log on x- or y-axis (write \'x\', \'y\' or \'xy\') [format: string]")\n-    parser.add_option("-v", "--verbosity",  dest="verbosity",      action="store",      default=1,      type="int",    help="trace level [format: int]")\n-    (options, args) = parser.parse_args()\n-\n-    plot = Plot(options.verbosity)\n-    plot.setInputFileName(options.inputFileName, options.format)\n-    plot.setOutputFileName(options.outputFileName)\n-    plot.setXData(options.x, options.xDefault)\n-    plot.setYData(options.y, options.yDefault)\n-    plot.setZData(options.z, options.zDefault)\n-    plot.setShape(options.shape)\n-    plot.setNbBars(options.nbBars)\n-    plot.setRegression(options.regression)\n-    plot.setLog(options.log)\n-    plot.keepTmpFiles(options.keep)\n-    plot.run()\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/plotCoverage.py
--- a/SMART/Java/Python/plotCoverage.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,481 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import os, os.path, subprocess, glob, random\n-from optparse import OptionParser\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-from SMART.Java.Python.misc.Progress import Progress\n-from commons.core.parsing.FastaParser import FastaParser\n-\n-strands = [-1, 1]\n-colors  = {-1: "blue", 1: "red", 0: "black"}\n-colorLine = "black"\n-\n-def parseTargetField(field):\n-\tstrand             = "+"\n-\tsplittedFieldSpace = field.split()\n-\tsplittedFieldPlus  = field.split("+", 4)\n-\tif len(splittedFieldSpace) == 3:\n-\t\tid, start, end = splittedFieldSpace\n-\telif len(splittedFieldSpace) == 4:\n-\t\tid, start, end, strand = splittedFieldSpace\n-\telif len(splittedFieldPlus) == 3:\n-\t\tid, start, end = splittedFieldPlus\n-\telif len(splittedFieldPlus) == 4:\n-\t\tid, start, end, strand = splittedFieldPlus\n-\telse:\n-\t\traise Exception("Cannot parse Target field \'%s\'." 
% (field))\n-\treturn (id, int(start), int(end), strand)\n-\n-\n-class SimpleTranscript(object):\n-\tdef __init__(self, transcript1, transcript2, color = None):\n-\t\tself.start  = max(0, transcript1.getStart() - transcript2.getStart())\n-\t\tself.end    = min(transcript2.getEnd() - transcript2.getStart(), transcript1.getEnd() - transcript2.getStart())\n-\t\tself.strand = transcript1.getDirection() * transcript2.getDirection()\n-\t\tself.exons  = []\n-\t\tfor exon in transcript1.getExons():\n-\t\t\tif exon.getEnd() >= transcript2.getStart() and exon.getStart() <= transcript2.getEnd():\n-\t\t\t\tstart = max(0, exon.getStart() - transcript2.getStart())\n-\t\t\t\tend   = min(transcript2.getEnd() - transcript2.getStart(), exon.getEnd() - transcript2.getStart())\n-\t\t\t\tself.addExon(start, end, self.strand, color)\n-\n-\tdef addExon(self, start, end, strand, color):\n-\t\texon = SimpleExon(start, end, strand, color)\n-\t\tself.exons.append(exon)\n-\n-\tdef getRScript(self, yOffset, height):\n-\t\trString     = ""\n-\t\tpreviousEnd = None\n-\t\tfor exon in sorted(self.exons, key=lambda exon: exon.start):\n-\t\t\tif previousEnd != None:\n-\t\t\t\trString += "segments(%.1f, %.1f, %.1f, %.1f, col = \\"%s\\")\\n" % (previousEnd, yOffset + height / 4.0, exon.start, yOffset + height / 4.0, colorLine)\n-\t\t\trString    += exon.getRScript(yOffset, height)\n-\t\t\tpreviousEnd = exon.end\n-\t\treturn rString\n-\n-\n-class SimpleExon(object):\n-\tdef __init__(self, start, end, strand, color = None):\n'..b'on="store",                       type="string", help="input file 1 [compulsory] [format: file in transcript or mapping format given by -f]")\n-\tparser.add_option("-f", "--inputFormat1", dest="inputFormat1",   action="store",                       type="string", help="format of input file 1 [compulsory] [format: transcript or mapping file format]")\n-\tparser.add_option("-j", "--input2",       dest="inputFileName2", action="store",                       type="string", 
help="input file 2 [compulsory] [format: file in transcript format given by -g]")\n-\tparser.add_option("-g", "--inputFormat2", dest="inputFormat2",   action="store",                       type="string", help="format of input file 2 [compulsory] [format: transcript file format]")\n-\tparser.add_option("-q", "--sequence",     dest="inputSequence",  action="store",      default=None,    type="string", help="input sequence file [format: file in FASTA format] [default: None]")\n-\tparser.add_option("-o", "--output",       dest="outputFileName", action="store",                       type="string", help="output file [compulsory] [format: output file in PNG format]")\n-\tparser.add_option("-w", "--width",        dest="width",          action="store",      default=1500,    type="int",    help="width of the plots (in px) [format: int] [default: 1500]")\n-\tparser.add_option("-e", "--height",       dest="height",         action="store",      default=1000,    type="int",    help="height of the plots (in px) [format: int] [default: 1000]")\n-\tparser.add_option("-t", "--title",        dest="title",          action="store",      default="",      type="string", help="title of the plots [format: string]")\n-\tparser.add_option("-x", "--xlab",         dest="xLabel",         action="store",      default="",      type="string", help="label on the x-axis [format: string]")\n-\tparser.add_option("-y", "--ylab",         dest="yLabel",         action="store",      default="",      type="string", help="label on the y-axis [format: string]")\n-\tparser.add_option("-p", "--plusColor",    dest="plusColor",      action="store",      default="red",   type="string", help="color for the elements on the plus strand [format: string] [default: red]")\n-\tparser.add_option("-m", "--minusColor",   dest="minusColor",     action="store",      default="blue",  type="string", help="color for the elements on the minus strand [format: string] [default: blue]")\n-\tparser.add_option("-s", "--sumColor",     
dest="sumColor",       action="store",      default="black", type="string", help="color for 2 strands coverage line [format: string] [default: black]")\n-\tparser.add_option("-l", "--lineColor",    dest="lineColor",      action="store",      default="black", type="string", help="color for the lines [format: string] [default: black]")\n-\tparser.add_option("-1", "--merge",        dest="merge",          action="store_true", default=False,                  help="merge the 2 plots in 1 [format: boolean] [default: false]")\n-\tparser.add_option("-D", "--directory",    dest="working_Dir",    action="store",      default=os.getcwd(), type="string", help="the directory to store the results [format: directory]")\n-\tparser.add_option("-v", "--verbosity",    dest="verbosity",      action="store",      default=1,       type="int",    help="trace level [format: int]")\n-\t(options, args) = parser.parse_args()\n-\n-\tcolors[1]  = options.plusColor\n-\tcolors[-1] = options.minusColor\n-\tcolors[0]  = options.sumColor\n-\tcolorLine  = options.lineColor\n-\n-\tpp = PlotParser(options.verbosity)\n-\tpp.addInput(0, options.inputFileName1, options.inputFormat1)\n-\tpp.addInput(1, options.inputFileName2, options.inputFormat2)\n-\tpp.addSequence(options.inputSequence)\n-\tpp.setOutput(options.outputFileName if os.path.isabs(options.outputFileName) else os.path.join(options.working_Dir, options.outputFileName))\n-\tpp.setPlotSize(options.width, options.height)\n-\tpp.setLabels(options.xLabel, options.yLabel)\n-\tpp.setTitle(options.title)\n-\tpp.setMerge(options.merge)\n-\tpp.start()\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/plotGenomeCoverage.py
--- a/SMART/Java/Python/plotGenomeCoverage.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,132 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import FastaParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.RPlotter import RPlotter
-from SMART.Java.Python.misc.Utils import *
-
-
-class GetGenomeCoverage(object):
-
-    def __init__(self, verbosity = 1):
-        self.verbosity       = verbosity
-        self.inputContainer  = None
-        self.referenceParser = None
-        self.outputFileName  = None
-        self.genomeSize      = None
-        self.coverage        = {}
-        self.distribution    = {}
-
-
-    def setInputFile(self, fileName, format):
-        self.inputContainer = TranscriptContainer(fileName, format, self.verbosity)
-
-
-    def setOutputFile(self, fileName):
-        self.outputFileName = fileName
-
-
-    def setReference(self, fileName):
-        self.referenceParser = FastaParser(fileName, self.verbosity)
-
-
-    def getReferenceSizes(self):
-        self.genomeSize = 0
-        for chromosome in self.referenceParser.getRegions():
-            self.genomeSize += self.referenceParser.getSizeOfRegion(chromosome)
-    
-
-    def getCoverage(self):
-        progress = Progress(self.inputContainer.getNbTranscripts(), "Reading reads", self.verbosity)
-        for transcript in self.inputContainer.getIterator():
-            chromosome = transcript.getChromosome()
-            if chromosome not in self.coverage:
-                self.coverage[chromosome] = {}
-            for exon in transcript.getExons():
-                for pos in range(exon.getStart(), exon.getEnd() + 1):
-                    if pos not in self.coverage[chromosome]:
-                        self.coverage[chromosome][pos] = 1
-                    else:
-                        self.coverage[chromosome][pos] += 1
-            progress.inc()
-        progress.done()
-
-    
-    def getDistribution(self):
-        nbNucleotides = sum([len(self.coverage[chromosome].keys()) for chromosome in self.coverage])
-        progress      = Progress(nbNucleotides, "Building distribution", self.verbosity)
-        for chromosome in self.coverage:
-            for num in self.coverage[chromosome].values():
-                if num not in self.distribution:
-                    self.distribution[num] = 1
-                else:
-                    self.distribution[num] += 1
-                progress.inc()
-        progress.done()
-        self.distribution[0] = self.genomeSize - nbNucleotides
-                
-
-    def plotDistribution(self):
-        plotter = RPlotter(self.outputFileName, self.verbosity)
-        plotter.setFill(0)
-        plotter.addLine(self.distribution)
-        plotter.plot()
-        print "min/avg/med/max reads per base: %d/%.2f/%.1f/%d" % getMinAvgMedMax(self.distribution)
-
-
-    def run(self):
-        self.getReferenceSizes()
-        self.getCoverage()
-        self.getDistribution()
-        self.plotDistribution()
-        
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Plot Genome Coverage v1.0.1: Get the coverage of a genome. [Category: Personal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="reads file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of file [compulsory] [format: transcript file format]")
-    parser.add_option("-r", "--reference", dest="reference",      action="store",               type="string", help="sequences file [compulsory] [format: file in FASTA format]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",               type="string", help="output file [compulsory] [format: output file in PNG format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    getGenomeCoverage = GetGenomeCoverage(options.verbosity)
-    getGenomeCoverage.setInputFile(options.inputFileName, options.format)
-    getGenomeCoverage.setOutputFile(options.outputFileName)
-    getGenomeCoverage.setReference(options.reference)
-    getGenomeCoverage.run()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/plotRepartition.py
--- a/SMART/Java/Python/plotRepartition.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,128 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Plot the data from the data files
-"""
-import os
-from optparse import OptionParser
-from commons.core.parsing.GffParser import GffParser
-from SMART.Java.Python.misc.RPlotter import RPlotter
-from SMART.Java.Python.misc.Progress import Progress
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Plot Repartition v1.0.1: Plot the repartition of different data on a whole genome. (This tool uses 1 input file only, the different values being stored in the tags.    See documentation to know more about it.) [Category: Visualization]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                           type="string", help="input file name [compulsory] [format: file in GFF3 format]")
-    parser.add_option("-n", "--names",     dest="names",          action="store",      default=None,        type="string", help="name for the tags (separated by commas and no space) [default: None] [format: string]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                           type="string", help="output file [compulsory] [format: output file in PNG format]")
-    parser.add_option("-c", "--color",     dest="colors",         action="store",      default=None,        type="string", help="color of the lines (separated by commas and no space) [format: string]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",      default="png",       type="string", help="format of the output file [format: string] [default: png]")
-    parser.add_option("-r", "--normalize", dest="normalize",      action="store_true", default=False,                      help="normalize data (when panels are different) [format: bool] [default: false]")
-    parser.add_option("-l", "--log",       dest="log",            action="store",      default="",          type="string", help="use log on x- or y-axis (write 'x', 'y' or 'xy') [format: string]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,           type="int",    help="trace level [format: int]")
-    parser.add_option("-D", "--directory", dest="working_Dir",    action="store",      default=os.getcwd(), type="string", help="the directory to store the results [format: directory]")
-    (options, args) = parser.parse_args()
-
-    strands        = [1, -1]
-    strandToString = {1: "+", -1: "-"}
-    names          = [None] if options.names == None else options.names.split(",")
-    maxs           = {}
-    nbElements     = [0 for name in names]
-    lines          = [{} for i in range(len(names))]
-    if options.colors == None:
-        colors = [None for i in range(len(names))]
-    else:
-        colors = options.colors.split(",")
-
-    parser = GffParser(options.inputFileName, options.verbosity)
-    progress = Progress(parser.getNbTranscripts(), "Reading %s" % (options.inputFileName), options.verbosity)
-    for transcript in parser.getIterator():
-        chromosome = transcript.getChromosome()
-        direction  = transcript.getDirection()
-        start      = transcript.getStart()
-        for i, name in enumerate(names):
-            if chromosome not in lines[i]:
-                lines[i][chromosome] = dict([(strand, {}) for strand in strands])
-            if chromosome not in maxs:
-                maxs[chromosome] = transcript.getStart()
-            else:
-                maxs[chromosome] = max(maxs[chromosome], start)
-            if start not in lines[i][chromosome][direction]:
-                lines[i][chromosome][direction][start] = 0
-            thisNbElements                          = float(transcript.getTagValue(name)) if name != None and name in transcript.getTagNames() else 1
-            lines[i][chromosome][direction][start] += thisNbElements * direction
-            nbElements[i]                          += thisNbElements
-        progress.inc()
-    progress.done()
-
-    if options.normalize:
-        if options.verbosity >= 10:
-            print "Normalizing..."
-        for i, linesPerCondition in enumerate(lines):
-            for linesPerChromosome in linesPerCondition.values():
-                for line in linesPerChromosome.values():
-                    for key, value in line.iteritems():
-                        line[key] = value / float(nbElements[i]) * max(nbElements)
-    if options.verbosity >= 10:
-        print "... done."
-
-    progress = Progress(len(maxs.keys()), "Plotting", options.verbosity)
-    for chromosome in maxs:
-        plot = RPlotter("%s%s.%s" % (options.outputFileName, chromosome.capitalize(), options.format), options.verbosity)
-        plot.setLog(options.log)
-        plot.setImageSize(2000, 500)
-        plot.setFormat(options.format)
-        if maxs[chromosome] <= 1000:
-            unit    = "nt."
-            ratio = 1.0
-        elif maxs[chromosome] <= 1000000:
-            unit    = "kb"
-            ratio = 1000.0
-        else:
-            unit    = "Mb"
-            ratio = 1000000.0
-        plot.setXLabel("Position on %s (in %s)" % (chromosome.replace("_", " "), unit))
-        plot.setYLabel("# reads")
-        plot.setLegend(True)
-        for i, name in enumerate(names):
-            for strand in strands:
-                correctedLine = dict([(key / ratio, value) for key, value in lines[i][chromosome][strand].iteritems()])
-                if name != None:
-                    name = "%s (%s)" % (name.replace("_", " "), strandToString[strand])
-                plot.addLine(correctedLine, None, colors[i])
-        plot.plot()
-        progress.inc()
-    progress.done()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/plotTranscriptList.py
--- a/SMART/Java/Python/plotTranscriptList.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,255 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-"""\n-Plot the data from the data files\n-"""\n-import sys\n-import math\n-from optparse import OptionParser\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from SMART.Java.Python.misc.RPlotter import RPlotter\n-\n-\n-class PlotTranscriptList(object):\n-\n-    def __init__(self, verbosity = 0):\n-        self.inputFileName    = None\n-        self.format                 = None\n-        self.x                            = None\n-        self.y                            = None\n-        self.z                            = None\n-        self.xDefault             = None\n-        self.yDefault             = None\n-        self.zDefault             = None\n-        self.xLabel                 = None\n-        self.yLabel                 = None\n-        self.shape                    = None\n-        self.bucket                 = None\n-        self.keep                     = None\n-        self.log                        = None\n-        self.verbosity            = None\n-\n-\n-    def setPlotter(self, outputFileName, keep, log, xLabel, yLabel):\n-        self.plotter = RPlotter(outputFileName, self.verbosity, keep)\n-        if self.shape != "barplot":\n-            self.plotter.setLog(log)\n-        self.plotter.setXLabel(xLabel)\n-        self.plotter.setYLabel(yLabel)\n-\n-\n-    def setShape(self, shape):\n-        if self.shape == "line":\n-            pass\n-        elif shape == "barplot":\n-            self.plotter.setBarplot(True)\n-        elif shape == "points":\n-            
self.plotter.setPoints(True)\n-        elif shape == "heatPoints":\n-            self.plotter.setHeatPoints(True)\n-        else:\n-            sys.exit("Do not understand shape \'%s\'" % (shape))\n-\n-\n-    def setInput(self, inputFileName, format):\n-        self.parser = TranscriptContainer(inputFileName, format, self.verbosity)\n-\n-\n-    def getValues(self, transcript):\n-        x, y, z = None, None, None\n-        x = transcript.getTagValue(self.x)\n-        if self.y != None:\n-            y = transcript.getTagValue(self.y)\n-        if self.z != None:\n-            z = transcript.getTagValue(self.z)\n-        if x == None:\n-            if self.xDefault != None:\n-                x = self.xDefault\n-            else:\n-                sys.exit("Error! Transcript %s do not have the x-tag %s" % (transcript, self.x))\n-        if y == None and self.shape != "line" and self.shape != "barplot":\n-            if self.yDefault != None:\n-                y = s'..b'line = self.clusterInBarplot(line)\n-\n-        if self.shape == "points" or self.shape == "barplot" or self.shape == "line":\n-            self.plotter.addLine(line)\n-        elif self.shape == "heatPoints":\n-            self.plotter.addLine(line)\n-            self.plotter.addHeatLine(heatLine)\n-        else:\n-            sys.exit("Do not understand shape \'%s\'" % (self.shape))\n-\n-        self.plotter.plot()\n-\n-        if self.shape == "points" or self.shape == "heatPoints":\n-            self.getSpearmanRho()\n-\n-\n-\n-if __name__ == "__main__":\n-    \n-    # parse command line\n-    description = "Plot v1.0.2: Plot some information from a list of transcripts. 
[Category: Visualization]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input",dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n-    parser.add_option("-f", "--format",dest="format", action="store",type="string", help="format of the input [compulsory] [format: transcript file format]")\n-    parser.add_option("-x", "--x",dest="x",action="store", type="string", help="tag for the x value [format: string]")\n-    parser.add_option("-y", "--y",dest="y",action="store", type="string", help="tag for the y value [format: string]")\n-    parser.add_option("-z", "--z",dest="z", action="store", default=None,type="string", help="tag for the z value [format: string]")\n-    parser.add_option("-X", "--xDefault",dest="xDefault",action="store", default=None,type="float",help="value for x when tag is not present [format: float]")\n-    parser.add_option("-Y", "--yDefault",dest="yDefault",action="store",default=None,type="float",help="value for y when tag is not present [format: float]")\n-    parser.add_option("-Z", "--zDefault",dest="zDefault", action="store",default=None,type="float",help="value for z when tag is not present [format: float]")\n-    parser.add_option("-n", "--xLabel",dest="xLabel",action="store",default="",type="string", help="label on the x-axis [format: string] [default: ]")\n-    parser.add_option("-m", "--yLabel",dest="yLabel",action="store",default="", type="string", help="label on the y-axis [format: string] [default: ]")\n-    parser.add_option("-o", "--output",dest="outputFileName",action="store",type="string", help="output file names [format: output file in PNG format]")\n-    parser.add_option("-s", "--shape",dest="shape",action="store", type="string", help="shape of the plot [format: choice (barplot, line, points, heatPoints)]")\n-    parser.add_option("-b", 
"--bucket",dest="bucket",action="store",default=None,type="float",help="bucket size (for the line plot) [format: int] [default: 1]")\n-    parser.add_option("-k", "--keep",dest="keep",action="store_true", default=False, help="keep temporary files [format: bool]")\n-    parser.add_option("-l", "--log",dest="log",action="store",default="",type="string", help="use log on x- or y-axis (write \'x\', \'y\' or \'xy\') [format: string] [default: ]")\n-    parser.add_option("-v", "--verbosity",dest="verbosity",action="store",default=1, type="int",help="trace level [format: int]")\n-    (options, args) = parser.parse_args()\n-\n-    plotTranscriptList = PlotTranscriptList(options.verbosity)\n-    plotTranscriptList.x                = options.x\n-    plotTranscriptList.y                = options.y\n-    plotTranscriptList.z                = options.z\n-    plotTranscriptList.xDefault = options.xDefault\n-    plotTranscriptList.yDefault = options.yDefault\n-    plotTranscriptList.zDefault = options.zDefault\n-    plotTranscriptList.shape        = options.shape\n-    plotTranscriptList.bucket     = options.bucket\n-    plotTranscriptList.log            = options.log\n-    plotTranscriptList.setPlotter(options.outputFileName, options.keep, options.log, options.xLabel, options.yLabel)\n-    plotTranscriptList.setShape(options.shape)\n-    plotTranscriptList.setInput(options.inputFileName, options.format)\n-    plotTranscriptList.run()\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/qualToFastq.py
--- a/SMART/Java/Python/qualToFastq.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,87 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-from optparse import OptionParser
-from commons.core.parsing.SequenceListParser import SequenceListParser
-from SMART.Java.Python.misc.Progress import Progress
-
-"""
-Transform qual and fasta files to a single fastq file
-"""
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Qual To FastQ v1.0.2: Convert a file in FASTA/Qual format to FastQ format. [Category: Conversion]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-f", "--fasta",     dest="fastaFileName",  action="store",               type="string", help="input fasta file [compulsory] [format: file in FASTA format]")
-    parser.add_option("-q", "--qual",      dest="qualFileName",   action="store",               type="string", help="input qual file [compulsory] [format: file in TXT format]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store", default=None, type="string", help="output file [compulsory] [format: output file in FASTQ format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-    
-    fastaFile = open(options.fastaFileName)
-    qualFile  = open(options.qualFileName)
-    fastqFile = open(options.outputFileName, "w")
-    
-    fastaLine = fastaFile.readline().strip()
-    qualLine  = qualFile.readline().strip()
-    header    = None
-    cpt       = 0
-    while fastaLine:
-        if not qualLine:
-            raise Exception("Qual file is shorter!")
-        if fastaLine[0] == ">":
-            header = fastaLine[1:]
-            if qualLine[0] != ">":
-                raise Exception("Discrepencies around %s!" % (header))
-            fastqFile.write("@%s\n" % (header))
-        else:
-            if qualLine[0] == ">":
-                raise Exception("Discrepencies around %s!" % (qualLine[1:]))
-            intQualities = qualLine.split()
-            if len(intQualities) != len(fastaLine):
-                raise Exception("Sizes of read and quality diverge in %s!" % (header))
-            chrQualities = [chr(min(int(quality), 93) + 33) for quality in intQualities]
-            fastqFile.write("%s\n+\n%s\n" % (fastaLine, "".join(chrQualities)))
-        fastaLine = fastaFile.readline().strip()
-        qualLine  = qualFile.readline().strip()
-        if cpt % 1000 == 0 and options.verbosity > 1:
-            sys.stdout.write("%d lines read\r" % (cpt))
-            sys.stdout.flush()
-        cpt += 1
-    if options.verbosity > 0:
-        print "%d lines read" % (cpt)
-        
-    if qualLine:
-        raise Exception("Qual file is longer!")
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/re_filter_ratio_5_NbReads_100_samples_all_norm_Window100overlap50.gff3
--- a/SMART/Java/Python/re_filter_ratio_5_NbReads_100_samples_all_norm_Window100overlap50.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,296 +0,0 @@\n-chr2\tS-MART\tsample9Unique_transcript\t11739884\t11740222\t21\t-\t.\tnbOverlaps1=11091;nbOverlaps2=2.71789;nbOverlaps3=6262.63;nbOverlaps4=5519.92;nbOverlaps5=8514.14;nbOverlaps6=6150.58;nbOverlaps7=33.3514;nbOverlaps8=1542.36;nbOverlaps9=501.044;nbOverlaps10=462.834;ID=HWUSI-EAS454_0005:4:6:8871:3706#0/1;overlapsWith=HWUSI-EAS454_0005:5:83:18620:7295#0/1--HWUSI-EAS454_0005:5:114:4513:11369#0--HWUSI-EAS454_0005:5:2:1;Name=HWUSI-EAS454_0005:4:6:8871:3706#0/1--HWUSI-EAS454_0005:3:78:19264:12888#0/1--HWUSI-EAS454_0001:4:90:;nbElements=57149.000000;nbE1/nbE2=2983.143665;filter=S1>S2,\n-chr2\tS-MART\tsample5Unique_transcript\t2731028\t2731271\t24\t-\t.\tnbOverlaps1=690;nbOverlaps2=6.11525;nbOverlaps3=491.553;nbOverlaps4=463.775;nbOverlaps5=535.969;nbOverlaps6=325.238;nbOverlaps7=7.13514;nbOverlaps8=33.0133;nbOverlaps9=84.0599;nbOverlaps10=35.1854;ID=HWUSI-EAS454_0001:7:35:2542:19410#0/1;overlapsWith=HWUSI-EAS454_0005:5:115:16901:16071#0/1--HWUSI-EAS454_0005:5:33:3771:9306#0/1--HWUSI-EAS454_0005:5:3;Name=HWUSI-EAS454_0001:7:35:2542:19410#0/1--HWUSI-EAS454_0005:1:68:6471:6188#0/1--HWUSI-EAS454_0001:4:74:;nbElements=3755.000000;nbE1/nbE2=96.974808;filter=S1>S2,\n-chr5\tS-MART\tsample8Unique_transcript\t15253504\t15253638\t19\t+\t.\tnbOverlaps1=274;nbOverlaps2=16.3073;nbOverlaps3=82.8734;nbOverlaps4=76.5867;nbOverlaps5=104.667;nbOverlaps6=46.3123;nbOverlaps7=0.324324;nbOverlaps8=22.838;nbOverlaps9=14.8226;nbOverlaps10=10.0132;ID=HWUSI-EAS454_0005:3:31:2846:9089#0;overlapsWith=HWUSI-EAS454_0005:5:97:8048:11119#0/1--HWUSI-EAS454_0005:5:97:4678:4418#0--HWUSI-EAS454_0005:5:36:17;Name=HWUSI-EAS454_0005:3:31:2846:9089#0--HWUSI-EAS454_0001:4:68:6299:4556#0/1--HWUSI-EAS454_0005:5:79:101;nbElements=913.000000;nbE1/nbE2=15.831470;filter=S1>S2,\n-chr4\tS-MART\tsample2Unique_transcript\t9401695\t9401791\t23\t+\t.\tnbOverlaps1=1;nbOverlaps2=748.099;nbOverlaps3=1.62497;nbOverlaps4=1.41827;nbOverlaps5=0.902304;nbOverlaps6=1.57883;nbOverlaps7=0;nbOverlaps8=2.03507;nbO
verlaps9=0.195035;nbOverlaps10=0;ID=HWUSI-EAS454_0001:3:88:5184:17397#0/1;overlapsWith=HWUSI-EAS454_0005:4:109:7472:8125#0/1;Name=HWUSI-EAS454_0001:3:88:5184:17397#0/1--HWUSI-EAS454_0001:3:18:15316:12317#0/1--HWUSI-EAS454_0001:3:9;nbElements=1119.000000;nbE2/nbE1=374.049500;filter=S2>S1,\n-chr1\tS-MART\tsample1Unique_transcript\t11592768\t11592855\t24\t+\t.\tnbOverlaps1=377;nbOverlaps2=77.4598;nbOverlaps3=8.93733;nbOverlaps4=8.50964;nbOverlaps5=11.73;nbOverlaps6=4.21021;nbOverlaps7=0.0540541;nbOverlaps8=0.226119;nbOverlaps9=0;nbOverlaps10=0.417218;ID=HWUSI-EAS454_0005:1:59:8362:4670#0;overlapsWith=HWUSI-EAS454_0005:5:85:4327:6835#0--HWUSI-EAS454_0005:5:76:14344:4377#0;Name=HWUSI-EAS454_0005:1:59:8362:4670#0--HWUSI-EAS454_0001:3:24:2476:19461#0--HWUSI-EAS454_0001:3:85:9481;nbElements=534.000000;nbE1/nbE3=37.937756;filter=S1>S3,\n-chr5\tS-MART\tsample7Unique_transcript\t18274485\t18274638\t22\t+\t.\tnbOverlaps1=274;nbOverlaps2=50.9604;nbOverlaps3=219.371;nbOverlaps4=43.9665;nbOverlaps5=177.754;nbOverlaps6=68.4158;nbOverlaps7=0.918919;nbOverlaps8=11.532;nbOverlaps9=2.34041;nbOverlaps10=0.973509;ID=HWUSI-EAS454_0013_FC:1:64:3764:13895#0;overlapsWith=HWUSI-EAS454_0005:5:113:1828:9830#0/1--HWUSI-EAS454_0005:5:12:13473:6177#0/1--HWUSI-EAS454_0005:5:42;Name=HWUSI-EAS454_0013_FC:1:64:3764:13895#0--HWUSI-EAS454_0001:6:35:1740:1405#0/1--HWUSI-EAS454_0001:7:43;nbElements=1063.000000;nbE1/nbE2=5.273247;filter=S1>S2,\n-chr5\tS-MART\tsample6Unique_transcript\t16212511\t16212648\t24\t+\t.\tnbOverlaps1=56;nbOverlaps2=59.1141;nbOverlaps3=344.493;nbOverlaps4=25.5289;nbOverlaps5=51.4313;nbOverlaps6=165.777;nbOverlaps7=1.2973;nbOverlaps8=9.04474;nbOverlaps9=0.390069;nbOverlaps10=0.139073;ID=HWUSI-EAS454_0004:5:73:8987:9418#0/1;overlapsWith=HWUSI-EAS454_0005:5:111:18220:9874#0/1;Name=HWUSI-EAS454_0004:5:73:8987:9418#0/1--HWUSI-EAS454_0004:5:73:2669:12596#0/1--HWUSI-EAS454_0004:5:60:;nbElements=1024.000000;nbE3/nbE1=6.043737;filter=S3>S1\n-chr4\tS-MART\tsample9Unique_transcript\t10266529\
t10266697\t23\t-\t.\tnbOverlaps1=469;nbOverlaps2=679.472;nbOverlaps3=38.1'..b'WUSI-EAS454_0005:3:69:12301:18998#0/1--HWUSI-EAS454_0004:5:51;nbElements=1166.000000;nbE1/nbE2=5.771658;filter=S1>S2,\n-chr1\tS-MART\tsample3Unique_transcript\t26111387\t26111467\t23\t-\t.\tnbOverlaps1=534;nbOverlaps2=36.012;nbOverlaps3=140.56;nbOverlaps4=87.9329;nbOverlaps5=37.8968;nbOverlaps6=169.461;nbOverlaps7=0.324324;nbOverlaps8=9.94922;nbOverlaps9=1.75531;nbOverlaps10=0.417218;ID=HWUSI-EAS454_0001:4:27:10831:18663#0/1;overlapsWith=HWUSI-EAS454_0005:5:110:16902:16613#0/1--HWUSI-EAS454_0005:5:80:7345:3357#0/1--HWUSI-EAS454_0005:5:3;Name=HWUSI-EAS454_0001:4:27:10831:18663#0/1--HWUSI-EAS454_0005:1:12:16643:14153#0--HWUSI-EAS454_0005:3:10;nbElements=1248.000000;nbE1/nbE2=14.427753;filter=S1>S2,\n-chr1\tS-MART\tsample2Unique_transcript\t3855600\t3855715\t23\t+\t.\tnbOverlaps1=166;nbOverlaps2=474.951;nbOverlaps3=6.49988;nbOverlaps4=7.09136;nbOverlaps5=5.41382;nbOverlaps6=22.6299;nbOverlaps7=0;nbOverlaps8=37.3096;nbOverlaps9=0.195035;nbOverlaps10=0.139073;ID=HWUSI-EAS454_0001:3:31:13200:14532#0;overlapsWith=HWUSI-EAS454_0005:5:71:9926:19481#0;Name=HWUSI-EAS454_0001:3:31:13200:14532#0--HWUSI-EAS454_0001:3:13:5219:7425#0--HWUSI-EAS454_0001:6:68:106;nbElements=1094.000000;nbE1/nbE3=22.133687;filter=S1>S3,\n-chr1\tS-MART\tsample4Unique_transcript\t21609487\t21609537\t23\t-\t.\tnbOverlaps1=122;nbOverlaps2=20.3842;nbOverlaps3=23.562;nbOverlaps4=56.7309;nbOverlaps5=17.1438;nbOverlaps6=15.262;nbOverlaps7=0.216216;nbOverlaps8=3.39178;nbOverlaps9=3.51062;nbOverlaps10=0.834436;ID=HWUSI-EAS454_0001:7:58:122:1727#0;overlapsWith=HWUSI-EAS454_0005:5:31:18659:14130#0--HWUSI-EAS454_0005:5:75:15252:4379#0--HWUSI-EAS454_0005:5:74:14;Name=HWUSI-EAS454_0001:7:58:122:1727#0--HWUSI-EAS454_0001:4:14:10300:13699#0--HWUSI-EAS454_0001:3:88:7480;nbElements=312.000000;nbE1/nbE2=5.705147;filter=S1>S2,\n-chr5\tS-MART\tsample2Unique_transcript\t7185205\t7185373\t24\t+\t.\tnbOverlaps1=39;nbOverlaps2=377.107;nbOverlaps3=207
.996;nbOverlaps4=17.0193;nbOverlaps5=55.0405;nbOverlaps6=40.5232;nbOverlaps7=0.27027;nbOverlaps8=0.678356;nbOverlaps9=0.195035;nbOverlaps10=0.278145;ID=HWUSI-EAS454_0001:3:55:14092:13307#0;overlapsWith=HWUSI-EAS454_0005:5:79:13096:9168#0/1--HWUSI-EAS454_0005:5:37:11901:7436#0/1;Name=HWUSI-EAS454_0001:3:55:14092:13307#0--HWUSI-EAS454_0001:3:54:19540:17685#0/1--HWUSI-EAS454_0001:3:27;nbElements=990.000000;nbE2/nbE1=9.427675;nbE3/nbE1=5.199900;filter=S2>S1,S3>S1\n-chr4\tS-MART\tsample10Uniqu_transcript\t6551532\t6551826\t20\t+\t.\tnbOverlaps1=1109;nbOverlaps2=876.519;nbOverlaps3=104.81;nbOverlaps4=55.3126;nbOverlaps5=65.8682;nbOverlaps6=96.3084;nbOverlaps7=30.1622;nbOverlaps8=213.456;nbOverlaps9=391.629;nbOverlaps10=75.7946;ID=HWUSI-EAS454_0005:5:119:2219:3398#0/1;overlapsWith=HWUSI-EAS454_0005:5:109:4734:5243#0/1--HWUSI-EAS454_0005:5:43:4352:9936#0/1--HWUSI-EAS454_0005:5:64:;Name=HWUSI-EAS454_0005:5:119:2219:3398#0/1--HWUSI-EAS454_0005:3:85:14318:4211#0--HWUSI-EAS454_0005:5:105:;nbElements=6739.000000;nbE1/nbE3=10.481051;filter=S1>S3,\n-chr1\tS-MART\tsample2Unique_transcript\t25427657\t25427769\t24\t+\t.\tnbOverlaps1=31;nbOverlaps2=216.752;nbOverlaps3=1.62497;nbOverlaps4=0;nbOverlaps5=0;nbOverlaps6=0;nbOverlaps7=0;nbOverlaps8=0;nbOverlaps9=0.195035;nbOverlaps10=0;ID=HWUSI-EAS454_0001:3:64:17647:16938#0/1;overlapsWith=HWUSI-EAS454_0005:4:59:16218:16037#0/1;Name=HWUSI-EAS454_0001:3:64:17647:16938#0/1--HWUSI-EAS454_0001:3:91:1451:16969#0/1--HWUSI-EAS454_0001:3:8;nbElements=352.000000;nbE2/nbE1=6.773500;filter=S2>S1,\n-chr1\tS-MART\tsample3Unique_transcript\t79000\t79112\t21\t+\t.\tnbOverlaps1=436;nbOverlaps2=82.2161;nbOverlaps3=63.3738;nbOverlaps4=83.6781;nbOverlaps5=46.0175;nbOverlaps6=52.1013;nbOverlaps7=54.3784;nbOverlaps8=64.2177;nbOverlaps9=42.7126;nbOverlaps10=37.1324;ID=HWUSI-EAS454_0001:4:78:7399:13906#0;overlapsWith=HWUSI-EAS454_0005:5:114:13819:16401#0--HWUSI-EAS454_0005:5:21:18580:10056#0--HWUSI-EAS454_0005:5:79:;Name=HWUSI-EAS454_0001:4:78:7399:13906#0--HWUSI
-EAS454_0001:3:82:16783:2949#0--HWUSI-EAS454_0001:7:56:690;nbElements=2620.000000;nbE1/nbE2=5.239371;nbE1/nbE3=6.772942;filter=S1>S2,S1>S3,\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/removeAllTmpTables.py
--- a/SMART/Java/Python/removeAllTmpTables.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,64 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Remove all tmp tables in the MySQL database"""
-
-import os
-import glob
-from optparse import OptionParser
-from SMART.Java.Python.mySql.MySqlConnection import *
-
-
-if __name__ == "__main__":
-    
-    description = "Remove Tables v1.0.2: Remove tables in the local MySQL database. [Category: Other]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-t", "--tmp",     dest="tmp",     action="store_true",    default=False, help="Remove temporary tables only [format: bool] [default: false]")
-    parser.add_option("-f", "--files", dest="files", action="store_false", default=True,    help="Do not remove temporary files [format: bool] [default: true]")
-    (options, args) = parser.parse_args()
-    
-    print "Removing temporary databases:"
-    if options.files:
-        for tmpFile in glob.glob("smartdb*"):
-            print "    removing %s" % (tmpFile)
-            os.unlink(tmpFile)
-    print "Removing temporary files:"
-    if options.files:
-        for tmpFile in glob.glob("tmp*.dat"):
-            print "    removing %s" % (tmpFile)
-            os.unlink(tmpFile)
-        for tmpFile in glob.glob("tmp*.R"):
-            print "    removing %s" % (tmpFile)
-            os.unlink(tmpFile)
-        for tmpFile in glob.glob("tmp*.Rout"):
-            print "    removing %s" % (tmpFile)
-            os.unlink(tmpFile)
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/removeEmptySequences.py
--- a/SMART/Java/Python/removeEmptySequences.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,135 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Remove empty sequences from a FASTA or FASTQ file
-"""
-
-import os, random
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import *
-from commons.core.parsing.FastqParser import *
-from commons.core.writer.FastaWriter import *
-from commons.core.writer.FastqWriter import *
-from SMART.Java.Python.misc.Progress import *
-
-
-class EmptySequenceRemover(object):
-
-    def __init__(self, verbosity = 1):
-        self.verbosity            = verbosity
-        self.inputFileName    = None
-        self.parser                 = None
-        self.format                 = None
-        self.writer                 = None
-        self.forbiddenNames = {}
-        self.removedNames     = {}
-
-
-    def setInputFileName(self, fileName, format):
-        self.inputFileName = fileName
-        self.format                = format
-        if options.format == "fasta":
-            self.parser = FastaParser(self.inputFileName, self.verbosity)
-        elif options.format == "fastq":
-            self.parser = FastqParser(self.inputFileName, self.verbosity)
-        else:
-            sys.exit("Do not understand '%s' file format." % (self.format))
-
-
-    def setOutputFileName(self, fileName):
-        if options.format == "fasta":
-            self.writer = FastaWriter("%s.mfa" % (fileName), self.verbosity)
-        elif options.format == "fastq":
-            self.writer = FastqWriter("%s.mfq" % (fileName), self.verbosity)
-
-
-    def parse(self):
-        progress = Progress(self.parser.getNbSequences(), "Reading sequences in %s" % (options.inputFileName), options.verbosity)
-        for sequence in self.parser.getIterator():
-            name = sequence.name.split("/")[0]
-            if name not in self.forbiddenNames:
-                if sequence.sequence == "":
-                    self.removedNames[name] = 1
-                else:
-                    self.writer.addSequence(sequence)
-            progress.inc()
-        progress.done()
-        self.writer.write()
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Remove Empty Sequences v1.0.2: Remove all the empty sequences in a list. [Category: Personal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",         dest="inputFileName",     action="store",                                         type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
-    parser.add_option("-f", "--format",        dest="format",                    action="store",                                         type="string", help="format of the input file [compulsory] [format: sequence file format]")
-    parser.add_option("-j", "--input2",        dest="inputFileName2",    action="store",                                         type="string", help="input file 2 (in case of pair end reads) [format: file in sequence format given by -f] [default: None]")
-    parser.add_option("-o", "--output",        dest="outputFileName",    action="store",            default=None,    type="string", help="output file [compulsory] [format: output file in format given by -f]")
-    parser.add_option("-p", "--output2",     dest="outputFileName2", action="store",            default=None,    type="string", help="output file 2 (in case of pair end reads) [format: output file in sequence format given by -f] [default: None]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",             action="store",            default=1,         type="int",        help="trace level [format: int] [default: 1]")
-    parser.add_option("-l", "--log",             dest="log",                         action="store_true", default=False,                                help="write a log file [format: bool] [default: false]")
-    (options, args) = parser.parse_args()
-
-    if options.log:
-        logHandle = open("%s.log" % options.outputFileName, "w")
-    
-    remover = EmptySequenceRemover(options.verbosity)
-    remover.setInputFileName(options.inputFileName, options.format)
-    remover.setOutputFileName(options.outputFileName)
-    remover.parse()
-    removedNames = remover.removedNames
-    if options.log:
-        for name in removedNames:
-            logHandle.write("Removed '%s' in %s\n" % (name, options.inputFileName))
-    nbSequences = remover.parser.getNbSequences()
-
-    newRemovedNames = {}
-    if options.inputFileName2 != None:
-        remover = EmptySequenceRemover(options.verbosity)
-        remover.setInputFileName(options.inputFileName2, options.format)
-        remover.setOutputFileName(options.outputFileName2)
-        remover.forbiddenNames = removedNames
-        remover.parse()
-        newRemovedNames = remover.removedNames
-        if options.log:
-            for name in newRemovedNames:
-                logHandle.write("Removed '%s' in %s\n" % (name, options.inputFileName2))
-
-        remover = EmptySequenceRemover(options.verbosity)
-        remover.setInputFileName(options.inputFileName, options.format)
-        remover.setOutputFileName(options.outputFileName)
-        remover.forbiddenNames = newRemovedNames
-        remover.parse()
-
-    nbRemoved = len(removedNames.keys()) + len(newRemovedNames.keys())
-    print "%d over %d sequences are empty (%.2f%%)." % (nbRemoved, nbSequences, float(nbRemoved) / nbSequences * 100)
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/removeExonLines.sh
--- a/SMART/Java/Python/removeExonLines.sh Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-#!/bin/bash
-sed '/exon/d' $1
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/repetGffConverter.py
--- a/SMART/Java/Python/repetGffConverter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,71 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Convert a GFF with REPET format to BED format"""
-
-import os
-from optparse import OptionParser
-from commons.core.parsing.GffParser import *
-from commons.core.writer.BedWriter import *
-from SMART.Java.Python.misc.Progress import *
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Repet GFF Convert v1.0.1: Convert REPET-flavored GFF to normal GFF. [Category: Personnal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",                        dest="inputFileName",    action="store",                                         type="string", help="input file [compulsory] [format: file in GFF3 format]")
-    parser.add_option("-o", "--output",                     dest="outputFileName", action="store",                                         type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-v", "--verbosity",                dest="verbosity",            action="store",            default=1,         type="int",        help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    parser            = GffParser(options.inputFileName, options.verbosity)
-    transcripts = dict()
-    progress        = Progress(parser.getNbTranscripts(), "Analyzing file %s" % (options.inputFileName), options.verbosity)
-    for transcript in parser.getIterator():
-        if transcript.feature.endswith("range"):
-            transcripts[transcript.name] = transcript
-        elif transcript.feature.endswith("hsp"):
-            if transcript.name in transcripts:
-                transcripts[transcript.name].addExon(transcript)
-            else:
-                sys.exit("Transcript %s is not defined\n" % (transcript.name))
-        else:
-            sys.exit("Do not understand feature %s" % (transcript.feature))
-        progress.inc()
-    progress.done()
-        
-    writer = BedWriter(options.outputFileName, options.verbosity)
-    for name in transcripts:
-        writer.addTranscript(transcripts[name])
-        
-    print "%d transcripts out of %d written (%.2f%%)" % (len(transcripts.keys()), parser.getNbTranscripts(), float(len(transcripts.keys())) / parser.getNbTranscripts() * 100)
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/restrictFromNucleotides.py
--- a/SMART/Java/Python/restrictFromNucleotides.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,78 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Remove all dirty sequences"""
-
-import os
-import sys
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import *
-from commons.core.writer.FastaWriter import *
-from commons.core.parsing.FastqParser import *
-from commons.core.writer.FastqWriter import *
-from SMART.Java.Python.misc.Progress import *
-from SMART.Java.Python.misc.RPlotter import *
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Restrict from nucleotide v1.0.1: Remove the sequences with ambiguous nucleotides. [Category: Personal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",         dest="inputFileName",    action="store",                                                type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
-    parser.add_option("-f", "--format",        dest="inputFileName",    action="store",            default="fasta",    type="string", help="format of the input and output files [compulsory] [format: sequence file format]")
-    parser.add_option("-o", "--output",        dest="outputFileName", action="store",                                                type="string", help="output file [compulsory] [format: output file in sequence format given by -f]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",            action="store",            default=1,                type="int",        help="trace level [format: int]")
-    parser.add_option("-l", "--log",             dest="log",                        action="store_true", default=False,                                     help="write a log file [format: bool] [default: false]")
-    (options, args) = parser.parse_args()
-
-    # treat items
-    if options.format == "fasta":
-        parser     = FastaParser(options.inputFileName, options.verbosity)
-        writer     = FastaWriter(options.outputFileName, options.verbosity)
-    elif options.format == "fastq":
-        parser     = FastqParser(options.inputFileName, options.verbosity)
-        writer     = FastqWriter(options.outputFileName, options.verbosity)
-    else:
-        sys.exit("Do not understand '%s' format." % (options.format))
-    nbSequences = parser.getNbSequences()
-    print "sequences: %d" % (nbSequences)
-    
-    progress = Progress(nbSequences, "Analyzing sequences of %s" % (options.inputFileName), options.verbosity)
-    nbKept     = 0
-    for sequence in parser.getIterator():
-        if not sequence.containsAmbiguousNucleotides():
-            writer.addSequence(sequence)
-            nbKept += 1
-        progress.inc()
-    progress.done()
-
-    print "%d items, %d kept (%.2f%%)" % (nbSequences, nbKept, float(nbKept) / nbSequences * 100)
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/restrictFromSize.py
--- a/SMART/Java/Python/restrictFromSize.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,94 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Get the size distribution of a Fasta / BED file"""
-
-import os
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import *
-from commons.core.parsing.FastqParser import *
-from SMART.Java.Python.structure.TranscriptContainer import *
-from commons.core.writer.TranscriptWriter import *
-from commons.core.writer.FastaWriter import *
-from commons.core.writer.FastqWriter import *
-from SMART.Java.Python.misc.Progress import *
-from SMART.Java.Python.misc.RPlotter import *
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Restrict from Size v1.0.1: Select the elements of a list of sequences or transcripts with a given size. [Category: Data Selection]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript or sequence format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",                     type="string", help="format of the input [compulsory] [format: sequence or transcript file format]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in transcript or sequence format given by -f]")
-    parser.add_option("-m", "--minSize",   dest="minSize",        action="store",      default=None,  type="int",    help="minimum size [format: int]")
-    parser.add_option("-M", "--maxSize",   dest="maxSize",        action="store",      default=None,  type="int",    help="maximum size [format: int]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
-    parser.add_option("-l", "--log",       dest="log",            action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
-    (options, args) = parser.parse_args()
-
-    if options.format == "fasta":
-        parser = FastaParser(options.inputFileName, options.verbosity)
-        writer = FastaWriter(options.outputFileName, options.verbosity)
-    elif options.format == "fastq":
-        parser = FastqParser(options.inputFileName, options.verbosity)
-        writer = FastqWriter(options.outputFileName, options.verbosity)
-    else:
-        parser = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
-        writer = TranscriptWriter(options.outputFileName, options.format, options.verbosity)
-
-
-    # treat items
-    nbItems  = parser.getNbItems()
-    progress = Progress(nbItems, "Analyzing sequences of %s" % (options.inputFileName), options.verbosity)
-    nbKept   = 0
-    nbRead   = 0
-    nbClKept = 0
-    nbClRead = 0
-    for item in parser.getIterator():
-        size      = item.getSize()
-        nb        = 1 if options.format in ("fasta", "fastq") or "nbElements" not in item.getTagNames() else float(item.getTagValue("nbElements"))
-        nbRead   += nb
-        nbClRead += 1
-        if (options.minSize == None or options.minSize <= size) and (options.maxSize == None or options.maxSize >= size):
-            writer.addElement(item)
-            nbKept   += nb
-            nbClKept += 1
-        progress.inc()
-    progress.done()
-    
-    writer.write()
-
-    print "%d items,    %d kept (%.2f%%)" % (nbRead, nbKept, 0 if nbItems == 0 else float(nbKept) / nbItems * 100)
-    if nbKept != nbClKept or nbRead != nbClRead:
-        print "%d clusters, %d kept (%.2f%%)" % (nbClRead, nbClKept, 0 if nbClRead == 0 else float(nbClKept) / nbClRead * 100)
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/restrictSequenceList.py
--- a/SMART/Java/Python/restrictSequenceList.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,113 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Restrict a sequence list with some names"""
-
-from optparse import OptionParser
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.writer.WriterChooser import WriterChooser
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc import Utils
-
-class RestrictSequenceList(object):
-
-    def __init__(self, verbosity):
-        self.verbosity = verbosity
-        self.exclude   = False
-
-    def setInputFileName(self, fileName, format):
-        chooser = ParserChooser(self.verbosity)
-        chooser.findFormat(format)
-        self.parser = chooser.getParser(fileName)
-
-    def setExclusion(self, boolean):
-        self.exclude = boolean
-
-    def setOutputFileName(self, fileName, format):
-        chooser = WriterChooser(self.verbosity)
-        chooser.findFormat(format)
-        self.writer = chooser.getWriter(fileName)
-
-    def setNamesFileName(self, fileName):
-        self.namesFileName = fileName
-
-    def _readNames(self):
-        self.names = []
-        handle = open(self.namesFileName)
-        for name in handle:
-            self.names.append(name.strip())
-        handle.close()
-
-    def _write(self):
-        nbElements = self.parser.getNbItems()
-        progress   = Progress(nbElements, "Parsing input file", self.verbosity)
-        nbRead     = 0
-        nbWritten  = 0
-        for element in self.parser.getIterator():
-            name    = element.getName()
-            nbRead += 1
-            if Utils.xor(name in self.names, self.exclude):
-                self.writer.addElement(element)
-                nbWritten += 1
-            if name in self.names:
-                self.names.remove(name)
-            progress.inc()
-        progress.done()
-        if self.verbosity > 0:
-            print "%d read" % (nbRead)
-            print "%d written (%d%%)" % (nbWritten, 0 if nbRead == 0 else round(float(nbWritten) / nbRead * 100))
-    
-    def run(self):
-        self._readNames()
-        self._write()
-        if self.names:
-            print "Some names are not present in the file: %s" % ", ".join(self.names)
-        
-
-
-if __name__ == "__main__":
-    
-    description = "Restrict Sequence List v1.0.1: Keep the elements of a list of sequences whose name is mentionned in a given file. [Category: Data Selection]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFile",  action="store",                       type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",     action="store",      default="fasta", type="string", help="format of the input and output files [compulsory] [format: sequence file format] [default: fasta]")
-    parser.add_option("-n", "--name",      dest="names",      action="store",                       type="string", help="names of the transcripts [compulsory] [format: file in TXT format]")
-    parser.add_option("-o", "--output",    dest="outputFile", action="store",                       type="string", help="output file [format: output file in sequence format given by -f]")
-    parser.add_option("-x", "--exclude",   dest="exclude",    action="store_true", default=False,                  help="output all those whose name is NOT on the list [format: boolean]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",  action="store",      default=1,       type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    rsl = RestrictSequenceList(options.verbosity)
-    rsl.setInputFileName(options.inputFile, options.format)
-    rsl.setOutputFileName(options.outputFile, options.format)
-    rsl.setNamesFileName(options.names)
-    rsl.setExclusion(options.exclude)
-    rsl.run()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/restrictTranscriptList.py
--- a/SMART/Java/Python/restrictTranscriptList.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,85 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Restrict a transcript list with some parameters (regions)"""
-
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from commons.core.writer.TranscriptWriter import TranscriptWriter
-from SMART.Java.Python.misc.Progress import Progress
-
-STRAND2DIRECTION = {"+": 1, "-": -1, None: None}
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Restrict Transcript List v1.0.2: Keep the coordinates which are located in a given position. [Category: Data Selection]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",      dest="inputFileName",  action="store",                             type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",     dest="format",         action="store",                             type="string", help="format [compulsory] [format: transcript file format]")
-    parser.add_option("-c", "--chromosome", dest="chromosome",     action="store",            default=None,    type="string", help="chromosome [format: string]")
-    parser.add_option("-s", "--start",      dest="start",          action="store",            default=None,    type="int",    help="start [format: int]")
-    parser.add_option("-e", "--end",        dest="end",            action="store",            default=None,    type="int",    help="end [format: int]")
-    parser.add_option("-t", "--strand",     dest="strand",         action="store",            default=None,    type="string", help="strand (+ or -) [format: string]")
-    parser.add_option("-o", "--output",     dest="outputFileName", action="store",                             type="string", help="output file [format: output file in GFF3 format]")
-    parser.add_option("-v", "--verbosity",  dest="verbosity",      action="store",            default=1,       type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    parser = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
-    writer = TranscriptWriter(options.outputFileName, options.format, options.verbosity)
-
-    direction = STRAND2DIRECTION[options.strand]
-        
-    nbTranscripts = parser.getNbTranscripts()
-    progress      = Progress(nbTranscripts, "Parsing file %s" % (options.inputFileName), options.verbosity)
-    
-    nbTotal = 0
-    nbKept    = 0
-    for transcript in parser.getIterator():
-        progress.inc()
-        nbTotal += 1
-        if options.chromosome != None and options.chromosome != transcript.getChromosome():
-            continue
-        if options.start != None and options.start > transcript.getEnd():
-            continue
-        if options.end != None and options.end < transcript.getStart():
-            continue
-        if options.end != None and options.end < transcript.getStart():
-            continue
-        if direction != None and direction != transcript.getDirection():
-            continue
-        nbKept += 1
-        writer.addTranscript(transcript)
-    progress.done()
-    
-    writer.write()
-        
-    print "%d out of %d are kept (%f%%)" % (nbKept, nbTotal, (float(nbKept) / nbTotal * 100))        
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/runRandomJobs.py
--- a/SMART/Java/Python/runRandomJobs.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,46 +0,0 @@
-import unittest
-import os
-import time
-from optparse import OptionParser
-from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import MockFindOverlaps_randomExample
-from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
-
-if __name__ == '__main__':
-    description = "runRandomJobs: create random ref/query files (with size given), and run the jobs on cluster with help of runJobs.sh"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--inputRef", dest="inputRefGff3FileName", action="store", type="string", help="Reference input file [compulsory] [format: file in gff3 format]")
-    parser.add_option("-j", "--inputQuery", dest="inputQueryGff3FileName", action="store", type="string", help="Query input file [compulsory] [format: file in gff3 format]")
-    parser.add_option("-m", "--inputRefSize", dest="numberOfRefReads", action="store", type="int", help="The number of Reference")
-    parser.add_option("-n", "--inputQuerySize", dest="numberOfQReads", action="store", type="int", help="The number of Query")
-    parser.add_option("-o", "--output", dest="outputGff3FileName", action="store", type="string", help="output file [compulsory] [format: output file in gff3 format]")
-    (options, args) = parser.parse_args()
-    
-    outputDataName = 'timeResult.dat' 
-    fTime = open(outputDataName, 'w')  
-    fTime.write('NbRef\tNbQuery\tNbOverlap\ttime\n')   
-    chromSize = 100000
-    print 'ref size = %d,  query size = %d' %(options.numberOfRefReads, options.numberOfQReads)
-    iMFOR_ref = MockFindOverlaps_randomExample(options.inputRefGff3FileName, 'ref', options.numberOfRefReads, chromSize)
-    iMFOR_ref.write()
-    cmd_ref = 'sort -f -n -k4 -k5.4rn -o %s %s' % (options.inputRefGff3FileName, options.inputRefGff3FileName)
-    os.system(cmd_ref)
-    iMFOR_query = MockFindOverlaps_randomExample(options.inputQueryGff3FileName,'q', options.numberOfQReads, chromSize)
-    iMFOR_query.write()
-    cmd_query = 'sort -f -n -k4 -k5.4rn -o %s %s' % (options.inputQueryGff3FileName, options.inputQueryGff3FileName)
-    os.system(cmd_query)
-    iFOO = FindOverlaps_optim(options.inputRefGff3FileName, options.inputQueryGff3FileName)
-    iFOO.setOutputGff3FileName(options.outputGff3FileName)
-    
-    startTime_optim = time.time()
-    iFOO.run()
-    iFOO.close()  
-    nbOverlap = iFOO.getNbOverlap() 
-    endTime_optim = time.time()    
-    cmd = 'sort -f -n -k4 -k5.4rn -k9.5 -t ";" -o %s %s' % (options.outputGff3FileName, options.outputGff3FileName)
-    os.system(cmd)
-    totalTime_optim = endTime_optim - startTime_optim
-    print 'we take %s second.' % (totalTime_optim)
-    fTime.write('%d\t%d\t%d\t%.2f\n'%(options.numberOfRefReads, options.numberOfQReads, nbOverlap, totalTime_optim))
-    iFOO.deletIntermediateFiles()
-    fTime.close()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/selectByNbOccurrences.py
--- a/SMART/Java/Python/selectByNbOccurrences.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,89 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Select the transcript that have not more that a given number of occurrences"""
-
-import os
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import *
-from commons.core.writer.Gff3Writer import *
-from SMART.Java.Python.misc.Progress import *
-from SMART.Java.Python.misc.RPlotter import *
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Select by # of Occurrences v1.0.1: Keep the reads which have mapped less than a given number of times. [Category: Personnal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",             dest="inputFileName",    action="store",                                                type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",            dest="format",                 action="store",                                                type="string", help="format of the input [compulsory] [format: transcript file format]")
-    parser.add_option("-n", "--occurrences", dest="occurrences",        action="store",            default=1,                type="int",        help="maximum number of occurrences allowed [format: int] [default: 1]")     
-    parser.add_option("-o", "--output",            dest="outputFileName", action="store",                                                type="string", help="output file [format: output file in GFF3 format]")
-    parser.add_option("-y", "--mysql",             dest="mysql",                    action="store_true", default=False,                                     help="mySQL output [format: bool] [default: false]")
-    parser.add_option("-v", "--verbosity",     dest="verbosity",            action="store",            default=1,                type="int",        help="trace level [format: int] [default: 1]")
-    parser.add_option("-l", "--log",                 dest="log",                        action="store_true", default=False,                                     help="write a log file [format: bool] [default: false]")
-    (options, args) = parser.parse_args()
-
-    parser = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
-
-    # get occurrences of the transcripts
-    names        = dict()
-    progress = Progress(parser.getNbTranscripts(), "Reading names of %s" % (options.inputFileName), options.verbosity)
-    for transcript in parser.getIterator():
-        name = transcript.name
-        if name not in names:
-            names[name] = 1
-        else:
-            names[name] += 1
-        progress.inc()
-    progress.done()
-
-    # write output file
-    nbWritten = 0
-    writer        = Gff3Writer(options.outputFileName, options.verbosity)
-    if options.mysql:
-        mysqlWriter = MySqlTranscriptWriter(options.outputFileName, options.verbosity)
-    progress    = Progress(parser.getNbTranscripts(), "Writing transcripts", options.verbosity)
-    for transcript in parser.getIterator():
-        name = transcript.name
-        if names[name] <= options.occurrences:
-            nbWritten += 1
-            writer.addTranscript(transcript)
-            if options.mysql:
-                mysqlWriter.addTranscript(transcript)
-        progress.inc()
-    progress.done()
-                                                     
-    if options.mysql:
-        mysqlWriter.write()
-    print "%d input" % (parser.getNbTranscripts())
-    print "%d output (%.2f%%)" % (nbWritten, float(nbWritten) / parser.getNbTranscripts() * 100)
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/sequenceListSplitter.py
--- a/SMART/Java/Python/sequenceListSplitter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,73 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Split a FASTA file into several shorter ones"""
-
-from optparse import OptionParser
-from commons.core.parsing.SequenceListParser import *
-from commons.core.writer.FastaWriter import *
-from SMART.Java.Python.misc.Progress import *
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Sequence List Splitter v1.0.1: Split a list of big sequences into small chunks. [Category: Personnal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",   action="store",                         type="string", help="input file [compulsory] [format: file in FASTA format]")
-    parser.add_option("-o", "--output",    dest="outputFileNames", action="store",                         type="string", help="output files [compulsory] [format: output file in FASTA format]")
-    parser.add_option("-n", "--number",    dest="number",          action="store",      default=10,        type="int",    help="number of splits [compulsory] [format: int] [default: 10]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",       action="store",      default=1,         type="int",    help="trace level [format: int]")
-    parser.add_option("-l", "--log",       dest="log",             action="store_true", default=False,                    help="write a log file [format: bool] [default: false]")
-    (options, args) = parser.parse_args()
-
-    if options.log:
-        logHandle = open(options.outputFileNames + ".log", "w")
-
-    # split file
-    sequenceListParser = SequenceListParser(options.inputFileName, options.verbosity)
-    nbSequences                = sequenceListParser.getNbSequences()
-    nbSequencesByFile    = math.ceil(nbSequences / options.number)
-
-    # write into files
-    currentFileNumber = 1
-    writer            = FastaWriter("%s%i.fasta" % (options.outputFileNames, currentFileNumber), options.verbosity)
-    nbSequencesHere   = 0
-    progress          = Progress(nbSequences, "Writing files", options.verbosity)
-    for sequence in sequenceListParser.getIterator():
-        writer.addSequence(sequence)
-        nbSequencesHere += 1
-        if nbSequencesHere == nbSequencesByFile:
-            currentFileNumber += 1
-            writer             = FastaWriter("%s%i.fasta" % (options.outputFileNames, currentFileNumber), options.verbosity)
-            nbSequencesHere    = 0
-        progress.inc()
-    progress.done()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/splitByTag.py
--- a/SMART/Java/Python/splitByTag.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,68 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Read a file and split it into several, depending on a tag"""
-
-import os
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import *
-from commons.core.writer.Gff3Writer import *
-from SMART.Java.Python.misc.Progress import *
-from SMART.Java.Python.misc import Utils
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Split By Tag v1.0.1: Read a file and split it into several, depending on a tag. [Category: Personnal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",                     dest="inputFileName",         action="store",                                        type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",                    dest="format",                        action="store",                                        type="string", help="format of file 1 [compulsory] [format: transcript file format]")
-    parser.add_option("-t", "--tag",                         dest="tag",                             action="store",                                        type="string", help="tag on which the split is made [compulsory] [format: string]")
-    parser.add_option("-o", "--output",                    dest="outputFileName",        action="store",                                        type="string", help="output file [format: output file in CSV format]")
-    parser.add_option("-v", "--verbosity",             dest="verbosity",                 action="store",            default=1,        type="int",        help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    transcriptContainer = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
-    writers                         = dict()
-
-    progress = Progress(transcriptContainer.getNbTranscripts(), "Reading file %s" % (options.inputFileName), options.verbosity)
-    for transcript in transcriptContainer.getIterator():
-        value = transcript.getTagValue(options.tag)
-        if value == None:
-            value = "noTag"
-        value = str(value).replace(" ", "_").title()
-        if value not in writers:
-            writers[value] = Gff3Writer("%s.gff3" % (os.path.join(options.outputFileName, value)))
-        writers[value].addTranscript(transcript)
-
-        progress.inc()
-    progress.done()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/splitMultiFasta.py
--- a/SMART/Java/Python/splitMultiFasta.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,64 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Split a Multi-Fasta file to several Fasta files"""
-
-import os
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import *
-from commons.core.writer.Gff3Writer import *
-from SMART.Java.Python.misc.Progress import *
-from SMART.Java.Python.misc import Utils
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Split Multi-Fasta v1.0.1: Split a Multi-Fasta file to several Fasta files. [Category: Personnal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",                     dest="inputFileName",         action="store",                                        type="string", help="input file 1 [compulsory] [format: file in FASTA format]")
-    parser.add_option("-o", "--output",                    dest="outputFileName",        action="store",                                        type="string", help="output file [format: output file in FASTA format]")
-    (options, args) = parser.parse_args()
-
-    inputHandle    = open(options.inputFileName)
-    outputHandle = None
-    
-    for line in inputHandle:
-        line = line.strip()
-        if line[0] == ">":
-            if outputHandle != None:
-                outputHandle.close()
-            name = line[1:].split(" ")[0]
-            outputHandle = open("%s%s.fasta" % (options.outputFileName, name), "w")
-        outputHandle.write("%s\n" % (line))
-            
-    inputHandle.close()
-    outputHandle.close()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/Bins.py
--- a/SMART/Java/Python/structure/Bins.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,77 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Some functions about bins
-"""
-
-def getMinBin():
-    return 3
-
-
-def getMaxBin():
-    return 7
-
-
-def getBin(start, end):
-    for i in range(getMinBin(), getMaxBin() + 1):
-        binLevel = 10 ** i
-        if int(start / binLevel) == int(end / binLevel):
-            return int(i * 10 ** (getMaxBin() + 1) + int(start / binLevel))
-    return int((getMaxBin() + 1) * 10 ** (getMaxBin() + 1))
-
-
-def getOverlappingBins(start, end):
-    array  = []
-    bigBin = int((getMaxBin() + 1) * 10 ** (getMaxBin() + 1))
-    for i in range(getMinBin(), getMaxBin() + 1):
-        binLevel = 10 ** i
-        array.append((int(i * 10 ** (getMaxBin() + 1) + int(start / binLevel)), int(i * 10 ** (getMaxBin() + 1) + int(end / binLevel))))
-    array.append((bigBin, bigBin))
-    return array
-
-
-def getIterator(maxValue = None):
-    if maxValue == None:
-        maxValue = 10 ** (getMaxBin() + getMinBin()) - 1
-    for i in range(getMinBin(), getMaxBin() + 1):
-        binLevel = 10 ** i
-        binBit   = i * 10 ** (getMaxBin() + 1)
-        for j in range(0, maxValue / binLevel+1):
-            yield binBit + j
-    yield int((getMaxBin() + 1) * 10 ** (getMaxBin() + 1))
-
-
-def getNbBins(maxValue = None):
-    if maxValue == None:
-        maxValue = 10 ** (getMaxBin() + getMinBin()) - 1
-    nbBins = 0
-    for i in range(getMinBin(), getMaxBin() + 1):
-        nbBins += maxValue / 10 ** i
-    return nbBins + 1
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/Bins.pyc
b
Binary file SMART/Java/Python/structure/Bins.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/Interval.py
--- a/SMART/Java/Python/structure/Interval.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,706 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-\n-from SMART.Java.Python.structure.Bins import *\n-from commons.core.coord.Range import Range\n-\n-class Interval(Range):\n-    """\n-    Store a genomic interval\n-    @ivar name:          name of the interval [optional]\n-    @type name:          string\n-    @ivar id:            id of the interval [optional]\n-    @type id:            int\n-    @ivar bin:           bin in which the interval should be if stored in a database [computed]\n-    @type bin:           int \n-    @ival tags:          information about the transcript [optional]\n-    @type tags:          dict\n-    @ivar verbosity:     verbosity\n-    @type verbosity:     int [default: 0]\n-    """\n-\n-    def __init__(self, interval = None, verbosity = 0):\n-        """\n-        Constructor\n-        @param interval:    interval to be copied\n-        @type    interval:    class L{Interval<Interval>}\n-        @param verbosity: verbosity\n-        @type    verbosity: int\n-        """\n-        Range.__init__(self)\n-        self.name          = None\n-        self.id            = None\n-        self.bin           = None\n-        self.verbosity     = verbosity\n-        self.tags          = {}\n-        if interval != None:\n-            self.copy(interval)\n-\n-    #!!!! 
Warning: two methods getStart() and getEnd() give the information maximum and minimum in interval.!!!!#\n-    #In case strand = "+", start < end; strand = "-", start > end        \n-    def getStart(self):\n-        if self.start == -1:\n-            return -1\n-        if self.end == -1:\n-            return self.start\n-        return self.getMin()\n-\n-    \n-    def getEnd(self):\n-        if self.end == -1:\n-            return -1\n-        if self.start == -1:\n-            return self.end\n-        return self.getMax()\n-\n-\n-    def getChromosome(self):\n-        return self.getSeqname()\n-\n-\n-    def getDirection(self):\n-        return 1 if self.getStrand() == "+" else -1\n-\n-\n-    def getName(self):\n-        return self.name\n-\n-\n-    def isSet(self):\n-        """\n-        Check if the interval is set\n-        """\n-        return self.getStart() == None and self.getEnd() == None\n-\n-\n-    def copy(self, interval):\n-        """\n-        Copy method\n-        @param interval: interval to be copied\n-        @type    interval: class L{Interval<Interval>}\n-        """\n-        self.setStart(interval.getStart())\n-        self.setEnd(interval.getEnd())\n-        self.setChromosome(interval.getChromosome())\n-        self.setDirection(interval.getDirection()'..b'-        variables = ["name", "chromosome", "start", "end", "direction", "tags", "bin"]\n-        return variables\n-    getSqlVariables = classmethod(getSqlVariables)\n-\n-\n-    def setSqlValues(self, array):\n-        """\n-        Set the values of the properties of this object as given by a results line of a SQL query\n-        """\n-        self.id         = array[0]\n-        self.name       = array[1].strip("\'")\n-        self.setChromosome(array[2].strip("\'"))\n-        self.setStart(array[3])\n-        self.setEnd(array[4])\n-        self.setDirection(array[5])\n-        self.setTagValues(array[6].strip("\'"), ";", "=")\n-        self.bin        = array[7]\n-\n-\n-    def 
getSqlValues(self):\n-        """\n-        Get the values of the properties that should be saved in a database\n-        """\n-        values = dict()\n-        values["name"]       = self.name\n-        values["chromosome"] = self.getChromosome()\n-        values["start"]      = self.getStart()\n-        values["end"]        = self.getEnd()\n-        values["direction"]  = self.getDirection()\n-        values["tags"]       = self.getTagValues(";", "=")\n-        values["bin"]        = self.getBin()\n-        return values\n-\n-\n-    def getSqlTypes(cls):\n-        """\n-        Get the values of the properties that should be saved in a database\n-        """\n-        types = dict()\n-        types["name"]       = "varchar"\n-        types["chromosome"] = "varchar"\n-        types["start"]      = "int"\n-        types["end"]        = "int"\n-        types["direction"]  = "tinyint"\n-        types["tags"]       = "varchar"\n-        types["bin"]        = "int"\n-        return types\n-    getSqlTypes = classmethod(getSqlTypes)\n-    \n-\n-    def getSqlSizes(cls):\n-        """\n-        Get the sizes of the properties that should be saved in a database\n-        """\n-        sizes = dict()\n-        sizes["name"]       = 255\n-        sizes["chromosome"] = 255\n-        sizes["start"]      = 11\n-        sizes["end"]        = 11\n-        sizes["direction"]  = 4\n-        sizes["tags"]       = 1023\n-        sizes["bin"]        = 11\n-        return sizes\n-    getSqlSizes = classmethod(getSqlSizes)\n-    \n-\n-    def printCoordinates(self):\n-        """\n-        Print start and end positions (depending on the direction of the interval)\n-        """\n-        if self.getDirection() == 1:\n-            return "%d-%d" % (self.getStart(), self.getEnd())\n-        else:\n-            return "%d-%d" % (self.getEnd(), self.getStart())\n-\n-    \n-    def extractSequence(self, parser):\n-        """\n-        Get the sequence corresponding to this interval\n-      
  @param parser: a parser to a FASTA file\n-        @type    parser: class L{SequenceListParser<SequenceListParser>}\n-        @return        : a instance of L{Sequence<Sequence>}\n-        """\n-        return parser.getSubSequence(self.getChromosome(), self.getStart(), self.getEnd(), self.getDirection(), self.name)\n-    \n-    \n-    def extractWigData(self, parser):\n-        """\n-        Get the data retrieved from a wig file\n-        @param parser: a parser class to a WIG file\n-        @type    parser: class L{WigParser<WigParser>}\n-        """\n-        data = parser.getRange(self.getChromosome(), self.getStart(), self.getEnd())\n-        if self.getDirection() == -1:\n-            if parser.strands:\n-                newData = {}\n-                for strand in data:\n-                    data[strand].reverse()\n-                    newData[-strand] = data[strand]\n-                data = newData\n-            else:\n-                data.reverse()\n-        return data\n-\n-\n-    def __str__(self):\n-        """\n-        Output a simple representation of this interval\n-        """\n-        direction = "+"\n-        if self.getDirection() == -1:\n-            direction = "-"\n-        string = "%s:%d-%d (%s)" % (self.getChromosome(), self.getStart(), self.getEnd(), direction)\n-        if self.name != "":\n-            string = "(%s) %s" % (self.name, string)\n-        return string\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/Interval.pyc
b
Binary file SMART/Java/Python/structure/Interval.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/Mapping.py
--- a/SMART/Java/Python/structure/Mapping.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,255 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-from SMART.Java.Python.structure.SubMapping import SubMapping\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.Interval import Interval\n-\n-class Mapping(object):\n-    """A class that represents a mapping"""\n-\n-    def __init__(self):\n-        self.targetInterval = None\n-        self.queryInterval  = None\n-        self.subMappings    = []\n-        self.size           = None\n-        self.transcript     = None\n-        self.tags           = {}\n-\n-\n-    def copy(self, mapping):\n-        for subMapping in mapping.subMappings:\n-            newSubMapping = SubMapping(subMapping)\n-            self.addSubMapping(newSubMapping)\n-        self.targetInterval = Interval(mapping.targetInterval)\n-        self.queryInterval  = Interval(mapping.queryInterval)\n-        self.size           = mapping.size\n-        self.tags           = {}\n-        for tag in mapping.tags:\n-            self.tags[tag] = mapping[tag]\n-        self.transcript.copy(mapping.transcript)\n-\n-\n-    def setTargetInterval(self, interval):\n-        self.targetInterval = Interval(interval)\n-        if self.queryInterval != None:\n-            self.setDirection(self.targetInterval.getDirection() * self.queryInterval.getDirection())\n-\n-\n-    def setQueryInterval(self, interval):\n-        self.queryInterval = Interval(interval)\n-        if self.targetInterval != None:\n-            self.setDirection(self.targetInterval.getDirection() * self.queryInterval.getDirection())\n-\n-\n-    def 
getQueryInterval(self):\n-        return self.queryInterval\n-\n-\n-    def addSubMapping(self, subMapping):\n-        subMappingCopy = SubMapping(subMapping)\n-        self.subMappings.append(subMappingCopy)\n-\n-        if self.targetInterval:\n-            self.targetInterval.setStart(min(self.targetInterval.getStart(), subMapping.targetInterval.getStart()))\n-            self.targetInterval.setEnd(max(self.targetInterval.getEnd(),     subMapping.targetInterval.getEnd()))\n-        else:\n-            self.setTargetInterval(subMapping.targetInterval)\n-        if self.queryInterval:\n-            self.queryInterval.setStart(min(self.queryInterval.getStart(), subMapping.queryInterval.getStart()))\n-            self.queryInterval.setEnd(max(self.queryInterval.getEnd(),     subMapping.queryInterval.getEnd()))\n-        else:\n-            self.setQueryInterval(subMapping.queryInterval)\n-\n-        if self.getDirection() != 0:\n-            subMapping.setDirection(self.getDirection'..b'ccurrences(self, nbOccurrences):\n-        self.setTagValue("nbOccurrences", nbOccurrences)\n-\n-\n-    def setNbMismatches(self, nbMismatches):\n-        self.setTagValue("nbMismatches", nbMismatches)\n-        if self.size != None and "identity" not in self.getTagNames():\n-            identity = 100 if self.size == 0 else (self.size - self.getTagValue("nbMismatches")) / float(self.size) * 100\n-            self.setTagValue("identity", identity)\n-\n-\n-    def setNbGaps(self, nbGaps):\n-        self.setTagValue("nbGaps", nbGaps)\n-        \n-        \n-    def setRank(self, rank):\n-        self.setTagValue("rank", rank)\n-        \n-\n-    def setEvalue(self, evalue):\n-        self.setTagValue("evalue", evalue)\n-        \n-\n-    def setOccurrence(self, occurrence):\n-        self.setTagValue("occurrence", occurrence)\n-        \n-        \n-    def setBestRegion(self, bestRegion):\n-        self.setTagValue("bestRegion", bestRegion)\n-\n-\n-    def mergeExons(self, 
distance):\n-        previousSubMapping = None\n-        subMappings        = []\n-        for subMapping in self.subMappings:\n-            if previousSubMapping == None:\n-                subMappings.append(subMapping)\n-                previousSubMapping = subMapping\n-            else:\n-                targetDistance = subMapping.targetInterval.getDistance(previousSubMapping.targetInterval)\n-                queryDistance  = subMapping.queryInterval.getDistance(previousSubMapping.queryInterval)\n-                if targetDistance <= distance:\n-                    self.setTagValue("nbGaps", self.getTagValue("nbGaps") + queryDistance)\n-                    previousSubMapping.merge(subMapping)\n-                else:\n-                    subMappings.append(subMapping)\n-                    previousSubMapping = subMapping\n-        self.subMappings = subMappings\n-        \n-        \n-    def getTranscript(self):\n-        """\n-        Extract a transcript from this mapping\n-        @return: a transcript\n-        """\n-        if self.transcript != None:\n-            return self.transcript\n-        self.transcript = Transcript()\n-        self.transcript.copy(self.targetInterval)\n-        self.transcript.setDirection(self.getDirection())\n-        self.transcript.setName(self.queryInterval.getName())\n-        self.transcript.removeExons()\n-        if len(self.subMappings) > 1:\n-            for subMapping in self.subMappings:\n-                self.transcript.addExon(subMapping.targetInterval)\n-        cpt = 1\n-        for exon in self.transcript.exons:\n-            exon.setDirection(self.transcript.getDirection())\n-            exon.setName("%s-exon%d" % (self.transcript.getName(), cpt))\n-            exon.setChromosome(self.transcript.getChromosome())\n-            cpt += 1\n-        self.transcript.setDirection(self.getDirection())\n-        self.transcript.sortExons()\n-        for tag in self.tags:\n-            if "bestRegion" not in 
self.getTagNames():\n-                self.transcript.setTagValue("bestRegion", "(self)")\n-            self.transcript.setTagValue(tag, self.getTagValue(tag))\n-        return self.transcript\n-    \n-\n-    def getChromosome(self):\n-        if not self.subMappings:\n-            raise Exception("Error! Mapping \'%s\' has no submapping" % (self))\n-        return self.subMappings[0].targetInterval.getChromosome()\n-\n-\n-    \n-    def getErrorScore(self):\n-        return self.getTagValue("nbGaps") * 3 + self.getTagValue("nbMismatches") + (len(self.subMappings) - 1) * 0.1\n-            \n-\n-    def printGBrowseReference(self):\n-        return self.getTranscript().printGBrowseReference()\n-\n-\n-    def printGBrowseLine(self):\n-        return self.getTranscript().printGBrowseLine()\n-\n-\n-    def printGBrowse(self):\n-        return self.getTranscript().printGBrowse()\n-\n-\n-    def printBed(self):\n-        return self.getTranscript().printBed()\n-\n-\n-    def __str__(self):\n-        return "%s     ----     %s" % (str(self.getTranscript()), ", ". join([str(submapping) for submapping in self.subMappings]))\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/Mapping.pyc
b
Binary file SMART/Java/Python/structure/Mapping.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/Sequence.py
--- a/SMART/Java/Python/structure/Sequence.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,184 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-import re
-from commons.core.seq.Bioseq import Bioseq
-
-reverseComplementString = {
-    "A": "T",
-    "C": "G",
-    "G": "C",
-    "T": "A",
-    "U": "A",
-    "M": "K",
-    "R": "Y",
-    "W": "W",
-    "S": "S",
-    "Y": "R",
-    "K": "M",
-    "V": "B",
-    "H": "D",
-    "D": "H",
-    "B": "V",
-    "N": "N",
-    "a": "t",
-    "c": "g",
-    "g": "c",
-    "t": "a",
-    "u": "a",
-    "m": "k",
-    "r": "y",
-    "w": "w",
-    "s": "s",
-    "y": "r",
-    "k": "m",
-    "v": "b",
-    "h": "d",
-    "d": "h",
-    "b": "v",
-    "n": "n"
-}
-
-class Sequence(Bioseq):
-    """A class that codes for a sequence"""
-
-    def __init__(self, name = "", sequence = ""):
-        super(Sequence, self).__init__(name, sequence)
-        self.name            = self.header        
-        self.quality         = None
-        self.chunkedSequence = None
-        self.chunkedQuality  = None
-        self.integerQuality  = False
-
-    def setName(self, name=""):
-        super(Sequence, self).setHeader(name)
-            
-    def getName(self):
-        return self.getHeader()
-    
-    def setSequence(self, seq=""):
-        super(Sequence, self).setSequence(seq)
-
-    def setQuality(self, quality):
-        if quality == None:
-            self.quality = None
-            return
-        if " " in quality:
-            self.quality        = quality.split()
-            self.integerQuality = True
-        else:
-            self.quality = list(quality)
-        
-    def getQuality(self):
-        if self.quality == None:
-            return None
-        if self.integerQuality:
-            return " ".join(self.quality)
-        return "".join(self.quality)
-    
-    def getSize(self):
-        return len(self.getSequence())
-
-
-    def copy(self, sequence):
-        self.setName(sequence.getName())
-        self.setSequence(sequence.getSequence())
-        self.setQuality(sequence.getQuality())
-        self.chunkedSequence = None
-        self.chunkedQuality  = None
-
-
-    def chunkSequence(self):
-        self.chunkedSequence = []
-        for i in range (0, self.getSize() / 60 + 1):
-            self.chunkedSequence.append(self.getSequence()[i * 60 : min(self.getSize(), (i+1) * 60)])
-        if self.quality != None:
-            self.chunkedQuality = []
-            for i in range (0, self.getSize() / 60 + 1):
-                self.chunkedQuality.append(self.quality[i * 60 : min(self.getSize(), (i+1) * 60)])
-
-    def concatenate(self, seq):
-        sequence  = self.getSequence()
-        sequence += seq.getSequence()
-        self.setSequence(sequence)
-        if self.quality != None:
-            sep = " " if self.integerQuality else ""
-            self.setQuality(self.getQuality() + sep + seq.getQuality())
-        self.chunkedSequence = None
-        self.chunkedQuality  = None
-        
-
-    def printFasta(self):
-        if self.chunkedSequence == None:
-            self.chunkSequence()
-        return ">%s\n%s\n" % (self.getHeader(), "\n".join(self.chunkedSequence))
-
-
-    def printFastq(self):
-        if self.chunkedSequence == None:
-            self.chunkSequence()
-        return "@%s\n%s\n+%s\n%s\n" % (self.getHeader(), self.getSequence(), self.getHeader(), self.getQuality())
-
-
-    def reverseComplement(self):
-        seq = ""
-        self.chunkedSequence = None
-        self.chunkedQuality  = None
-        for i in range(0, self.getSize()):
-            char = self.getSequence()[i:i+1]
-            if char not in reverseComplementString:
-                sys.exit("Cannot understand character %s from string %s" % (char, self.getSequence()))
-            seq = "%s%s" % (reverseComplementString[char], seq)
-        self.setSequence(seq)
-        if self.quality != None:
-            self.quality = self.quality[::-1]
-        
-        
-    def containsAmbiguousNucleotides(self):
-        m = re.search("[^ACGTUacgtu]", self.getSequence())
-        if m != None:
-            return True
-        return False
-    
-    
-    def shrinkToFirstNucleotides(self, nbNucleotides):
-        self.chunkedSequence = None
-        self.chunkedQuality  = None
-        self.setSequence(self.getSequence()[0:nbNucleotides])
-        if self.quality != None:
-            self.quality = self.quality[0:nbNucleotides]
-    
-    
-    def shrinkToLastNucleotides(self, nbNucleotides):
-        self.chunkedSequence = None
-        self.chunkedQuality  = None
-        self.setSequence(self.getSequence()[-nbNucleotides:])
-        if self.quality != None:
-            self.quality = self.quality[-nbNucleotides:]
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/Sequence.pyc
b
Binary file SMART/Java/Python/structure/Sequence.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/SequenceList.py
--- a/SMART/Java/Python/structure/SequenceList.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,72 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import math
-
-class SequenceList(object):
-    """A class that codes for a list of sequences"""
-
-    def __init__(self, verbosity = 0):
-        self.sequences = []
-        self.verbosity = verbosity
-
-
-    def nbSequences(self):
-        return len(self.sequences)
-
-
-    def getSequence(self, index):
-        return self.sequences[index]
-        
-
-    def addSequence(self, sequence):
-        self.sequences.append(sequence)
-        
-
-    def split(self, number):
-        sequenceLists = []
-        size          = math.ceil(self.nbSequences() / number)
-
-        sequenceList = SequenceList()
-        for i in range(0, self.nbSequences()):
-            sequenceList.addSequence(self.getSequence(i))
-            if (sequenceList.nbSequences() == size):
-                sequenceLists.append(sequenceList)
-                sequenceList = SequenceList()
-        if (sequenceList.nbSequences() != 0):
-            sequenceLists.append(sequenceList)
-        return sequenceLists
-
-
-    def printFasta(self):
-        string = ""
-        for sequence in self.sequences:
-            string += sequence.printFasta()
-        return string
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/SequenceList.pyc
b
Binary file SMART/Java/Python/structure/SequenceList.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/SubMapping.py
--- a/SMART/Java/Python/structure/SubMapping.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,258 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-from SMART.Java.Python.structure.Interval import Interval\n-from commons.core.coord.Align import Align\n-\n-class SubMapping(Align):\n-    """\n-    A class that represents a part of a mapping, more precisely, a pair (target interval, query interval) that match together\n-    @ivar targetInterval: the target interval\n-    @type targetInterval: class L{Interval<Interval>}\n-    @ivar queryInterval:  the query interval\n-    @type queryInterval:  class L{Interval<Interval>}\n-    @ivar size:           size of this sub-mapping\n-    @type size:           int\n-    @ivar tags:           various information\n-    @type tags:           dict\n-    """\n-\n-    def __init__(self, subMapping = None):\n-        """\n-        Constructor\n-        @param subMapping: a sub-mapping to be copied\n-        @type  subMapping: class L{SubMapping<SubMapping>}\n-        """\n-        self.targetInterval = Interval()\n-        self.queryInterval  = Interval()\n-        Align.__init__(self, self.queryInterval, self.targetInterval)\n-        self.size = None\n-        self.tags = {}\n-        if subMapping != None:\n-            self.copy(subMapping)\n-    \n-    def __eq__(self, o):\n-        if o == None:\n-            return False\n-        areAlignAttributesEquals = Align.__eq__(self, o)\n-        return areAlignAttributesEquals and (self.targetInterval == o.targetInterval) and (self.queryInterval == o.queryInterval) and self.size == o.getSize() and self.tags == o.getTags()\n-    \n-    def getSuperAdress(self):\n-        return hex(id(super(Align, 
self)))\n-    \n-#    def setRangesAlignToRangesInterval(self):\n-#        self.range_query = super(Range, self.queryInterval)\n-#        self.range_subject = super(Range, self.targetInterval)\n-        \n-    def copy(self, subMapping):\n-        """\n-        Copy method\n-        @param subMapping: a sub-mapping to be copied\n-        @type    subMapping: class L{SubMapping<SubMapping>}\n-        """\n-        self.setQueryName(subMapping.getQueryName())\n-        self.setQueryStart(subMapping.getQueryStart())\n-        self.setQueryEnd(subMapping.getQueryEnd())\n-        self.setSubjectName(subMapping.getSubjectName())\n-        self.setSubjectStart(subMapping.getSubjectStart())\n-        self.setSubjectEnd(subMapping.getSubjectEnd())\n-        self.e_value = subMapping.getEvalue()\n-        self.score = subMapping.getScore()\n-        self.identity = subMapping.getIdentity()\n-        \n-        self.targetInterval.copy(subMapping.targetInterval)\n-        sel'..b' @type    name:    string\n-        @param value: value of the tag\n-        @type    value: string or int\n-        """\n-        self.tags[name] = value\n-\n-\n-    def getTagValue(self, name):\n-        """\n-        Get the value of a tag\n-        @param name:    name of the tag\n-        @type    name:    string\n-        @return:            value of the tag\n-        """\n-        return self.tags[name]\n-\n-    \n-    def getTagNames(self):\n-        """\n-        Get all the names of the tags\n-        @return: the names of the tags\n-        """\n-        return self.tags.keys()\n-\n-    def getTargetInterval(self):\n-        return self.targetInterval\n-    \n-    def getQueryInterval(self):\n-        return self.queryInterval\n-    \n-    def getSize(self):\n-        return self.size\n-    \n-    def getTags(self):\n-        return self.tags\n-\n-    def setIdentity(self, identity):\n-        """\n-        Set the percentage of identity of the sub-mapping\n-        Possibly also set 
number of mismatches\n-        @param identity: the percentage of identity of the sub-mapping\n-        @type  identity: float\n-        """\n-        self.identity = identity\n-        self.setTagValue("identity", identity)\n-        if self.size != None and "nbMismatches" not in self.getTagNames():\n-            self.setTagValue("nbMismatches", self.size - round(self.size * self.getTagValue("identity") / 100.0))\n-\n-\n-    def setNbMismatches(self, nbMismatches):\n-        """\n-        Set the number of mismatches of the sub-mapping\n-        Possibly also set percentage of identity\n-        @param nbMismatches: the number of mismatches of the sub-mapping\n-        @type    nbMismatches: int\n-        """\n-        self.nbMismatches = nbMismatches\n-        if self.size != None and "identity" not in self.getTagNames():\n-            self.setTagValue("identity", (self.size - self.getTagValue("nbMismatches")) / float(self.size) * 100)\n-\n-\n-    def setNbGaps(self, nbGaps):\n-        """\n-        Set the number of gaps of the sub-mapping\n-        @param nbGaps: the number of gaps of the sub-mapping\n-        @type    nbGaps: int\n-        """\n-        self.setTagValue("nbGaps", nbGaps)\n-        \n-        \n-    def merge(self, subMapping):\n-        """\n-        Merge two subMappings\n-        @param subMapping: another sub-mapping\n-        @type    subMapping: class L{SubMapping<SubMapping>}\n-        """\n-        self.targetInterval.merge(subMapping.targetInterval)\n-        self.queryInterval.merge(subMapping.queryInterval)\n-\n-\n-    def printCoordinates(self):\n-        """\n-        Print the coordinates of the sub-mapping (considering the direction)\n-        @return: a string\n-        """\n-        if self.getDirection() == 1:\n-            return "%d-%d" % (self.targetInterval.getStart(), self.targetInterval.getEnd())\n-        else:\n-            return "%d-%d" % (self.targetInterval.getEnd(), self.targetInterval.getStart())\n-\n-\n-    def 
__str__(self):\n-        """\n-        Return a representation of this object\n-        @return: a string\n-        """\n-\n-        if "match" in self.getTagNames() and not self.getTagValue("match"):\n-            return "%s ---" % self.queryName\n-\n-        direction = "+"\n-        if self.getDirection() == -1:\n-            direction = "-"\n-        string = "%s:%d-%d -- %s:%d-%d    (%s)" % (self.targetInterval.getChromosome(), self.targetInterval.getStart(), self.targetInterval.getEnd(), self.queryInterval.name, self.queryInterval.getStart(), self.queryInterval.getEnd(), direction)\n-        if "nbMismatches" in self.getTagNames():\n-            string += "(%i mm)" % (self.getTagValue("nbMismatches"))\n-        if "identity" in self.getTagNames():\n-            string += "(id: %i%%)" % (self.getTagValue("identity"))\n-        if self.targetInterval.getSize() != None and self.queryInterval.getSize() != None and self.size != None:\n-            string += "(sizes: %d, %d -> %d)" % (self.targetInterval.getSize(), self.queryInterval.getSize(), self.size)\n-        return string\n-\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/SubMapping.pyc
b
Binary file SMART/Java/Python/structure/SubMapping.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/Transcript.py
--- a/SMART/Java/Python/structure/Transcript.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,876 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import sys\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.structure.Sequence import Sequence\n-\n-\n-class Transcript(Interval):\n-\t"""\n-\tA class that models an transcript, considered as a specialized interval (the bounds of the transcript) that contains exons (also represented as intervals)\n-\t@ivar exons: a list of exons (intervals)\n-\t@type exons: list of L{Interval{Interval}}\n-\t"""\n-\n-\tdef __init__(self, transcript = None, verbosity = 0):\n-\t\t"""\n-\t\tConstructor\n-\t\t@param transcript: transcript to be copied\n-\t\t@type  transcript: class L{Transcript<Transcript>}\n-\t\t@param verbosity:  verbosity\n-\t\t@type  verbosity:  int\n-\t\t"""\n-\t\tsuper(Transcript, self).__init__(None, verbosity)\n-\t\tself.exons   = []\n-\t\tself.introns = None\n-\t\tif transcript != None:\n-\t\t\tself.copy(transcript)\n-\n-\n-\tdef copy(self, transcript):\n-\t\t"""\n-\t\tCopy method\n-\t\t@param transcript: transcript to be copied\n-\t\t@type\ttranscript: class L{Transcript<Transcript>} or L{Interval<Interval>}\n-\t\t"""\n-\t\tsuper(Transcript, self).copy(transcript)\n-\t\tif transcript.__class__.__name__ == "Transcript":\n-\t\t\texons = transcript.getExons()\n-\t\t\tif len(exons) > 1:\n-\t\t\t\tfor exon in exons:\n-\t\t\t\t\texonCopy = Interval(exon)\n-\t\t\t\t\tself.addExon(exonCopy)\n-\n-\n-\tdef setDirection(self, direction):\n-\t\t"""\n-\t\tSet the direction of the interval\n-\t\tPossibly parse different formats\n-\t\tImpact all exons\n-\t\t@param direction: direction of the transcript (+ / 
-)\n-\t\t@type\tdirection: int or string\n-\t\t"""\n-\t\tsuper(Transcript, self).setDirection(direction)\n-\t\tfor exon in self.exons:\n-\t\t\texon.setDirection(direction)\n-\t\t\t\n-\n-\tdef setChromosome(self, chromosome):\n-\t\t"""\n-\t\tSet the chromosome\n-\t\t@param chromosome: chromosome on which the transcript is\n-\t\t@type  chromosome: string\n-\t\t"""\n-\t\tsuper(Transcript, self).setChromosome(chromosome)\n-\t\tfor exon in self.exons:\n-\t\t\texon.setChromosome(chromosome)\n-\n-\t\n-\tdef addExon(self, exon):\n-\t\t"""\n-\t\tAdd an exon to the list of exons\n-\t\t@param exon: a new exon\n-\t\t@type  exon: class L{Interval<Interval>}\n-\t\t"""\n-\t\tif not self.exons and not exon.overlapWith(self):\n-\t\t\tfirstExon = Interval()\n-\t\t\tfirstExon.setStart(self.getStart())\n-\t\t\tfirstExon.setEnd(self.getEnd())\n-\t\t\tfirstExon.setDirection(self.getDirection())\n-\t\t\tfirstExon.setChromosome(self.getChromosome())\n-\t\t\tself.exons.append(firstExon)\n-\t\tnewExon = Interval(exon)\n-\t\tnewExon.setDirection(self.getDirection())\n-\t\tself.exons.append(newExon)\n-\t\tif newExon.getStart() < self.getStart():\n-\t\t\tself.setSta'..b'\tif i == 0:\n-\t\t\t\tcontinue\n-\t\t\tcigar += "%dN" % (exon.getStart() - lastExonEnd - 1)\n-\t\t\tcigar += "%dM" % (exon.getSize())\n-\n-\t\treturn "%s\\t%d\\t%s\\t%d\\t%d\\t%s\\t%s\\t%d\\t%d\\t%s\\t%s\\t%s\\n" % (name, flag, chromosome, genomeStart, quality, cigar, mate, mateGenomeStart, gapSize, sequence, qualityString, tags)\n-\n-\n-\tdef printUcsc(self):\n-\t\t"""\n-\t\tExport this transcript using UCSC BED format\n-\t\t@return: a string\n-\t\t"""\n-\t\tif self.getChromosome().find("Het") != -1:\n-\t\t\treturn ""\n-\t\tname\t  = self.name\n-\t\tcomment   = self.getTagValues(";", "")\n-\t\tsizes\t = []\n-\t\tstarts\t= []\n-\t\tdirection = "+"\n-\t\tif self.getDirection() == -1:\n-\t\t\tdirection = "-"\n-\t\tself.sortExonsIncreasing()\n-\t\tfor exon in self.getExons():\n-\t\t\tsizes.append("%d" % 
(exon.getSize()))\n-\t\t\tstarts.append("%d" % (exon.getStart() - self.getStart()))\n-\t\treturn "%s\\t%d\\t%d\\t%s\\t1000\\t%s\\t%d\\t%d\\t0\\t%d\\t%s,\\t%s,\\n" % (self.getChromosome().replace("arm_", "chr"), self.getStart(), self.getEnd()+1, name, direction, self.getStart(), self.getEnd()+1, self.getNbExons(), ",".join(sizes), ",".join(starts))\n-\n-\n-\tdef printGBrowseReference(self):\n-\t\t"""\n-\t\tExport this transcript using GBrowse format (1st line only)\n-\t\t@return: a string\n-\t\t"""\n-\t\treturn "reference = %s\\n" % (self.getChromosome())\n-\n-\n-\tdef printGBrowseLine(self):\n-\t\t"""\n-\t\tExport this transcript using GBrowse format (2nd line only)\n-\t\t@return: a string\n-\t\t"""\n-\t\tself.sortExons()\n-\t\tcoordinates = []\n-\t\tfor exon in self.getExons():\n-\t\t\tcoordinates.append(exon.printCoordinates())\n-\t\tcoordinatesString = ",".join(coordinates)\n-\t\tcomment = self.getTagValues(";", "=")\n-\t\tif comment:\n-\t\t\tcomment = "\\t\\"%s\\"" % (comment)\n-\t\treturn "User_data\\t%s\\t%s%s\\n" % (self.name, coordinatesString, comment)\n-\n-\t\n-\tdef printGBrowse(self):\n-\t\t"""\n-\t\tExport this transcript using GBrowse format\n-\t\t@return: a string\n-\t\t"""\n-\t\treturn "%s%s" % (self.printGBrowseReference(), self.printGBrowseLine())\n-\n-\n-\tdef printCsv(self):\n-\t\t"""\n-\t\tExport this transcript using CSV format\n-\t\t@return: a string\n-\t\t"""\n-\t\tself.sortExons()\n-\t\tstring = "%s,%d,%d,\\"%s\\"," % (self.getChromosome(), self.getStart(), self.getEnd(), "+" if self.getDirection() == 1 else "-")\n-\t\tif len(self.getExons()) == 1:\n-\t\t\tstring += "None"\n-\t\telse:\n-\t\t\tfor exon in self.getExons():\n-\t\t\t\tstring += "%d-%d " % (exon.getStart(), exon.getEnd())\n-\t\tfor tag in sorted(self.tags.keys()):\n-\t\t\tstring += ",%s=%s" % (tag, str(self.tags[tag]))\n-\t\tstring += "\\n"\n-\t\treturn string\n-\n-\n-\tdef extractSequence(self, parser):\n-\t\t"""\n-\t\tGet the sequence corresponding to this 
transcript\n-\t\t@param parser: a parser to a FASTA file\n-\t\t@type  parser: class L{SequenceListParser<SequenceListParser>}\n-\t\t@return:\t   an instance of L{Sequence<Sequence>}\n-\t\t"""\n-\t\tself.sortExons()\n-\t\tname = self.name\n-\t\tif "ID" in self.getTagNames() and self.getTagValue("ID") != self.name:\n-\t\t\tname += ":%s" % (self.getTagValue("ID"))\n-\t\tsequence = Sequence(name)\n-\t\tfor exon in self.getExons():\n-\t\t\tsequence.concatenate(exon.extractSequence(parser))\n-\t\treturn sequence\n-\t\n-\t\n-\tdef extractWigData(self, parser):\n-\t\t"""\n-\t\tGet some wig data corresponding to this transcript\n-\t\t@param parser: a parser to a wig file\n-\t\t@type  parser: class L{WigParser<WigParser>}\n-\t\t@return: a sequence of float\n-\t\t"""\n-\t\tself.sortExons()\n-\t\tif parser.strands:\n-\t\t\tstrands = (-1, 1)\n-\t\t\tvalues  = dict([(strand, []) for strand in strands])\n-\t\t\tfor exon in self.getExons():\n-\t\t\t\ttheseValues = exon.extractWigData(parser)\n-\t\t\t\tif self.getDirection() == -1:\n-\t\t\t\t\tfor strand in strands:\n-\t\t\t\t\t\ttheseValues[strand].reverse()\n-\t\t\t\tfor strand in strands:\n-\t\t\t\t\tvalues[strand].extend(theseValues[strand])\n-\t\t\tif self.getDirection() == -1:\n-\t\t\t\tfor strand in strands:\n-\t\t\t\t\tvalues[strand].reverse()\n-\t\t\treturn values\n-\t\telse:\n-\t\t\tvalues = []\n-\t\t\tfor exon in self.getExons():\n-\t\t\t\ttheseValues = exon.extractWigData(parser)\n-\t\t\t\t#if self.getDirection() == -1:\n-\t\t\t\t#\ttheseValues.reverse()\n-\t\t\t\tvalues.extend(theseValues)\n-\t\t\t#if self.getDirection() == -1:\n-\t\t\t#\tvalues.reverse()\n-\t\t\treturn values\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/Transcript.pyc
b
Binary file SMART/Java/Python/structure/Transcript.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/TranscriptContainer.py
--- a/SMART/Java/Python/structure/TranscriptContainer.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,236 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import re\n-import sys\n-from commons.core.parsing.ParserChooser import ParserChooser\n-from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable\n-from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter\n-\n-class TranscriptContainer(object):\n-    """\n-    An interface class that contains a list of transcripts, handle different formats\n-    @ivar container: container of the data\n-    @type container: string \n-    @ivar format: format of the data\n-    @type format: string        \n-    @ivar transcriptListParser: possibly contains a parser to a list of transcripts\n-    @type transcriptListParser: L{TranscriptListParser<TranscriptListParser>} or None\n-    @ivar mappingListParser: possibly contains a parser to a list of mappings\n-    @type mappingListParser: L{MapperParser<MapperParser>} or None\n-    @ivar transcriptTables: possibly contains the mySQL tables\n-    @type transcriptTables: dict of L{MySqlTranscriptTable<MySqlTranscriptTable>} or None\n-    @ivar mySqlConnection: connection to a MySQL database\n-    @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}\n-    @ivar type: type of the data (transcripts, mappings or mySQL)\n-    @type type: string\n-    @ivar verbosity: verbosity\n-    @type verbosity: int        \n-    """\n-\n-    def __init__(self, container, format, verbosity = 0):\n-        """\n-        Constructor\n-        @param container: container of the data\n-        @type container: string\n-        @param format: format of the data\n-        @type 
format: string\n-        @param verbosity: verbosity\n-        @type verbosity: int\n-        """\n-        self.container            = container\n-        self.format               = format\n-        self.verbosity            = verbosity\n-        self.transcriptListParser = None\n-        self.mappingListParser    = None\n-        self.transcriptTables     = {}\n-        self.mySqlConnection      = None\n-        self.foundData            = False\n-        self.nbTranscripts        = None\n-        self.nbNucleotides        = None\n-        self.chromosomes          = None\n-        self.type                 = None\n-        if self.container == None:\n-            sys.exit("Error! Container input file name is empty!")\n-        if self.format == None:\n-            sys.exit("Error! Container input format is empty!")\n-        \n-        \n-    def findData(self):\n-        """\n-        Load data\n-        """\n-        if self.format == None:\n-            sys.ex'..b'ndle format \'%s\'!" 
% (self.format))\n-\n-        if self.transcriptListParser != None:\n-            if self.type == "transcript":\n-                self.nbTranscripts = self.transcriptListParser.getNbTranscripts()\n-                self.nbNucleotides = self.transcriptListParser.getNbNucleotides()\n-                self.chromosomes   = self.transcriptListParser.getChromosomes()\n-        if self.mappingListParser != None:\n-            if self.type == "mapping":\n-                self.nbTranscripts = self.mappingListParser.getNbMappings()\n-                self.nbNucleotides = self.mappingListParser.getNbNucleotides()\n-                self.chromosomes   = self.mappingListParser.getChromosomes()\n-\n-        self.foundData = True\n-\n-\n-    def getNbTranscripts(self):\n-        """\n-        Get the number of transcripts\n-        @return: the number of transcripts\n-        """\n-        if not self.foundData:\n-            self.findData()\n-        return self.nbTranscripts\n-    \n-    \n-    def getNbItems(self):\n-        """\n-        Same as getNbTranscripts\n-        """\n-        return self.getNbTranscripts()\n-\n-\n-    def getNbNucleotides(self):\n-        """\n-        Get the number of nucleotides\n-        @return: the number of nucleotides\n-        """\n-        if not self.foundData:\n-            self.findData()\n-        return self.nbNucleotides\n-\n-\n-    def getChromosomes(self):\n-        """\n-        Get the chromosomes\n-        @return: the chromosomes\n-        """\n-        if not self.foundData:\n-            self.findData()\n-        return self.chromosomes\n-    \n-\n-    def getIterator(self):\n-        """\n-        An iterator\n-        @return: an iterator to a list of transcripts\n-        """\n-        if not self.foundData:\n-            self.findData()\n-        if self.type == "sql":\n-            for chromosome in self.transcriptTables:\n-                for transcript in self.transcriptTables[chromosome].getIterator():\n-                  
  yield transcript\n-            return\n-        if self.type == "transcript":\n-            for transcript in self.transcriptListParser.getIterator():\n-                yield transcript\n-            return\n-        if self.type == "mapping":\n-            for mapping in self.mappingListParser.getIterator():\n-                yield mapping.getTranscript()\n-            return\n-        sys.exit("Error! No valid transcript container given!")\n-        \n-        \n-    def storeIntoDatabase(self, name = None):\n-        """\n-        Store the current transcript / mapping list into database\n-        """\n-        if not self.foundData:\n-            self.findData()\n-\n-        if (self.transcriptListParser == None and self.mappingListParser == None) or len(self.transcriptTables.keys()) != 0:\n-            return\n-        \n-        mySqlTranscriptWriter = MySqlTranscriptWriter(self.mySqlConnection, name, self.verbosity)\n-        mySqlTranscriptWriter.addTranscriptList(self.transcriptListParser if self.transcriptListParser else self.mappingListParser)\n-        mySqlTranscriptWriter.write()\n-        self.transcriptTables = mySqlTranscriptWriter.getTables()\n-        self.type = "sql"\n-            \n-            \n-    def getTables(self):\n-        """\n-        Accessor to the mySQL tables\n-        @return: the mySQL tables\n-        """\n-        return self.transcriptTables\n-        \n-\n-    def setDefaultTagValue(self, name, value):\n-        """\n-        Set the given tag to the value for all transcripts\n-        @param name: name of the tag\n-        @type name: string\n-        @param value: value of the tag\n-        @type value: string\n-        """\n-        if self.type == "sql":\n-            for chromosome in self.transcriptTables:\n-                self.transcriptTables[chromosome].setDefaultTagValue(name, value)\n-        elif self.type == "transcript":\n-            self.transcriptListParser.setDefaultTagValue(name, value)\n-        elif 
self.type == "mapping":\n-            self.mappingListParser.setDefaultTagValue(name, value)\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/TranscriptContainer.pyc
b
Binary file SMART/Java/Python/structure/TranscriptContainer.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/TranscriptList.py
--- a/SMART/Java/Python/structure/TranscriptList.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,172 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.mySql.MySqlTable import MySqlTable
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.misc.Progress import Progress
-
-
-class TranscriptList(object):
-    """A class that codes for a list of transcript"""
-
-    def __init__(self, verbosity = 0):
-        self.transcripts = dict()
-        self.longestTranscript = 0
-        self.verbosity = verbosity
-
-
-    def getTranscript(self, chromosome, index):
-        return self.transcripts[chromosome][index]
-        
-
-    def getChromosomes(self):
-        return self.transcripts.keys()
-
-
-    def getTranscriptsOnChromosome(self, chromosome):
-        if chromosome not in self.transcripts:
-            return []
-        return self.transcripts[chromosome]
-
-
-    def addTranscript(self, transcript):
-        if transcript.getChromosome() in self.transcripts:
-            self.transcripts[transcript.getChromosome()].append(transcript)
-        else:
-            self.transcripts[transcript.getChromosome()] = [transcript]
-        self.longestTranscript = max(self.longestTranscript, transcript.getEnd() - transcript.getStart())
-        
-
-    def removeTranscript(self, chromosome, i):
-        del self.transcripts[chromosome][i]
-
-
-    def removeAll(self):
-        self.transcripts = {}
-
-
-    def getNbTranscripts(self):
-        nbTranscripts = 0
-        for chromosome in self.transcripts:
-            nbTranscripts += len(self.transcripts[chromosome])
-        return nbTranscripts
-
-
-    def getSize(self):
-        size = 0
-        for chromosome in self.transcripts:
-            for transcript in self.transcripts[chromosome]:
-                size += transcript.getSize()
-        return size
-
-
-    def sort(self):
-        for chromosome in self.transcripts:
-            self.transcripts[chromosome].sort(lambda x, y: x.getStart() - y.getStart())
-
-
-    def removeOverlapWith(self, transcriptList):
-        transcriptList.sort()
-        for chromosome in self.transcripts:
-            progress = Progress(len(self.transcripts[chromosome]), "Handling chromosome %s" % (chromosome), self.verbosity)
-            for thisTranscriptId in range(len(self.transcripts[chromosome])):
-                progress.inc()
-                for thatTranscriptId in range(len(transcriptList.transcripts[chromosome])):
-                    if self.transcripts[chromosome][thisTranscriptId].overlapWith(transcriptList.transcripts[chromosome][thatTranscriptId]):
-                        self.transcripts[chromosome][thisTranscriptId] = None
-                        break
-                    if self.transcripts[chromosome][thisTranscriptId].getEnd() > transcriptList.transcripts[chromosome][thatTranscriptId]:
-                        break
-            self.transcripts[chromosome] = [transcript for transcript in self.transcripts[chromosome] if transcript != None]
-        progress.done()
-
-
-    def removeOverlapWithExon(self, transcriptList):
-        transcriptList.sort()
-        for chromosome in self.transcripts:
-            progress = Progress(len(self.transcripts[chromosome]), "Handling chromosome %s" % (chromosome), self.verbosity)
-            for thisTranscriptId in range(len(self.transcripts[chromosome])):
-                progress.inc()
-                for thatTranscriptId in range(len(transcriptList.transcripts[chromosome])):
-                    if self.transcripts[chromosome][thisTranscriptId].overlapWithExon(transcriptList.transcripts[chromosome][thatTranscriptId]):
-                        self.transcripts[chromosome][thisTranscriptId] = None
-                        break
-                    if self.transcripts[chromosome][thisTranscriptId].getEnd() > transcriptList.transcripts[chromosome][thatTranscriptId]:
-                        break
-            self.transcripts[chromosome] = [transcript for transcript in self.transcripts[chromosome] if transcript != None]
-        progress.done()
-
-
-    def setDefaultTagValue(self, name, value):
-        for transcript in self.getIterator():
-            transcript.setTag(name, value)
-
-
-    def storeDatabase(self, mySqlConnection):
-        transcriptsTable = MySqlTable("TmpTranscriptsTable", mySqlConnection)
-        transcriptsTable.create(Transcript.getSqlVariables(), Transcript.getSqlTypes())
-        intervalsVariables = Interval.getSqlVariables()
-        intervalsVariables.append("idTranscript")
-        intervalsTypes = Interval.getSqlTypes()
-        intervalsTypes["idTranscript"] = "int"
-        intervalsTable = MySqlTable("TmpIntervalsTable", mySqlConnection)
-        intervalsTable.create(intervalsVariables, intervalsTypes)
-        for chromosome in self.transcripts:
-            for transcript in self.transcripts[chromosome]:
-                idTranscript = transcriptsTable.addLine(transcript.getSqlValues())
-                for exon in transcript.getExons():
-                    intervalValues = exon.getSqlValues()
-                    intervalValues["idTranscript"] = idTranscript
-                    intervalsTable.addLine(intervalValues)
-                    
-    
-    def getIterator(self):
-        chromosomes = self.transcripts.keys()
-        currentChromosome = 0
-        currentTranscript = 0
-        while True:
-            if currentChromosome >= len(chromosomes):
-                return
-            elif currentTranscript >= len(self.transcripts[chromosomes[currentChromosome]]):
-                currentTranscript    = 0
-                currentChromosome += 1
-            elif self.transcripts[chromosomes[currentChromosome]][currentTranscript] == None:
-                currentTranscript += 1
-            else:
-                yield self.transcripts[chromosomes[currentChromosome]][currentTranscript]
-                currentTranscript += 1
-
-
-    def __str__(self):
-        string = ""
-        for transcript in self.getIterator():
-            string += str(transcript)
-        return string
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/TranscriptList.pyc
b
Binary file SMART/Java/Python/structure/TranscriptList.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/TranscriptListIterator.py
--- a/SMART/Java/Python/structure/TranscriptListIterator.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,58 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-class TranscriptListIterator(object):
-    """A class that iterates on a list of transcript"""
-
-    def __init__(self, transcriptList, verbosity = 0):
-        self.transcriptList = transcriptList
-        self.verbosity = verbosity
-        self.chromosomes = self.transcriptList.transcripts.keys()
-        self.currentChromosome = 0
-        self.currentTranscript = -1
-
-
-    def __iter__(self):
-        return self
-    
-    
-    def next(self):
-        self.currentTranscript += 1
-        while True:
-            if self.currentChromosome >= len(self.transcriptList.transcripts):
-                raise StopIteration
-            elif self.currentTranscript >= len(self.transcriptList.transcripts[self.chromosomes[self.currentChromosome]]):
-                self.currentTranscript = 0
-                self.currentChromosome += 1
-            elif self.transcriptList.transcripts[self.chromosomes[self.currentChromosome]][self.currentTranscript] == None:
-                self.currentTranscript += 1
-            else:
-                return self.transcriptList.transcripts[self.chromosomes[self.currentChromosome]][self.currentTranscript]
-            
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/TranscriptListsComparator.py
--- a/SMART/Java/Python/structure/TranscriptListsComparator.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,1198 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import sys\n-import random\n-from SMART.Java.Python.misc import Utils\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.structure.TranscriptList import TranscriptList\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection\n-from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable\n-from SMART.Java.Python.misc.Progress import Progress\n-from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter\n-\n-\n-\n-class TranscriptListsComparator(object):\n-    """\n-    Compare two transcript lists, using a database for one of the list\n-    Uses one TranscriptContainer for query data, \n-             one TranscriptContainer exported to MySqlTranscriptTable for reference data, \n-             one MySqlTranscriptTable for transformed reference data\n-    @ivar inputTranscriptContainers: parsers to the list of query transcripts\n-    @type inputTranscriptContainers: list of 2 L{TranscriptContainer<TranscriptContainer>}\n-    @ivar writer:                    transcript list writer\n-    @type writer:                    class L{TranscriptListWriter<TranscriptListWriter>}\n-    @ivar mySqlConnection:           connection to a MySQL database (to compute the ovelapping efficiently)\n-    @type mySqlConnection:           class L{MySqlConnection<MySqlConnection>}\n-    @ivar introns:                   compare transcripts or exons only\n-    @type introns:                   list of 2 
boolean\n-    @ivar starts:                    restrict the query transcripts to first nucleotides\n-    @type starts:                    list of 2 int or None\n-    @ivar fivePrimes:                extend a list of transcripts by their 5\' end\n-    @type fivePrimes:                list of 2 int or None\n-    @ivar threePrimes:               extend a list of transcripts by their 3\' end\n-    @type threePrimes:               list of 2 int or None\n-    @ivar minDistance:               min distance between two transcripts [default: 0]\n-    @type minDistance:               int\n-    @ivar maxDistance:               max distance between two transcripts [default: 0]\n-    @type maxDistance:               int\n-    @ivar minOverlap:                minimum number of overlapping nucleotides to declare an overlap\n-    @type minOverlap:                int\n-    @ivar pcOverlap:                 percentage of overlapping nucleotides to declare an ove'..b'          for index2, transcript2 in self.getTables(self.REFERENCE)[chromosome1].selectTranscripts(command):\n-                        transcripts2.append(transcript2)\n-                    command = "DELETE FROM %s WHERE start < %d" % (self.getTables(self.REFERENCE)[chromosome1].getName(), end + distance)\n-                    self.mySqlConnection.executeQuery(command)\n-\n-                # compare sets\n-                toBeRemoved1 = []\n-                for index1, transcript1 in enumerate(transcripts1):\n-                    newTranscript1 = Transcript()\n-                    newTranscript1.copy(transcript1)\n-                    for transcript2 in transcripts2:\n-                        newTranscript1 = newTranscript1.getDifference(transcript2)\n-                        if newTranscript1 == None:\n-                            toBeRemoved1.append(index1)\n-                            break\n-                    transcripts1[index1] = newTranscript1\n-\n-                    # check if query transcript extends bounds 
of the chunk\n-                    if newTranscript1 != None and newTranscript1.getEnd() < end:\n-                        if self.splitDifference:\n-                            for exon in newTranscript1.getExons():\n-                                transcript = Transcript()\n-                                transcript.copy(exon)\n-                                self.writeTranscript(transcript)\n-                        else:\n-                            self.writeTranscript(newTranscript1)\n-                        toBeRemoved1.append(index1)\n-\n-                # update list of query transcripts\n-                for index1 in reversed(toBeRemoved1):\n-                    del transcripts1[index1]\n-\n-                # check if the reference transcripts extends bounds of the chunk\n-                toBeRemoved2 = []\n-                for index2, transcript2 in enumerate(transcripts2):\n-                    if transcript2.getEnd() + distance < end:\n-                        toBeRemoved2.append(index2)\n-                for index2 in reversed(toBeRemoved2):\n-                    del transcripts2[index2]\n-\n-                progress.inc()\n-\n-            for transcript1 in transcripts1:\n-                if self.splitDifference:\n-                    for exon in transcript1.getExons():\n-                        transcript = Transcript()\n-                        transcript.copy(exon)\n-                        self.writeTranscript(transcript)\n-                else:\n-                    self.writeTranscript(transcript1)\n-            progress.done()\n-            self.getTables(self.QUERY)[chromosome1].remove()\n-            if chromosome1 in self.getTables(self.REFERENCE):\n-                self.getTables(self.REFERENCE)[chromosome1].remove()\n-                self.getTables(self.WORKING)[chromosome1].remove()\n-\n-        self.flushData()\n-        if self.writer != None:\n-            self.writer.close()\n-            self.writer = None\n-\n-        if 
self.verbosity > 0:\n-            print "query:     %d elements" % (self.nbTranscripts[self.QUERY])\n-            print "reference: %d elements" % (self.nbTranscripts[self.REFERENCE])\n-            print "# printed: %d (%.2f%%)" % (self.nbPrinted, self.nbPrinted / float(self.nbTranscripts[self.QUERY]) * 100)\n-\n-\n-    def getOddsPerTranscript(self):\n-        """\n-        Return overlap results\n-        @return a dict of data\n-        """\n-        if not self.odds:\n-            raise Exception("Did not compute odds!")\n-        return self.overlapResults\n-\n-\n-    def getOdds(self):\n-        """\n-        Return odds about the overlap\n-        @return a dict of data\n-        """\n-        if not self.odds:\n-            raise Exception("Did not compute odds!")\n-        if self.oddResults != None:\n-            return self.oddResults\n-        self.oddResults = {}\n-        for name, value in self.overlapResults.iteritems():\n-            self.oddResults[value] = self.oddResults.get(value, 0) + 1\n-        return self.oddResults\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/TranscriptListsComparator.pyc
b
Binary file SMART/Java/Python/structure/TranscriptListsComparator.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/__init__.pyc
b
Binary file SMART/Java/Python/structure/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/test/Test_Interval.py
--- a/SMART/Java/Python/structure/test/Test_Interval.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,369 +0,0 @@\n-import unittest\n-from SMART.Java.Python.structure.Interval import Interval\n-\n-class Test_Interval(unittest.TestCase):\n-\n-    def setUp(self):\n-        self.iInterval = Interval()\n-        self.iInterval1 = Interval()\n-        self.iInterval2 = Interval()\n-        \n-    def test__init__(self):\n-        self.iInterval.setChromosome("chromosome")\n-        self.iInterval.setName("sequence")\n-        self.iInterval.setStart(0)\n-        self.iInterval.setEnd(123)\n-        obsStart = self.iInterval.getStart()\n-        obsEnd = self.iInterval.getEnd()\n-        expStart = 0\n-        expEnd = 123\n-        \n-        self.assertEqual(expStart, obsStart)\n-        self.assertEqual(expEnd, obsEnd)\n-\n-    def test_copy(self):\n-        self.iInterval1.setName("interval1")\n-        self.iInterval1.setChromosome("chr1")\n-        self.iInterval1.setStart(100)\n-        self.iInterval1.setEnd(300)\n-        self.iInterval1.setDirection("+")\n-\n-        self.iInterval2.copy(self.iInterval1)\n-        self.assertEqual(self.iInterval2.getName(), "interval1")\n-        self.assertEqual(self.iInterval2.getChromosome(), "chr1")\n-        self.assertEqual(self.iInterval2.getStart(), 100)\n-        self.assertEqual(self.iInterval2.getEnd(), 300)\n-        self.assertEqual(self.iInterval2.getDirection(), 1)\n-\n-        self.iInterval1.setStart(200)\n-        self.assertEqual(self.iInterval2.getStart(), 100)\n-        \n-    def test_getDirection(self):\n-        self.iInterval1.setName("interval1")\n-        self.iInterval1.setChromosome("chr1")\n-        self.iInterval1.setStart(100)\n-        self.iInterval1.setEnd(300)\n-        self.iInterval1.setDirection("+")\n-        expDirect = 1\n-        self.assertEquals(expDirect,self.iInterval1.getDirection())\n-\n-    #!!!! 
Warning: two methods getStart() and getEnd() give the information maximum and minimum in interval.!!!!#\n-    #In case strand = "+", start < end; strand = "-", start > end   \n-    def test_setStartEnd(self):\n-        self.iInterval1 = Interval()\n-        self.iInterval1.setName("interval1")\n-        self.iInterval1.setChromosome("chr1")\n-        self.iInterval1.setStart(100)\n-        self.iInterval1.setEnd(300)\n-        self.iInterval1.setDirection("+")\n-        \n-        self.assertEqual(self.iInterval1.getName(), "interval1")\n-        self.assertEqual(self.iInterval1.getChromosome(), "chr1")\n-        self.assertEqual(self.iInterval1.getStart(),100)\n-        self.assertEqual(self.iInterval1.getEnd(), 300)\n-        self.assertEqual(self.iInterval1.getDirection(), 1)\n-\n-        self.iInterval1.setStart(200)\n-        self.assertEqual(self.iInterval1.getStart(), 200)\n-        self.assertEqual(self.iInterval1.getEnd(), 300)\n-\n-        self.iInterval1.setEnd(300)\n-        self.iInterval1.setStart(100)\n-        self.assertEqual(self.iInterval1.getStart(), 100)\n-        self.assertEqual(self.iInterval1.getEnd(), 300)\n-\n-        self.iInterval1.setEnd(1200)\n-        self.iInterval1.setStart(1000)\n-        self.assertEqual(self.iInterval1.getStart(), 1000)\n-        self.assertEqual(self.iInterval1.getEnd(), 1200)\n-\n-        self.iInterval1.reverse()\n-        self.assertEqual(self.iInterval1.getDirection(), -1)\n-        self.assertEqual(self.iInterval1.getStart(), 1000)\n-        self.assertEqual(self.iInterval1.getEnd(), 1200)\n-\n-        self.iInterval1.setStart(1100)\n-        self.assertEqual(self.iInterval1.getStart(), 1100)\n-        self.assertEqual(self.iInterval1.getEnd(), 1200)\n-\n-        self.iInterval1.setEnd(2200)\n-        self.iInterval1.setStart(2000)\n-        self.assertEqual(self.iInterval1.getStart(), 2000)\n-        self.assertEqual(self.iInterval1.getEnd(), 2200)\n-\n-        self.iInterval1.setStart(1000)\n-        
self.iInterval1.setEnd(1200)\n-        self.assertEqual(self.iInterval1.getStart(), 1000)\n-        self.assertEqual(self.iInterval1.getEnd(), 1200)\n-\n-    def test_reverse(self):\n-        self.iInterval1 = Interval()\n-        self.iInterval1.setName("interval1")\n-        self.i'..b'\n-\n-        iInterval2.setChromosome("chr2")\n-        results = iInterval1.getDifference(iInterval2)\n-        self.assertEqual(len(results), 1)\n-        resultInterval = results[0]\n-        self.assertEqual(resultInterval.getStart(),      iInterval1.getStart())\n-        self.assertEqual(resultInterval.getEnd(),        iInterval1.getEnd())\n-        self.assertEqual(resultInterval.getDirection(),  iInterval1.getDirection())\n-        self.assertEqual(resultInterval.getChromosome(), iInterval1.getChromosome())\n-\n-        iInterval2.setChromosome("chr1")\n-        iInterval2.setEnd(300)\n-        results = iInterval1.getDifference(iInterval2)\n-        self.assertEqual(len(results), 1)\n-        resultInterval = results[0]\n-        self.assertEqual(resultInterval.getStart(),      301)\n-        self.assertEqual(resultInterval.getEnd(),        iInterval1.getEnd())\n-        self.assertEqual(resultInterval.getDirection(),  iInterval1.getDirection())\n-        self.assertEqual(resultInterval.getChromosome(), iInterval1.getChromosome())\n-        \n-        iInterval2.setDirection("-")\n-        results = iInterval1.getDifference(iInterval2, True)\n-        self.assertEqual(len(results), 1)\n-        resultInterval = results[0]\n-        self.assertEqual(resultInterval.getStart(),      iInterval1.getStart())\n-        self.assertEqual(resultInterval.getEnd(),        iInterval1.getEnd())\n-        self.assertEqual(resultInterval.getDirection(),  iInterval1.getDirection())\n-        self.assertEqual(resultInterval.getChromosome(), iInterval1.getChromosome())\n-        \n-        iInterval2.setDirection("+")\n-        iInterval2.setStart(200)\n-        results = 
iInterval1.getDifference(iInterval2)\n-        self.assertEqual(len(results), 2)\n-        resultInterval1, resultInterval2 = results\n-        self.assertEqual(resultInterval1.getStart(),      iInterval1.getStart())\n-        self.assertEqual(resultInterval1.getEnd(),        199)\n-        self.assertEqual(resultInterval1.getDirection(),  iInterval1.getDirection())\n-        self.assertEqual(resultInterval1.getChromosome(), iInterval1.getChromosome())\n-        self.assertEqual(resultInterval2.getStart(),      301)\n-        self.assertEqual(resultInterval2.getEnd(),        iInterval1.getEnd())\n-        self.assertEqual(resultInterval2.getDirection(),  iInterval1.getDirection())\n-        self.assertEqual(resultInterval2.getChromosome(), iInterval1.getChromosome())\n-\n-        iInterval2.setEnd(2000)\n-        iInterval2.setStart(1000)\n-        results = iInterval1.getDifference(iInterval2)\n-        self.assertEqual(len(results), 1)\n-        resultInterval = results[0]\n-        self.assertEqual(resultInterval.getStart(),      iInterval1.getStart())\n-        self.assertEqual(resultInterval.getEnd(),        iInterval1.getEnd())\n-        self.assertEqual(resultInterval.getDirection(),  iInterval1.getDirection())\n-        self.assertEqual(resultInterval.getChromosome(), iInterval1.getChromosome())\n- \n-    def test_mergeWithDifferentStrand(self):\n-        self.iInterval1 = Interval()\n-        self.iInterval1.setName("interval1")\n-        self.iInterval1.setChromosome("chr1")\n-        self.iInterval1.setStart(100)\n-        self.iInterval1.setEnd(200)\n-        self.iInterval1.setDirection("+")\n-   \n-        self.iInterval2 = Interval()\n-        self.iInterval2.setName("interval2")\n-        self.iInterval2.setChromosome("chr1")\n-        self.iInterval2.setStart(300)\n-        self.iInterval2.setEnd(400)\n-        self.iInterval2.setDirection("-")\n-\n-        expMessage = "Cannot merge \'%s\' and \'%s\' for they are on different strands." 
% (str(self.iInterval2), str(self.iInterval1))\n-        isExceptionRaised = False\n-        try:\n-            self.iInterval2.merge(self.iInterval1)\n-        except Exception, e:\n-            isExceptionRaised = True\n-        obsMessage = str(e)\n-\n-        self.assertTrue(isExceptionRaised)\n-        self.assertEquals(expMessage, obsMessage)\n-\n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/test/Test_Mapping.py
--- a/SMART/Java/Python/structure/test/Test_Mapping.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,59 +0,0 @@
-import unittest
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.structure.SubMapping import SubMapping
-from SMART.Java.Python.structure.Mapping import Mapping
-
-class Test_Mapping(unittest.TestCase):
-
-    def test__getTranscript(self):
-        queryInterval1 = Interval()
-        queryInterval1.setName("read1_1")
-        queryInterval1.setStart(1)
-        queryInterval1.setEnd(10)
-        queryInterval1.setDirection(1)
-
-        targetInterval1 = Interval()
-        targetInterval1.setChromosome("chr1")
-        targetInterval1.setStart(100)
-        targetInterval1.setEnd(110)
-        targetInterval1.setDirection(1)
-
-        subMapping1 = SubMapping()
-        subMapping1.setQueryInterval(queryInterval1)
-        subMapping1.setTargetInterval(targetInterval1)
-
-        queryInterval2 = Interval()
-        queryInterval2.setName("read1_2")
-        queryInterval2.setStart(11)
-        queryInterval2.setEnd(20)
-        queryInterval2.setDirection(1)
-
-        targetInterval2 = Interval()
-        targetInterval2.setChromosome("chr1")
-        targetInterval2.setStart(200)
-        targetInterval2.setEnd(210)
-        targetInterval2.setDirection(1)
-
-        subMapping2 = SubMapping()
-        subMapping2.setQueryInterval(queryInterval2)
-        subMapping2.setTargetInterval(targetInterval2)
-
-        mapping = Mapping()
-        mapping.addSubMapping(subMapping1)
-        mapping.addSubMapping(subMapping2)
-
-        transcript = mapping.getTranscript()
-        self.assertEqual(transcript.getStart(),      100)
-        self.assertEqual(transcript.getEnd(),        210)
-        self.assertEqual(transcript.getChromosome(), "chr1")
-        exons = transcript.getExons()
-        self.assertEqual(len(exons), 2)
-        exon1, exon2 = exons
-        self.assertEqual(exon1.getStart(), 100)
-        self.assertEqual(exon1.getEnd(),   110)
-        self.assertEqual(exon2.getStart(), 200)
-        self.assertEqual(exon2.getEnd(),   210)
-
-
-if __name__ == '__main__':
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/test/Test_Sequence.py
--- a/SMART/Java/Python/structure/test/Test_Sequence.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,90 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-import unittest
-from SMART.Java.Python.structure.Sequence import Sequence
-
-
-class Test_Sequence(unittest.TestCase):
-    
-    def setUp(self):
-        self._bs = Sequence()
-        self._bs1 = Sequence()
-    
-    def test_getSize(self):
-        self._bs.setName("sequence1")
-        self._bs.setSequence("AGCGGACGATGCAGCATGCGAATGACGATA")
-        obsSize = self._bs.getSize()
-        expSize = 30
-        self.assertEquals( expSize, obsSize )   
-    
-    def test_concatenate(self):
-        self._bs.setName("sequence")
-        self._bs.setSequence("GATGTGCAGACTTTTCACGCAGGACTACATCACTGT")
-        self._bs.setQuality("WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQ")
-        self._bs1.setName("sequence1")
-        self._bs1.setSequence("GGAAACATATGCACATAAACGTTGAAATCATGCTTA")
-        self._bs1.setQuality("WWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU")
-        self._bs.concatenate(self._bs1)
-        expSeq = "GATGTGCAGACTTTTCACGCAGGACTACATCACTGTGGAAACATATGCACATAAACGTTGAAATCATGCTTA"
-        expQal = "WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQWWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU"
-        self.assertEquals(expSeq, self._bs.getSequence())
-        self.assertEquals(expQal, self._bs.getQuality())
-
-    def test_reverseComplement(self):
-        self._bs.setName("seq1")
-        self._bs.setSequence("TACGGC")
-        exp = "GCCGTA"
-        self._bs.reverseComplement()
-        obs = self._bs.getSequence()
-        self.assertEquals(exp, obs)
-
-    def test_containsAmbiguousNucleotides(self):
-        self._bs.setName("seq1")
-        self._bs.setSequence("WCGTUacgtu")
-        self.assertTrue (self._bs.containsAmbiguousNucleotides())
-
-    def test_shrinkToFirstNucleotides(self):
-        self._bs.setName("seq1")
-        self._bs.setSequence("WCGTUacgtu")
-        self._bs.shrinkToFirstNucleotides(3)
-        expSeq = "WCG"
-        self.assertEquals(expSeq, self._bs.getSequence())
-        
-    def test_shrinkToLastNucleotides(self):
-        self._bs.setName("seq1")
-        self._bs.setSequence("WCGTUacgtu")
-        self._bs.shrinkToLastNucleotides(5)
-        expSeq = "acgtu"
-        self.assertEquals(expSeq, self._bs.getSequence())
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/test/Test_SubMapping.py
--- a/SMART/Java/Python/structure/test/Test_SubMapping.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,292 +0,0 @@\n-import unittest\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.structure.SubMapping import SubMapping\n-\n-class Test_SubMapping(unittest.TestCase):\n-\n-    def test__init__(self):\n-        expEvalue = 0.00\n-        expScore = 0\n-        expIdentity = 0.00\n-        expTargetInterval = Interval()\n-        expQueryInterval = Interval()\n-        expQueryRange = expQueryInterval\n-        expSubjectRange = expTargetInterval\n-        expSize = None\n-        expTags = {}\n-        \n-        iSubMapping = SubMapping()\n-        obsQueryRange = iSubMapping.getQueryAsRange()\n-        obsSubjectRange = iSubMapping.getSubjectAsRange()\n-        obsEvalue = iSubMapping.getEvalue()\n-        obsScore = iSubMapping.getScore()\n-        obsIdentity = iSubMapping.getIdentity()\n-        obsTargetInterval = iSubMapping.getTargetInterval()\n-        obsQueryInterval = iSubMapping.getQueryInterval()\n-        obsSize = iSubMapping.getSize()\n-        obsTags = iSubMapping.getTags()\n-        \n-        self.assertEquals(expEvalue, obsEvalue)\n-        self.assertEquals(expIdentity, obsIdentity)\n-        self.assertEquals(expQueryInterval, obsQueryInterval)\n-        self.assertEquals(expQueryRange, obsQueryRange)\n-        self.assertEquals(expScore, obsScore)\n-        self.assertEquals(expSize, obsSize)\n-        self.assertEquals(expSubjectRange, obsSubjectRange)\n-        self.assertEquals(expTags, obsTags)\n-        self.assertEquals(expTargetInterval, obsTargetInterval)\n-        \n-    def test__init__change_values_by_Interval(self):\n-        iSubMapping = SubMapping()\n-        \n-        expSeqName = ""\n-        \n-        obsRangeSubject = iSubMapping.range_subject.getSeqname()\n-        obsRangeQuery = iSubMapping.range_query.getSeqname()\n-        obsIntervalTarget = iSubMapping.getTargetInterval().getChromosome()\n-        obsIntervalQuery = iSubMapping.getQueryInterval().getChromosome()\n-        
\n-        self.assertEquals(expSeqName, obsRangeSubject)\n-        self.assertEquals(expSeqName, obsRangeQuery)\n-        self.assertEquals(expSeqName, obsIntervalTarget)\n-        self.assertEquals(expSeqName, obsIntervalQuery)\n-        \n-        iSubMapping.getTargetInterval().setChromosome("intervalTarget")\n-        iSubMapping.getQueryInterval().setChromosome("intervalQuery")\n-        \n-        expTargetSeqName = "intervalTarget"\n-        expQuerySeqName = "intervalQuery"\n-        \n-        obsRangeSubject = iSubMapping.range_subject.getSeqname()\n-        obsRangeQuery = iSubMapping.range_query.getSeqname()\n-        obsIntervalTarget = iSubMapping.getTargetInterval().getChromosome()\n-        obsIntervalQuery = iSubMapping.getQueryInterval().getChromosome()\n-        \n-        self.assertEquals(expTargetSeqName, obsRangeSubject)\n-        self.assertEquals(expQuerySeqName, obsRangeQuery)\n-        self.assertEquals(expTargetSeqName, obsIntervalTarget)\n-        self.assertEquals(expQuerySeqName, obsIntervalQuery)\n-        \n-    def test__init__change_values_by_Align(self):\n-        iSubMapping = SubMapping()\n-        \n-        expSeqName = ""\n-        \n-        obsRangeSubject = iSubMapping.range_subject.getSeqname()\n-        obsRangeQuery = iSubMapping.range_query.getSeqname()\n-        obsIntervalTarget = iSubMapping.getTargetInterval().getChromosome()\n-        obsIntervalQuery = iSubMapping.getQueryInterval().getChromosome()\n-        \n-        self.assertEquals(expSeqName, obsRangeSubject)\n-        self.assertEquals(expSeqName, obsRangeQuery)\n-        self.assertEquals(expSeqName, obsIntervalTarget)\n-        self.assertEquals(expSeqName, obsIntervalQuery)\n-        \n-        iSubMapping.range_subject.setSeqName("intervalTarget")\n-        iSubMapping.range_query.setSeqName("intervalQuery")\n-        \n-        expTargetSeqName = "intervalTarget"\n-        expQuerySeqName = "intervalQuery"\n-        \n-        obsRangeSubject = 
iSubMapping.range_subject.getSeqname()\n-        obsRangeQuery = iSubMapping.range_query.getSeqname'..b'rt(0)\n-        iIntervalTarget.setEnd(123)\n-        iIntervalTarget.setDirection("+")\n-        iIntervalQuery = Interval()\n-        iIntervalQuery.setChromosome("chromosomeQuery")\n-        iIntervalQuery.setName("sequenceQuery")\n-        iIntervalQuery.setStart(200)\n-        iIntervalQuery.setEnd(323)\n-        iIntervalQuery.setDirection("+")    \n-        \n-        iTestSubMapping.setQueryInterval(iIntervalTarget)\n-        iTestSubMapping.setTargetInterval(iIntervalQuery)\n-        iTestSubMapping.setTagValue("identity", 50)\n-        iTestSubMapping.setSize(10)\n-        \n-        iSubMappingWithCopy = SubMapping(iTestSubMapping)\n-        self.assertEquals(iSubMappingWithCopy, iTestSubMapping)        \n-\n-      \n-    def test_copy(self):\n-        iSubMapping = SubMapping()\n-        iSubMapping.setQueryName("Query")\n-        iSubMapping.setQueryStart(50)\n-        iSubMapping.setQueryEnd(150)\n-        iSubMapping.setSubjectName("Subject")\n-        iSubMapping.setSubjectStart(100)\n-        iSubMapping.setSubjectEnd(200)\n-        iSubMapping.e_value = 1e-20\n-        iSubMapping.score = 30\n-        iSubMapping.identity = 90.2 \n-               \n-        iInterval1 = Interval()\n-        iInterval1.setChromosome("chromosome1")\n-        iInterval1.setName("sequence1")\n-        iInterval1.setStart(0)\n-        iInterval1.setEnd(123)\n-        iInterval1.setDirection("+")\n-        iInterval2 = Interval()\n-        iInterval2.setChromosome("chromosome2")\n-        iInterval2.setName("sequence2")\n-        iInterval2.setStart(200)\n-        iInterval2.setEnd(300)     \n-        iInterval2.setDirection("+")           \n-        iSubMapping.setQueryInterval(iInterval1)\n-        iSubMapping.setTargetInterval(iInterval2)\n-        iSubMapping.setTagValue("identity", 50)\n-        iSubMapping.setSize(10)\n-\n-        iSubMappingCopy = 
SubMapping()\n-        iSubMappingCopy.copy(iSubMapping)\n-        self.assertEqual(iSubMappingCopy, iSubMapping)\n-        \n-        \n-    def test_setTags(self):\n-        iSubMapping = SubMapping()\n-        iSubMapping.getQueryInterval().setSize(50)\n-        iSubMapping.getTargetInterval().setSize(2)\n-        iSubMapping.setTagValue("identity", 50)\n-        iSubMapping.setSize(10)\n-        \n-        expQueryIntervalSize = 50\n-        expTargetIntervalSize = 2\n-        expTags = {"identity" : 50,\n-                   "nbMismatches" : 5}\n-        \n-        obsTags = iSubMapping.getTags()\n-        self.assertEquals(expTags, obsTags)\n-        \n-        \n-    def test_setIdentity(self):\n-        iSubMapping = SubMapping()        \n-        iSubMapping.setIdentity(10)\n-        expIdentity = 10\n-        expTags = {"identity": 10}\n-        \n-        obsIdentity = iSubMapping.getIdentity()\n-        obsTags = iSubMapping.getTags()\n-        \n-        self.assertEquals(expIdentity,obsIdentity)\n-        self.assertEquals(expTags,obsTags)\n-        \n-        \n-    def test_setIdentity_with_size(self):\n-        iSubMapping = SubMapping()        \n-        iSubMapping.setSize(10)\n-        iSubMapping.setIdentity(50)\n-        \n-        expIdentity = 50\n-        expTags = {"identity" : 50,\n-                   "nbMismatches" : 5}\n-        \n-        obsIdentity = iSubMapping.getIdentity()\n-        obsTags = iSubMapping.getTags()\n-        \n-        self.assertEquals(expIdentity,obsIdentity)\n-        self.assertEquals(expTags,obsTags)\n-        \n-        \n-    def test_setIdentity_with_sizeAndMismatchTag(self):\n-        iSubMapping = SubMapping()        \n-        iSubMapping.setSize(10)\n-        iSubMapping.setTagValue("nbMismatches", 8)\n-        iSubMapping.setIdentity(50)\n-        \n-        expIdentity = 50\n-        expTags = {"identity" : 50,\n-                   "nbMismatches" : 8}\n-        \n-        obsIdentity = 
iSubMapping.getIdentity()\n-        obsTags = iSubMapping.getTags()\n-        \n-        self.assertEquals(expIdentity,obsIdentity)\n-        self.assertEquals(expTags,obsTags)\n-        \n-        \n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/test/Test_Transcript.py
--- a/SMART/Java/Python/structure/test/Test_Transcript.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,351 +0,0 @@\n-import unittest\n-from SMART.Java.Python.structure.Interval import Interval\n-from SMART.Java.Python.structure.Transcript import Transcript\n-\n-class Test_Transcript(unittest.TestCase):\n-  \n-    def test_getSize(self):\n-        transcript1 = Transcript()\n-        transcript1.setDirection("+")\n-        transcript1.setStart(2000)\n-        transcript1.setEnd(3000)\n-        transcript1.setChromosome("arm_X")\n-    \n-        self.assertEqual(transcript1.getSize(), 1001)\n-      \n-        transcript2 = Transcript()\n-        transcript2.copy(transcript1)\n-        self.assertEqual(transcript1.getSize(), 1001)\n-    \n-        transcript3 = Transcript()\n-        transcript3.setDirection("+")\n-        transcript3.setChromosome("arm_X")\n-    \n-        exon1 = Interval()\n-        exon1.setDirection("+")\n-        exon1.setChromosome("arm_X")\n-        exon1.setStart(100)\n-        exon1.setEnd(200)\n-        transcript3.addExon(exon1)\n-    \n-        exon2 = Interval()\n-        exon2.setDirection("+")\n-        exon2.setChromosome("arm_X")\n-        exon2.setStart(300)\n-        exon2.setEnd(400)\n-        transcript3.addExon(exon2)\n-    \n-        self.assertEqual(transcript3.getSize(), 203)\n-\n-\n-    def test_overlapWithExons(self):\n-        exon1_1 = Interval()\n-        exon1_1.setChromosome("chr1")\n-        exon1_1.setStart(100)\n-        exon1_1.setEnd(200)\n-        exon1_1.setDirection("+")\n-\n-        exon1_2 = Interval()\n-        exon1_2.setChromosome("chr1")\n-        exon1_2.setStart(500)\n-        exon1_2.setEnd(600)\n-        exon1_2.setDirection("+")\n-\n-        transcript1 = Transcript()\n-        transcript1.setChromosome("chr1")\n-        transcript1.setStart(100)\n-        transcript1.setEnd(600)\n-        transcript1.setDirection("+")\n-        transcript1.addExon(exon1_1)\n-        transcript1.addExon(exon1_2)\n-\n-        exon2_1 = Interval()\n-        exon2_1.copy(exon1_1)\n-\n-        transcript2 = 
Transcript()\n-        transcript2.setChromosome("chr1")\n-        transcript2.setStart(100)\n-        transcript2.setEnd(200)\n-        transcript2.setDirection("+")\n-        transcript2.addExon(exon2_1)\n-    \n-        self.assertTrue(transcript1.overlapWithExon(transcript2))\n-\n-        transcript2.reverse()\n-        try:\n-            self.assertFalse(transcript1.overlapWithExon(transcript2))\n-        except Exception:\n-            pass\n-      \n-        transcript2.reverse()\n-        transcript2.setChromosome("chr2")\n-        self.assertFalse(transcript1.overlapWithExon(transcript2))\n-\n-        exon3_1 = Interval()\n-        exon3_1.copy(exon1_1)\n-        exon3_1.setEnd(400)\n-        exon3_1.setStart(300)\n-\n-        transcript3 = Transcript()\n-        transcript3.setChromosome("chr1")\n-        transcript3.setStart(300)\n-        transcript3.setEnd(400)\n-        transcript3.setDirection("+")\n-        transcript3.addExon(exon3_1)\n-        self.assertFalse(transcript1.overlapWithExon(transcript3))\n-\n-\n-    def test_merge(self):\n-        exon1_1 = Interval()\n-        exon1_1.setChromosome("chr1")\n-        exon1_1.setStart(100)\n-        exon1_1.setEnd(200)\n-        exon1_1.setDirection("+")\n-\n-        exon1_2 = Interval()\n-        exon1_2.setChromosome("chr1")\n-        exon1_2.setStart(500)\n-        exon1_2.setEnd(600)\n-        exon1_2.setDirection("+")\n-\n-        transcript1 = Transcript()\n-        transcript1.setChromosome("chr1")\n-        transcript1.setEnd(600)\n-        transcript1.setStart(100)\n-        transcript1.setDirection("+")\n-        transcript1.addExon(exon1_1)\n-        transcript1.addExon(exon1_2)\n-\n-        exon2_1 = Interval()\n-        exon2_1.copy(exon1_1)\n-\n-        transcript2 = Transcript()\n-        transcript2.setChromosome("chr1")\n-        transcript2.setEnd(200)\n-        transcript2.setStart(100)\n-        transcript2.setDirection("+")\n-        transcript2.addExon(exon2_1)\n-    \n-        
transcript1.merge(transcript2)\n-        transcript1.sortExonsIncreasing()\n-        exons = transcript1.getExons()\n-        self.assertEqual'..b'ctStart(301)\n-        exons = transcript1.getExons()\n-        self.assertEqual(len(exons), 2)\n-        exon1, exon2 = exons\n-        self.assertEqual(exon1.getStart(), 100)\n-        self.assertEqual(exon1.getEnd(),   200)\n-        self.assertEqual(exon2.getStart(), 300)\n-        self.assertEqual(exon2.getEnd(),   400)\n-\n-\n-    def test__include(self):\n-        iTranscript1 = Transcript()\n-        iTranscript1.setName("transcript1")\n-        iTranscript1.setChromosome("chr1")\n-        iTranscript1.setStart(100)\n-        iTranscript1.setEnd(200)\n-        iTranscript1.setDirection("+")\n-        \n-        iTranscript2 = Transcript()\n-        iTranscript2.copy(iTranscript1)\n-        iTranscript2.setName("transcript2")\n-        self.assertTrue(iTranscript1.include(iTranscript2))\n-        self.assertTrue(iTranscript2.include(iTranscript1))\n-\n-        iTranscript2.setChromosome("chr2")\n-        self.assertFalse(iTranscript1.include(iTranscript2))\n-        self.assertFalse(iTranscript2.include(iTranscript1))\n-\n-        iTranscript2.setChromosome("chr1")\n-        exon = Interval()\n-        exon.setChromosome("chr1")\n-        exon.setDirection("+")\n-        exon.setStart(300)\n-        exon.setEnd(400)\n-        iTranscript1.addExon(exon)\n-        self.assertTrue(iTranscript1.include(iTranscript2))\n-        self.assertFalse(iTranscript2.include(iTranscript1))\n-        \n-        exon = Interval()\n-        exon.setChromosome("chr1")\n-        exon.setDirection("+")\n-        exon.setStart(500)\n-        exon.setEnd(600)\n-        iTranscript2.addExon(exon)\n-        self.assertFalse(iTranscript1.include(iTranscript2))\n-        self.assertFalse(iTranscript2.include(iTranscript1))\n-        \n-\n-    def test__getDifference(self):\n-        iTranscript1 = Transcript()\n-        
iTranscript1.setName("transcript1")\n-        iTranscript1.setChromosome("chr1")\n-        iTranscript1.setStart(100)\n-        iTranscript1.setEnd(400)\n-        iTranscript1.setDirection("+")\n-        \n-        iTranscript2 = Transcript()\n-        iTranscript2.setName("transcript1")\n-        iTranscript2.setChromosome("chr1")\n-        iTranscript2.setStart(200)\n-        iTranscript2.setEnd(400)\n-        iTranscript2.setDirection("+")\n-\n-        newTranscript = iTranscript1.getDifference(iTranscript2)\n-        self.assertTrue(newTranscript.getStart(), 100)\n-        self.assertTrue(newTranscript.getEnd(),   199)\n-        exons = newTranscript.getExons()\n-        self.assertTrue(len(exons), 1)\n-        exon1 = exons[0]\n-        self.assertTrue(exon1.getStart(), 100)\n-        self.assertTrue(exon1.getEnd(),   199)\n-\n-        iTranscript2 = Transcript()\n-        iTranscript2.setName("transcript1")\n-        iTranscript2.setChromosome("chr1")\n-        iTranscript2.setStart(100)\n-        iTranscript2.setEnd(200)\n-        iTranscript2.setDirection("+")\n-\n-        newTranscript = iTranscript1.getDifference(iTranscript2)\n-        self.assertTrue(newTranscript.getStart(), 201)\n-        self.assertTrue(newTranscript.getEnd(),   400)\n-        exons = newTranscript.getExons()\n-        self.assertTrue(len(exons), 1)\n-        exon1 = exons[0]\n-        self.assertTrue(exon1.getStart(), 201)\n-        self.assertTrue(exon1.getEnd(),   400)\n-\n-        iTranscript2 = Transcript()\n-        iTranscript2.setName("transcript1")\n-        iTranscript2.setChromosome("chr1")\n-        iTranscript2.setStart(200)\n-        iTranscript2.setEnd(300)\n-        iTranscript2.setDirection("+")\n-\n-        newTranscript = iTranscript1.getDifference(iTranscript2)\n-        self.assertTrue(newTranscript.getStart(), 100)\n-        self.assertTrue(newTranscript.getEnd(),   400)\n-        exons = newTranscript.getExons()\n-        self.assertTrue(len(exons), 2)\n-       
 exon1, exon2 = exons\n-        self.assertTrue(exon1.getStart(), 100)\n-        self.assertTrue(exon1.getEnd(),   199)\n-        self.assertTrue(exon2.getStart(), 301)\n-        self.assertTrue(exon2.getEnd(),   400)\n-\n-\n-if __name__ == \'__main__\':\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/structure/test/Test_TranscriptListsComparator.py
--- a/SMART/Java/Python/structure/test/Test_TranscriptListsComparator.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,262 +0,0 @@\n-import os\n-import unittest\n-from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator\n-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from commons.core.parsing.GffParser import GffParser\n-from commons.core.utils.FileUtils import FileUtils\n-\n-SMART_PATH = os.environ["REPET_PATH"] + "/SMART"\n-\n-class Test_TranscriptListsComparator(unittest.TestCase):\n-    \n-\n-    def test_compareTranscriptList(self):\n-        container1 = TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList1.bed" % SMART_PATH, "bed", 0)\n-        container2 = TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptList2.bed" % SMART_PATH, "bed", 0)\n-        outputContainer = "output.gff3"\n-        comparator = TranscriptListsComparator(None, 0)\n-        comparator.computeOdds(True)\n-        comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n-        comparator.setInputTranscriptContainer(comparator.REFERENCE, container2)\n-        comparator.setOutputWriter(Gff3Writer(outputContainer, 0))\n-        comparator.compareTranscriptList()\n-        parser = GffParser("output.gff3", 0)\n-        self.assertEqual(parser.getNbTranscripts(), 2)\n-        cpt = 0\n-        for transcript in parser.getIterator():\n-            if cpt == 0:\n-                self.assertEqual(transcript.getChromosome(), "arm_X")\n-                self.assertEqual(transcript.getStart(), 1000)\n-                self.assertEqual(transcript.getEnd(), 1999)\n-                self.assertEqual(transcript.getDirection(), 1)\n-            elif cpt == 1:\n-                self.assertEqual(transcript.getChromosome(), "arm_X")\n-                self.assertEqual(transcript.getStart(), 1000)\n-                self.assertEqual(transcript.getEnd(), 1999)\n-                
self.assertEqual(transcript.getDirection(), -1)\n-            cpt += 1\n-\n-\n-    def test_compareTranscriptListDistanceSimple(self):\n-        container1 = TranscriptContainer("%s/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple1.gff3" % SMART_PATH, "gff", 0)\n-        container2 = TranscriptContainer("%s/Java/Python/TestFiles/testCompareTranscriptListDistanceSimple2.gff3" % SMART_PATH, "gff", 0)\n-\n-        comparator = TranscriptListsComparator(None, 0)\n-        comparator.computeOdds(True)\n-        comparator.setMaxDistance(1000)\n-        comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n-        comparator.setInputTranscriptContainer(comparator.REFERENCE, container2)\n-        distances = comparator.compareTranscriptListDistance()\n-\n-        self.assertEqual(distances, {0: 1})\n-\n-        comparator = TranscriptListsComparator(None, 0)\n-        comparator.computeOdds(True)\n-        comparator.setMaxDistance(1000)\n-        comparator.setInputTranscriptContainer(comparator.QUERY, container2)\n-        comparator.setInputTranscriptContainer(comparator.REFERENCE, container1)\n-        distances = comparator.compareTranscriptListDistance()\n-\n-        self.assertEqual(distances, {0: 1, -1000: 1})\n-\n-\n-    def test_compareTranscriptListDistanceAntisense(self):\n-        container1 = TranscriptContainer("%s/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense1.gff3" % SMART_PATH, "gff", 0)\n-        container2 = TranscriptContainer("%s/Java/Python/TestFiles/testCompareTranscriptListDistanceAntisense2.gff3" % SMART_PATH, "gff", 0)\n-\n-        comparator = TranscriptListsComparator(None, 0)\n-        comparator.computeOdds(True)\n-        comparator.setMaxDistance(10000)\n-        comparator.getAntisenseOnly(True)\n-        comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n-        comparator.setInputTranscriptContainer(comparator.REFERENCE, container2)\n-        distances = 
comparator.compareTranscriptListDistance()\n-\n-        self.assertEqual(distances, {1000: 1})\n-\n-\n-\n-    def '..b'orCompareTranscriptListSelfMergeDifferentClusters1.bed" % SMART_PATH, "bed", 0)\n-        comparator = TranscriptListsComparator(None, 0)\n-        comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n-        comparator.setOutputWriter(Gff3Writer("output.gff3", 0))\n-        comparator.compareTranscriptListSelfMerge()\n-\n-        parser = GffParser("output.gff3", 0)\n-        self.assertEquals(parser.getNbTranscripts(), 1)\n-        for transcript in parser.getIterator():\n-            self.assertEqual(transcript.getChromosome(), "arm_X")\n-            self.assertEqual(transcript.getStart(), 100)\n-            self.assertEqual(transcript.getEnd(), 100099)\n-            self.assertEqual(transcript.getDirection(), 1)\n-            self.assertEqual(transcript.getNbExons(), 1)\n-            self.assertEqual(transcript.getSize(), 100000)\n-\n-\n-    def test_compareTranscriptListgetDifferenceTranscriptList(self):\n-        container1 = TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference1.gff3" % SMART_PATH, "gff", 0)\n-        container2 = TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference2.gff3" % SMART_PATH, "gff", 0)\n-\n-        comparator = TranscriptListsComparator(None, 0)\n-        comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n-        comparator.setInputTranscriptContainer(comparator.REFERENCE, container2)\n-        comparator.setOutputWriter(Gff3Writer("output.gff3", 0))\n-        comparator.getDifferenceTranscriptList()\n-\n-        parser = GffParser("output.gff3", 0)\n-        self.assertEqual(parser.getNbTranscripts(), 1)\n-        for transcript in parser.getIterator():\n-            self.assertEqual(transcript.getChromosome(), "arm_X")\n-            
self.assertEqual(transcript.getStart(), 1000)\n-            self.assertEqual(transcript.getEnd(), 4000)\n-            self.assertEqual(transcript.getDirection(), 1)\n-            self.assertEqual(transcript.getNbExons(), 2)\n-            exon1, exon2 = transcript.getExons()\n-            self.assertEqual(exon1.getStart(), 1000)\n-            self.assertEqual(exon1.getEnd(), 1999)\n-            self.assertEqual(exon2.getStart(), 3001)\n-            self.assertEqual(exon2.getEnd(), 4000)\n-\n-\n-\n-    def test_compareTranscriptListgetDifferenceTranscriptListSplit(self):\n-        container1 = TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference1.gff3" % SMART_PATH, "gff", 0)\n-        container2 = TranscriptContainer("%s/Java/Python/TestFiles/testTranscriptListsComparatorCompareTranscriptListGetDifference2.gff3" % SMART_PATH, "gff", 0)\n-\n-        comparator = TranscriptListsComparator(None, 0)\n-        comparator.setInputTranscriptContainer(comparator.QUERY, container1)\n-        comparator.setInputTranscriptContainer(comparator.REFERENCE, container2)\n-        comparator.setSplitDifference(True)\n-        comparator.setOutputWriter(Gff3Writer("output.gff3", 0))\n-        comparator.getDifferenceTranscriptList()\n-\n-        parser = GffParser("output.gff3", 0)\n-        self.assertEqual(parser.getNbTranscripts(), 2)\n-        for id, transcript in enumerate(parser.getIterator()):\n-            if id == 0:\n-                self.assertEqual(transcript.getChromosome(), "arm_X")\n-                self.assertEqual(transcript.getStart(), 1000)\n-                self.assertEqual(transcript.getEnd(), 1999)\n-                self.assertEqual(transcript.getDirection(), 1)\n-                self.assertEqual(transcript.getNbExons(), 1)\n-            else:\n-                self.assertEqual(transcript.getChromosome(), "arm_X")\n-                self.assertEqual(transcript.getStart(), 3001)\n-                
self.assertEqual(transcript.getEnd(), 4000)\n-                self.assertEqual(transcript.getDirection(), 1)\n-                self.assertEqual(transcript.getNbExons(), 1)\n-\n-\n-if __name__ == \'__main__\':\n-        unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test.gff3
--- a/SMART/Java/Python/test.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,100000 +0,0 @@\n-chr1\tS-MART\ttranscript\t13\t33\t21\t-\t.\tbestRegion=(self);nbGaps=0;nbMismatches=0;ID=HWUSI-EAS454_0005:1:29:15426:13405#0/1;identity=100;Name=HWUSI-EAS454_0005:1:29:15426:13405#0/1\n-chr1\tS-MART\ttranscript\t14\t37\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:36:917:291#0;identity=100;Name=HWUSI-EAS454_0001:6:36:917:291#0\n-chr1\tS-MART\ttranscript\t14\t36\t23\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:39:1005:1200#0;identity=100;Name=HWUSI-EAS454_0001:6:39:1005:1200#0\n-chr1\tS-MART\ttranscript\t15\t38\t24\t-\t.\tquality=25;nbElements=3.000000;ID=HWUSI-EAS454_0005:1:88:8852:4891#0;Name=HWUSI-EAS454_0005:1:88:8852:4891#0--HWUSI-EAS454_0005:1:16:12336:7772#0--HWUSI-EAS454_0001:6:29:1176\n-chr1\tS-MART\ttranscript\t16\t39\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:12:605:1006#0;identity=100;Name=HWUSI-EAS454_0001:6:12:605:1006#0\n-chr1\tS-MART\ttranscript\t16\t38\t23\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:41:570:1349#0;identity=100;Name=HWUSI-EAS454_0001:6:41:570:1349#0\n-chr1\tS-MART\ttranscript\t17\t40\t24\t-\t.\tquality=37;nbElements=11.000000;ID=HWUSI-EAS454_0005:1:81:1819:13108#0;Name=HWUSI-EAS454_0005:1:81:1819:13108#0--HWUSI-EAS454_0005:1:75:6916:8155#0--HWUSI-EAS454_0005:1:67:2702\n-chr1\tS-MART\ttranscript\t17\t39\t23\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:90:6409:15569#0;identity=100;Name=HWUSI-EAS454_0005:1:90:6409:15569#0\n-chr1\tS-MART\ttranscript\t18\t41\t24\t-\t.\tquality=25;nbElements=11.000000;ID=HWUSI-EAS454_0005:1:8:14764:13869#0;Name=HWUSI-EAS454_0005:1:8:14764:13869#0--HWUSI-EAS454_0005:1:80:16600:20813#0--HWUSI-EAS454_0005:1:68:16
\n-chr1\tS-MART\ttranscript\t19\t42\t24\t-\t.\tquality=37;nbElements=2.000000;ID=HWUSI-EAS454_0005:1:55:7969:5875#0;Name=HWUSI-EAS454_0005:1:55:7969:5875#0--HWUSI-EAS454_0001:6:20:1413:2018#0\n-chr1\tS-MART\ttranscript\t20\t42\t23\t-\t.\tquality=25;nbElements=2.000000;ID=HWUSI-EAS454_0005:1:8:12144:21397#0;Name=HWUSI-EAS454_0005:1:8:12144:21397#0--HWUSI-EAS454_0005:1:81:7335:14824#0\n-chr1\tS-MART\ttranscript\t20\t42\t23\t+\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:117:3168:18210#0;identity=100;Name=HWUSI-EAS454_0005:1:117:3168:18210#0\n-chr1\tS-MART\ttranscript\t21\t44\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:71:270:748#0;identity=100;Name=HWUSI-EAS454_0001:6:71:270:748#0\n-chr1\tS-MART\ttranscript\t21\t41\t21\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:70:1405:628#0;identity=100;Name=HWUSI-EAS454_0001:6:70:1405:628#0\n-chr1\tS-MART\ttranscript\t22\t45\t24\t-\t.\tquality=37;nbElements=4.000000;ID=HWUSI-EAS454_0005:1:5:3208:12720#0;Name=HWUSI-EAS454_0005:1:5:3208:12720#0--HWUSI-EAS454_0001:6:77:795:914#0--HWUSI-EAS454_0001:6:67:1386:90\n-chr1\tS-MART\ttranscript\t23\t46\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:72:315:1393#0;identity=100;Name=HWUSI-EAS454_0001:6:72:315:1393#0\n-chr1\tS-MART\ttranscript\t23\t45\t23\t-\t.\tquality=37;nbElements=7.000000;ID=HWUSI-EAS454_0005:1:78:14145:12130#0;Name=HWUSI-EAS454_0005:1:78:14145:12130#0--HWUSI-EAS454_0005:1:54:2352:2026#0--HWUSI-EAS454_0005:1:23:106\n-chr1\tS-MART\ttranscript\t24\t48\t25\t-\t.\toccurrence=1;quality=25;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=HWUSI-EAS454_0001:6:57:318:773#0;identity=96;Name=HWUSI-EAS454_0001:6:57:318:773#0\n-chr1\tS-MART\ttranscript\t24\t47\t24\t-\t.\tqua
lity=25;nbElements=20.000000;ID=HWUSI-EAS454_0005:1:95:13627:10660#0;Name=HWUSI-EAS454_0005:1:95:13627:10660#0--HWUSI-EAS454_0005:1:31:5776:16476#0--HWUSI-EAS454_0005:1:31:'..b'ART\ttranscript\t9315588\t9315612\t25\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:20:1492:802#0;identity=100;Name=HWUSI-EAS454_0001:6:20:1492:802#0\n-chr1\tS-MART\ttranscript\t9315654\t9315680\t27\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:5:3941:11137#0;identity=100;Name=HWUSI-EAS454_0005:1:5:3941:11137#0\n-chr1\tS-MART\ttranscript\t9315856\t9315882\t27\t-\t.\toccurrence=1;quality=25;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=HWUSI-EAS454_0005:1:119:16446:8333#0;identity=96;Name=HWUSI-EAS454_0005:1:119:16446:8333#0\n-chr1\tS-MART\ttranscript\t9315864\t9315883\t20\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:4:1116:1279#0;identity=100;Name=HWUSI-EAS454_0001:6:4:1116:1279#0\n-chr1\tS-MART\ttranscript\t9316130\t9316160\t31\t-\t.\toccurrence=1;quality=25;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=HWUSI-EAS454_0005:1:57:2462:8082#0;identity=96;Name=HWUSI-EAS454_0005:1:57:2462:8082#0\n-chr1\tS-MART\ttranscript\t9316130\t9316149\t20\t-\t.\toccurrence=1;quality=23;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWUSI-EAS454_0001:6:63:152:1917#0-1;identity=100;Name=HWUSI-EAS454_0001:6:63:152:1917#0\n-chr1\tS-MART\ttranscript\t9316240\t9316263\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:64:1494:1889#0;identity=100;Name=HWUSI-EAS454_0001:6:64:1494:1889#0\n-chr1\tS-MART\ttranscript\t9316788\t9316808\t21\t-\t.\tnbElements=2.000000;ID=HWUSI-EAS454_0005:1:37:6611:7504#0/1;Name=HWUSI-EAS454_0005:1:37:6611:7504#0/1--HWUSI-EAS454_0005:1:32:15
853:3213#0/1\n-chr1\tS-MART\ttranscript\t9317112\t9317135\t24\t+\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:5:14934:13715#0;identity=100;Name=HWUSI-EAS454_0005:1:5:14934:13715#0\n-chr1\tS-MART\ttranscript\t9318166\t9318186\t21\t+\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:78:8582:8151#0;identity=100;Name=HWUSI-EAS454_0005:1:78:8582:8151#0\n-chr1\tS-MART\ttranscript\t9318484\t9318504\t21\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:9:894:642#0;identity=100;Name=HWUSI-EAS454_0001:6:9:894:642#0\n-chr1\tS-MART\ttranscript\t9318602\t9318621\t20\t-\t.\toccurrence=1;quality=0;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=1;ID=HWUSI-EAS454_0005:1:40:5793:3039#0-1;identity=95;Name=HWUSI-EAS454_0005:1:40:5793:3039#0\n-chr1\tS-MART\ttranscript\t9319135\t9319156\t22\t-\t.\toccurrence=1;quality=0;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWUSI-EAS454_0005:1:30:14232:2440#0-1;identity=100;Name=HWUSI-EAS454_0005:1:30:14232:2440#0\n-chr1\tS-MART\ttranscript\t9321769\t9321794\t26\t+\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0001:6:81:1482:1344#0;identity=100;Name=HWUSI-EAS454_0001:6:81:1482:1344#0\n-chr1\tS-MART\ttranscript\t9322055\t9322078\t24\t+\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:54:17933:11240#0;identity=100;Name=HWUSI-EAS454_0005:1:54:17933:11240#0\n-chr1\tS-MART\ttranscript\t9322742\t9322762\t21\t-\t.\tbestRegion=(self);nbGaps=0;nbMismatches=3;ID=HWUSI-EAS454_0005:1:39:12064:3568#0/1;identity=85;Name=HWUSI-EAS454_0005:1:39:12064:3568#0/1\n-chr1\tS-MART\ttranscript\t9324388\t9324411\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismat
ches=0;ID=HWUSI-EAS454_0005:1:64:8973:18503#0;identity=100;Name=HWUSI-EAS454_0005:1:64:8973:18503#0\n-chr1\tS-MART\ttranscript\t9325384\t9325407\t24\t-\t.\toccurrence=1;quality=37;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWUSI-EAS454_0005:1:54:2957:1577#0;identity=100;Name=HWUSI-EAS454_0005:1:54:2957:1577#0\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test.pdf
b
Binary file SMART/Java/Python/test.pdf has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test.png
b
Binary file SMART/Java/Python/test.png has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/MockGetLetterDistribution.py
--- a/SMART/Java/Python/test/MockGetLetterDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,69 +0,0 @@
-class MockGetLetterDistributionFasta (object) :
- def write(self,inFileName):
- f = open(inFileName,'w')
- f.write('>HWI-EAS337_3:7:1:415:1217/1\n')
- f.write('GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n')
- f.write('>HWI-EAS337_3:7:1:208:1489/1\n')
- f.write('GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n')
- f.write('>HWI-EAS337_3:7:1:278:1153\n')
- f.write('GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n')
- f.close()
-
-class MockGetLetterDistributionFastq (object) :
- def write(self,inFileName):
- f = open(inFileName,'w')
- f.write('@HWI-EAS337_3:7:1:415:1217/1\n')
- f.write('GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n')
- f.write('+HWI-EAS337_3:7:1:415:1217/1\n')
- f.write('WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQ\n')
- f.write('@HWI-EAS337_3:7:1:208:1489/1\n')
- f.write('GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n')
- f.write('+HWI-EAS337_3:7:1:208:1489/1\n')
- f.write('WWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU\n')
- f.write('@HWI-EAS337_3:7:1:278:1153/1\n')
- f.write('GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n')
- f.write('+HWI-EAS337_3:7:1:278:1153/1\n')
- f.write('WWWWWWWWWWWWWWWWWWWWWWVWVVVWWVUUUUUR\n')
- f.close()
-
-class MockGetLetterDistributionExpectedCSV (object) :
- def write(self,inFileName):
- f = open(inFileName,'w')
- f.write('1;A=0%;C=0%;T=0%;G=0%;\n')
- f.write('2;A=0%;C=0%;T=0%;G=100.00%;\n')
- f.write('3;A=66.67%;C=0%;T=0%;G=33.33%;\n')
- f.write('4;A=33.33%;C=0%;T=33.33%;G=33.33%;\n')
- f.write('5;A=66.67%;C=0%;T=0%;G=33.33%;\n')
- f.write('6;A=66.67%;C=0%;T=33.33%;G=0%;\n')
- f.write('7;A=33.33%;C=33.33%;T=0%;G=33.33%;\n')
- f.write('8;A=66.67%;C=33.33%;T=0%;G=0%;\n')
- f.write('9;A=33.33%;C=33.33%;T=33.33%;G=0%;\n')
- f.write('10;A=33.33%;C=33.33%;T=0%;G=33.33%;\n')
- f.write('11;A=33.33%;C=0%;T=66.67%;G=0%;\n')
- f.write('12;A=0%;C=33.33%;T=0%;G=66.67%;\n')
- f.write('13;A=0%;C=33.33%;T=66.67%;G=0%;\n')
- f.write('14;A=66.67%;C=0%;T=33.33%;G=0%;\n')
- f.write('15;A=33.33%;C=33.33%;T=33.33%;G=0%;\n')
- f.write('16;A=33.33%;C=0%;T=66.67%;G=0%;\n')
- f.write('17;A=33.33%;C=33.33%;T=33.33%;G=0%;\n')
- f.write('18;A=100.00%;C=0%;T=0%;G=0%;\n')
- f.write('19;A=66.67%;C=33.33%;T=0%;G=0%;\n')
- f.write('20;A=33.33%;C=0%;T=33.33%;G=33.33%;\n')
- f.write('21;A=33.33%;C=66.67%;T=0%;G=0%;\n')
- f.write('22;A=33.33%;C=33.33%;T=0%;G=33.33%;\n')
- f.write('23;A=0%;C=0%;T=66.67%;G=33.33%;\n')
- f.write('24;A=0%;C=0%;T=33.33%;G=66.67%;\n')
- f.write('25;A=66.67%;C=0%;T=0%;G=33.33%;\n')
- f.write('26;A=33.33%;C=33.33%;T=0%;G=33.33%;\n')
- f.write('27;A=66.67%;C=0%;T=33.33%;G=0%;\n')
- f.write('28;A=66.67%;C=0%;T=0%;G=33.33%;\n')
- f.write('29;A=33.33%;C=33.33%;T=33.33%;G=0%;\n')
- f.write('30;A=66.67%;C=33.33%;T=0%;G=0%;\n')
- f.write('31;A=66.67%;C=0%;T=33.33%;G=0%;\n')
- f.write('32;A=0%;C=33.33%;T=33.33%;G=33.33%;\n')
- f.write('33;A=33.33%;C=0%;T=33.33%;G=33.33%;\n')
- f.write('34;A=33.33%;C=66.67%;T=0%;G=0%;\n')
- f.write('35;A=0%;C=0%;T=66.67%;G=33.33%;\n')
- f.write('36;A=0%;C=0%;T=33.33%;G=66.67%;\n')
- f.close()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_Clusterize.py
--- a/SMART/Java/Python/test/Test_F_Clusterize.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,119 +0,0 @@\n-import unittest\n-import os, os.path\n-from optparse import OptionParser\n-from SMART.Java.Python.misc import Utils\n-from SMART.Java.Python.clusterize import Clusterize\n-#TODO: test for n option when option corrected\n-\n-SMART_PATH = os.environ["REPET_PATH"] + "/SMART"\n-\n-class Test_F_Clusterize(unittest.TestCase):\n-\n-    def setUp(self):\n-        self._inputFileName     = "inputFileTest.bed"\n-        self._expOutputFileName = "expOutput.gff3"\n-        self._outputFileName    = "output.gff3"\n-        self._writeInputFile()\n-        \n-    def tearDown(self):\n-        for file in (self._inputFileName, self._expOutputFileName, self._outputFileName):\n-            if os.path.exists(file):\n-                os.remove(file)\n-\n-    def test_run_2DifferentStrand(self):\n-        self._writeOutputFile_2DiffStrand(self._expOutputFileName)\n-        \n-        args = ["-i", self._inputFileName, "-f", "bed", "-o", self._outputFileName, "-v", "0"]\n-        parser = OptionParser()\n-        parser.add_option("-i", "--input",          dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")\n-        parser.add_option("-f", "--format",         dest="format",         action="store",                     type="string", help="format of file [format: transcript file format]")\n-        parser.add_option("-o", "--output",         dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in GFF3 format]")\n-        parser.add_option("-c", "--colinear",       dest="colinear",       action="store_true", default=False,                help="merge colinear transcripts only [format: bool] [default: false]")\n-        parser.add_option("-d", "--distance",       dest="distance",       action="store",      default=0,     type="int",    help="max. 
distance between two transcripts to be merged [format: int] [default: 0]")\n-        parser.add_option("-n", "--normalize",      dest="normalize",      action="store_true", default=False,                help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")\n-        parser.add_option("-v", "--verbosity",      dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")\n-        (options, args) = parser.parse_args(args)\n-        iClusterize = Clusterize(options.verbosity)\n-        iClusterize.setInputFile(options.inputFileName, options.format)\n-        iClusterize.setOutputFileName(options.outputFileName)\n-        iClusterize.setColinear(options.colinear)\n-        iClusterize.setDistance(options.distance)\n-        iClusterize.setNormalize(options.normalize)\n-        iClusterize.run()\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-\n-    def test_runAsScript_2DifferentStrand(self):\n-        self._writeOutputFile_2DiffStrand(self._expOutputFileName)\n-        os.system("python %s/Java/Python/clusterize.py -i %s -f bed -o %s -v 0" % (SMART_PATH, self._inputFileName, self._outputFileName))\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-        \n-    def test_runAsScript_2DifferentStrand_map_output(self):\n-        self._expOutputFileName = "expOutput.map"\n-        self._outputFileName    = "output.map"\n-        self._writeOutputFile_2DiffStrandMapFormat(self._expOutputFileName)\n-        os.system("python %s/Java/Python/clusterize.py -i %s -f bed -o %s -u map -v 0" % (SMART_PATH, self._inputFileName, self._outputFileName))\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-        \n-    def test_runAsScript_2SameStrand(self):\n-        self._writeOutputFile_2SameStrand(self._expOutputFileName)\n-        os.system("python 
%s/Java/Python/clusterize.py -i %s -f bed -o %s -c -v 0" % (SMART_PATH, self._inputFileName, self._outputFileNam'..b'f.write("arm_X\\t10000100\\t10000200\\ttest1.1\\t100\\t+\\t10000100\\t10000200\\t0\\t1\\t100,\\t0,\\n")\n-        f.write("arm_X\\t10000100\\t10000200\\ttest1.2\\t100\\t-\\t10000100\\t10000200\\t0\\t1\\t100,\\t0,\\n")\n-        f.write("arm_2R\\t10000100\\t10000200\\ttest1.3\\t100\\t+\\t10000100\\t10000200\\t0\\t1\\t100,\\t0,\\n")\n-        f.write("arm_X\\t10000000\\t10000100\\ttest1.4\\t100\\t+\\t10000000\\t10000100\\t0\\t1\\t100,\\t0,\\n")\n-        f.write("arm_X\\t10000200\\t10000300\\ttest1.5\\t100\\t+\\t10000200\\t10000300\\t0\\t1\\t100,\\t0,\\n")\n-        f.write("arm_X\\t9999900\\t9999950\\ttest1.6\\t100\\t+\\t9999900\\t9999950\\t0\\t1\\t50,\\t0,\\n")\n-        f.write("arm_X\\t10000000\\t10000050\\ttest1.7\\t100\\t-\\t10000000\\t10000050\\t0\\t1\\t50,\\t0,\\n")\n-        f.close()\n-        \n-    def _writeOutputFile_2DiffStrand(self, outputFileName):\n-        f = open(outputFileName, "w")\n-        f.write("arm_2R\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t+\\t.\\tID=test1.3;Name=test1.3\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t9999900\\t9999949\\t.\\t+\\t.\\tID=test1.6;Name=test1.6\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t10000000\\t10000099\\t.\\t-\\t.\\tnbElements=2.000000;ID=test1.7--test1.4;Name=test1.7--test1.4\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t-\\t.\\tnbElements=2.000000;ID=test1.2--test1.1;Name=test1.2--test1.1\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t10000200\\t10000299\\t.\\t+\\t.\\tID=test1.5;Name=test1.5\\n")\n-        f.close()\n-        \n-    def _writeOutputFile_2SameStrand(self, outputFileName):\n-        f = open(outputFileName, "w")\n-        f.write("arm_2R\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t+\\t.\\tID=test1.3;Name=test1.3\\n") \n-        
f.write("arm_X\\tS-MART\\ttranscript\\t9999900\\t9999949\\t.\\t+\\t.\\tID=test1.6;Name=test1.6\\n") \n-        f.write("arm_X\\tS-MART\\ttranscript\\t10000000\\t10000099\\t.\\t+\\t.\\tID=test1.4;Name=test1.4\\n") \n-        f.write("arm_X\\tS-MART\\ttranscript\\t10000000\\t10000049\\t.\\t-\\t.\\tID=test1.7;Name=test1.7\\n") \n-        f.write("arm_X\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t+\\t.\\tID=test1.1;Name=test1.1\\n") \n-        f.write("arm_X\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t-\\t.\\tID=test1.2;Name=test1.2\\n") \n-        f.write("arm_X\\tS-MART\\ttranscript\\t10000200\\t10000299\\t.\\t+\\t.\\tID=test1.5;Name=test1.5\\n")\n-        f.close()\n-        \n-    def _writeOutputFile_distance(self, outputFileName):\n-        f = open(outputFileName, "w")\n-        f.write("arm_2R\\tS-MART\\ttranscript\\t10000100\\t10000199\\t.\\t+\\t.\\tID=test1.3;Name=test1.3\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t9999900\\t9999949\\t.\\t+\\t.\\tID=test1.6;Name=test1.6\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t10000000\\t10000299\\t.\\t+\\t.\\tnbElements=5.000000;ID=test1.5--test1.2--test1.1--test1.7--test1.4;Name=test1.5--test1.2--test1.1--test1.7--test1.4\\n")\n-        f.write("arm_X\\tS-MART\\texon\\t10000000\\t10000099\\t.\\t+\\t.\\tID=test1.5--test1.2--test1.1--test1.7--test1.4-exon1;Name=test1.5--test1.2--test1.1--test1.7--test1.4-exon1;Parent=test1.5--test1.2--test1.1--test1.7--test1.4\\n")\n-        f.write("arm_X\\tS-MART\\texon\\t10000100\\t10000199\\t.\\t+\\t.\\tID=test1.5--test1.2--test1.1--test1.7--test1.4-exon2;Name=test1.5--test1.2--test1.1--test1.7--test1.4-exon2;Parent=test1.5--test1.2--test1.1--test1.7--test1.4\\n")\n-        f.write("arm_X\\tS-MART\\texon\\t10000200\\t10000299\\t.\\t+\\t.\\tID=test1.5--test1.2--test1.1--test1.7--test1.4-exon3;Name=test1.5--test1.2--test1.1--test1.7--test1.4-exon3;Parent=test1.5--test1.2--test1.1--test1.7--test1.4\\n")\n-        f.close()\n-        \n-    def 
_writeOutputFile_2DiffStrandMapFormat(self, outputFileName):\n-        f = open(outputFileName, "w")\n-        f.write("test1.3\\tarm_2R\\t10000100\\t10000200\\n")\n-        f.write("test1.6\\tarm_X\\t9999900\\t9999950\\n")\n-        f.write("test1.7--test1.4\\tarm_X\\t10000000\\t10000100\\n")\n-        f.write("test1.2--test1.1\\tarm_X\\t10000100\\t10000200\\n")\n-        f.write("test1.5\\tarm_X\\t10000200\\t10000300\\n")\n-        f.close()\n-                \n-\n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_ClusterizeByTags.py
--- a/SMART/Java/Python/test/Test_F_ClusterizeByTags.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,144 +0,0 @@\n-import unittest, os, os.path\n-from SMART.Java.Python.misc import Utils\n-from SMART.Java.Python.ClusterizeByTags import ClusterizeByTags\n-\n-class Test_F_ClusterizeByTags(unittest.TestCase):\n-\n-    def setUp(self):\n-        self._inputFileName     = "inputFileTest.gff3"\n-        self._expOutputFileName = "expOutput.gff3"\n-        self._outputFileName    = "output.gff3"\n-        \n-    def tearDown(self):\n-        for file in (self._inputFileName, self._expOutputFileName, self._outputFileName):\n-            if os.path.exists(file):\n-                os.remove(file)\n-\n-    def test_diff_simple(self):\n-        handle = open(self._inputFileName, "w")\n-        handle.write("""chr1\\tS-MART\\ttest1.1\\t100\\t200\\t.\\t+\\t.\\tName=test1.1;score=10;ID=test1.1\n-chr1\\tS-MART\\ttest1.2\\t300\\t400\\t.\\t+\\t.\\tName=test1.2;score=15;ID=test1.2\n-chr1\\tS-MART\\ttest1.3\\t500\\t600\\t.\\t+\\t.\\tName=test1.3;score=15;ID=test1.3\n-chr1\\tS-MART\\ttest1.4\\t700\\t800\\t.\\t+\\t.\\tName=test1.4;score=100;ID=test1.4\n-chr1\\tS-MART\\ttest1.5\\t900\\t1000\\t.\\t+\\t.\\tName=test1.5;score=110;ID=test1.5\n-chr1\\tS-MART\\ttest1.6\\t1100\\t1200\\t.\\t+\\t.\\tName=test1.6;score=105;ID=test1.6\n-""")\n-        handle.close()\n-        handle = open(self._expOutputFileName, "w")\n-        
handle.write("""chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3\n-chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1\n-chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1\n-chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1\n-chr1\tS-MART\ttest1.4\t700\t1200\t315\t+\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6\n-chr1\tS-MART\texon\t700\t800\t100\t+\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4\n-chr1\tS-MART\texon\t900\t1000\t110\t+\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4\n-chr1\tS-MART\texon\t1100\t1200\t105\t+\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4\n-""")\n-        handle.close()\n-        cbt = ClusterizeByTags(0)\n-        cbt.setInputFile(self._inputFileName, "gff3")\n-        cbt.setOutputFile(self._outputFileName)\n-        cbt.setTag("score", None)\n-        cbt.setThreshold(20)\n-        cbt.setOperation("diff")\n-        cbt.setMaxDistance(None)\n-        cbt.run()\n-        self.assertTrue(Utils.diff(self._outputFileName, self._expOutputFileName))\n-\n-    def test_diff_two_strands(self):\n-        handle = open(self._inputFileName, "w")\n-        handle.write("""chr1\\tS-MART\\ttest1.1\\t100\\t200\\t.\\t+\\t.\\tName=test1.1;score=10;ID=test1.1\n-chr1\\tS-MART\\ttest1.2\\t300\\t400\\t.\\t+\\t.\\tName=test1.2;score=15;ID=test1.2\n-chr1\\tS-MART\\ttest1.3\\t500\\t600\\t.\\t+\\t.\\tName=test1.3;score=15;ID=test1.3\n-chr1\\tS-MART\\ttest1.4\\t700\\t800\\t.\\t-\\t.\\tName=test1.4;score=10;ID=test1.4\n-chr1\\tS-MART\\ttest1.5\\t900\\t1000\\t.\\t-\\t.\\tName=test1.5;score=15;ID=test1.5\n-chr1\\tS-MART\\ttest1.6\\t1100\\t1200\\t.\\t-\\t.\\tName=test1.6;score=15;ID=test1.6\n-""")\n-        handle.close()\n-        handle = 
open(self._expOutputFileName, "w")\n-        handle.write("""chr1\tS-MART\ttest1.4\t700\t1200\t40\t-\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6\n-chr1\tS-MART\texon\t700\t800\t10\t-\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4\n-chr1\tS-MART\texon\t900\t1000\t15\t-\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4\n-chr1\tS-MART\texon\t1100\t1200\t15\t-\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4\n-chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3\n-chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1\n-chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1\n-chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1\n-""")\n-        handle.cl'..b'tance(None)\n-        cbt.run()\n-        self.assertTrue(Utils.diff(self._outputFileName, self._expOutputFileName))\n-    \n-    def test_diff_one_strands(self):\n-        handle = open(self._inputFileName, "w")\n-        handle.write("""chr1\\tS-MART\\ttest1.1\\t100\\t200\\t.\\t+\\t.\\tName=test1.1;score=10;ID=test1.1\n-chr1\\tS-MART\\ttest1.2\\t300\\t400\\t.\\t+\\t.\\tName=test1.2;score=15;ID=test1.2\n-chr1\\tS-MART\\ttest1.3\\t500\\t600\\t.\\t+\\t.\\tName=test1.3;score=15;ID=test1.3\n-chr1\\tS-MART\\ttest1.4\\t700\\t800\\t.\\t-\\t.\\tName=test1.4;score=10;ID=test1.4\n-chr1\\tS-MART\\ttest1.5\\t900\\t1000\\t.\\t-\\t.\\tName=test1.5;score=15;ID=test1.5\n-chr1\\tS-MART\\ttest1.6\\t1100\\t1200\\t.\\t-\\t.\\tName=test1.6;score=15;ID=test1.6\n-""")\n-        handle.close()\n-        handle = open(self._expOutputFileName, "w")\n-        
handle.write("""chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3\n-chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1\n-chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1\n-chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1\n-chr1\tS-MART\ttest1.4\t700\t1200\t40\t-\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6\n-chr1\tS-MART\texon\t700\t800\t10\t-\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4\n-chr1\tS-MART\texon\t900\t1000\t15\t-\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4\n-chr1\tS-MART\texon\t1100\t1200\t15\t-\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4\n-""")\n-        handle.close()\n-        cbt = ClusterizeByTags(0)\n-        cbt.setInputFile(self._inputFileName, "gff3")\n-        cbt.setOutputFile(self._outputFileName)\n-        cbt.setTag("score", None)\n-        cbt.setThreshold(20)\n-        cbt.setOperation("diff")\n-        cbt.setMaxDistance(None)\n-        cbt.setOneStrand(True)\n-        cbt.run()\n-        self.assertTrue(Utils.diff(self._outputFileName, self._expOutputFileName))\n-\n-    def test_diff_distance(self):\n-        handle = open(self._inputFileName, "w")\n-        handle.write("""chr1\\tS-MART\\ttest1.1\\t100\\t200\\t.\\t+\\t.\\tName=test1.1;score=10;ID=test1.1\n-chr1\\tS-MART\\ttest1.2\\t300\\t400\\t.\\t+\\t.\\tName=test1.2;score=15;ID=test1.2\n-chr1\\tS-MART\\ttest1.3\\t500\\t600\\t.\\t+\\t.\\tName=test1.3;score=15;ID=test1.3\n-chr1\\tS-MART\\ttest1.4\\t1000\\t1100\\t.\\t+\\t.\\tName=test1.4;score=10;ID=test1.4\n-chr1\\tS-MART\\ttest1.5\\t1200\\t1300\\t.\\t+\\t.\\tName=test1.5;score=15;ID=test1.5\n-chr1\\tS-MART\\ttest1.6\\t1400\\t1500\\t.\\t+\\t.\\tName=test1.6;score=15;ID=test1.6\n-""")\n-        
handle.close()\n-        handle = open(self._expOutputFileName, "w")\n-        handle.write("""chr1\tS-MART\ttest1.1\t100\t600\t40\t+\t.\tnbElements=3.000000;ID=test1.1;Name=test1.1--test1.2--test1.3\n-chr1\tS-MART\texon\t100\t200\t10\t+\t.\tID=test1.1-exon1;Name=test1.1--test1.2--test1.3-exon1;Parent=test1.1\n-chr1\tS-MART\texon\t300\t400\t15\t+\t.\tID=test1.1-exon2;Name=test1.1--test1.2--test1.3-exon2;Parent=test1.1\n-chr1\tS-MART\texon\t500\t600\t15\t+\t.\tID=test1.1-exon3;Name=test1.1--test1.2--test1.3-exon3;Parent=test1.1\n-chr1\tS-MART\ttest1.4\t1000\t1500\t40\t+\t.\tnbElements=3.000000;ID=test1.4;Name=test1.4--test1.5--test1.6\n-chr1\tS-MART\texon\t1000\t1100\t10\t+\t.\tID=test1.4-exon1;Name=test1.4--test1.5--test1.6-exon1;Parent=test1.4\n-chr1\tS-MART\texon\t1200\t1300\t15\t+\t.\tID=test1.4-exon2;Name=test1.4--test1.5--test1.6-exon2;Parent=test1.4\n-chr1\tS-MART\texon\t1400\t1500\t15\t+\t.\tID=test1.4-exon3;Name=test1.4--test1.5--test1.6-exon3;Parent=test1.4\n-""")\n-        handle.close()\n-        cbt = ClusterizeByTags(0)\n-        cbt.setInputFile(self._inputFileName, "gff3")\n-        cbt.setOutputFile(self._outputFileName)\n-        cbt.setTag("score", None)\n-        cbt.setThreshold(20)\n-        cbt.setOperation("diff")\n-        cbt.setMaxDistance(200)\n-        cbt.run()\n-        self.assertTrue(Utils.diff(self._outputFileName, self._expOutputFileName))\n-\n-\n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_CollapseReads.py
--- a/SMART/Java/Python/test/Test_F_CollapseReads.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,92 +0,0 @@
-import unittest
-import os
-from SMART.Java.Python.CollapseReads import CollapseReads
-from SMART.Java.Python.misc import Utils
-from commons.core.utils.FileUtils import FileUtils
-from SMART.Java.Python.ncList.test.MockFindOverlaps_randomExample import MockFindOverlaps_randomExample_NonOrder
-
-class Test_F_CollapseReads(unittest.TestCase):
-
-    def setUp(self):
-        self._inputFileName = 'inputCR.gff3'
-        self._writeInput(self._inputFileName)
-        self._outputFileName = 'outputCR.gff3'
-        self._expOutputFileName = 'expOutputCR.gff3'
-        
-    def tearDown(self):
-        return
-
-    def test_run_default_option(self):
-        iCR = CollapseReads(0)
-        iCR.setInputFile(self._inputFileName, 'gff3')
-        iCR.setOutputFile(self._outputFileName)
-        iCR.strands = False
-        iCR.collapse()
-        self._writeExp_strand_False(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-            
-    def test_run_strand_option(self):
-        iCR = CollapseReads(0)
-        iCR.setInputFile(self._inputFileName, 'gff3')
-        iCR.setOutputFile(self._outputFileName)
-        iCR.strands = True
-        iCR.collapse()
-        self._writeExp_strand_True(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-        
-    def test_run_asScript_default_option(self):
-        cmd = 'python ../CollapseReads.py -i %s -f gff3 -o %s -v 0' % (self._inputFileName, self._outputFileName)
-        os.system(cmd)
-        self._writeExp_strand_True(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))    
-        
-    def test_run_asScript_strand_option(self):
-        cmd = 'python ../CollapseReads.py -i %s -f gff3 -o %s -s -v 0' % (self._inputFileName, self._outputFileName)
-        os.system(cmd)
-        self._writeExp_strand_False(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))    
-        
-    def test_run_toOrderGff(self):
-        iMRE = MockFindOverlaps_randomExample_NonOrder(self._inputFileName, 'chrom', 10, 1000)    
-        iMRE.write()
-        iCR = CollapseReads(0)
-        iCR.setInputFile(self._inputFileName, 'gff3')
-        iCR.setOutputFile(self._outputFileName)
-        iCR.strands = False
-        iCR.collapse()
-        f = open(self._expOutputFileName, "w")
-        f.close()
-        cmd = 'sort -f -n -k4 -k5.4rn -o %s %s'%(self._expOutputFileName, self._inputFileName)
-        os.system(cmd) 
-
-    def _writeInput(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\ttest\tmatch\t6155418\t6155441\t.\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
-        f.write("chr2\ttest\tmatch\t26303950\t26303981\t.\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
-        f.write("chr3\ttest\tmatch\t28320540\t28320574\t.\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
-        f.write("chr4\ttest\tmatch\t28565007\t28565041\t.\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
-        f.write("chr1\ttest\tmatch\t6155418\t6155441\t.\t+\t.\toccurrence=2;rank=2;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=test3/1;identity=50;Name=test3/1\n")
-        f.write("chr1\ttest\tmatch\t6155418\t6155441\t.\t-\t.\toccurrence=2;rank=2;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=test3/1;identity=50;Name=test3/1\n")
-        f.close()
-
-    def _writeExp_strand_False(self, fileName):
-        f = open(fileName, 'w')
-        f.write("""chr1 S-MART match 6155418 6155441 . - . nbElements=3.000000;ID=test3/1;Name=test3/1--test3/1--test1/1
-chr2 S-MART match 26303950 26303981 . + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
-chr3 S-MART match 28320540 28320574 . + . occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1
-chr4 S-MART match 28565007 28565041 . + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1
-""")
-        f.close()
-        
-    def _writeExp_strand_True(self, fileName):
-        f = open(fileName, 'w')
-        f.write("""chr1 S-MART match 6155418 6155441 . + . nbElements=2.000000;ID=test3/1;Name=test3/1--test1/1
-chr1 S-MART match 6155418 6155441 . - . occurrence=2;rank=2;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=test3/1;identity=50;Name=test3/1
-chr2 S-MART match 26303950 26303981 . + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
-chr3 S-MART match 28320540 28320574 . + . occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1
-chr4 S-MART match 28565007 28565041 . + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1
-""")
-        f.close()
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_CombineTags.py
--- a/SMART/Java/Python/test/Test_F_CombineTags.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,109 +0,0 @@
-import unittest
-import os
-from optparse import OptionParser
-from SMART.Java.Python.structure.Transcript import Transcript
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.parsing.GffParser import GffParser
-from commons.core.writer.Gff3Writer import Gff3Writer
-from SMART.Java.Python.CombineTags import CombineTags
-
-class Test_F_CombineTags(unittest.TestCase):
-
-    def setUp(self):
-        self._inputFileName  = "inputFileTest.gff3"
-        self._outputFileName = "outputFileTest.gff3"
-        self._tag1           = "tag1"
-        self._tag2           = "tag2"
-        self._outputTag      = "outputTag"
-        
-    def tearDown(self):
-        os.remove(self._inputFileName)
-        os.remove(self._outputFileName)
-
-    def test_plus(self):
-        transcript = self._createDummyTranscript()
-        transcript.setTagValue(self._tag1, 1)
-        transcript.setTagValue(self._tag2, 2)
-        self._writeTranscript(transcript)
-        combiner = CombineTags(0)
-        combiner.setInputFile(self._inputFileName, "gff3")
-        combiner.setOutputFile(self._outputFileName)
-        combiner.setTags(self._tag1, self._tag2, self._outputTag, None)
-        combiner.setOperation("plus")
-        combiner.run()
-        self._testOutputTag(3)
-
-    def test_minus(self):
-        transcript = self._createDummyTranscript()
-        transcript.setTagValue(self._tag1, 2)
-        transcript.setTagValue(self._tag2, 1)
-        self._writeTranscript(transcript)
-        combiner = CombineTags(0)
-        combiner.setInputFile(self._inputFileName, "gff3")
-        combiner.setOutputFile(self._outputFileName)
-        combiner.setTags(self._tag1, self._tag2, self._outputTag, None)
-        combiner.setOperation("minus")
-        combiner.run()
-        self._testOutputTag(1)
-
-    def test_times(self):
-        transcript = self._createDummyTranscript()
-        transcript.setTagValue(self._tag1, 6)
-        transcript.setTagValue(self._tag2, 3)
-        self._writeTranscript(transcript)
-        combiner = CombineTags(0)
-        combiner.setInputFile(self._inputFileName, "gff3")
-        combiner.setOutputFile(self._outputFileName)
-        combiner.setTags(self._tag1, self._tag2, self._outputTag, None)
-        combiner.setOperation("times")
-        combiner.run()
-        self._testOutputTag(18)
-
-    def test_div(self):
-        transcript = self._createDummyTranscript()
-        transcript.setTagValue(self._tag1, 6)
-        transcript.setTagValue(self._tag2, 3)
-        self._writeTranscript(transcript)
-        combiner = CombineTags(0)
-        combiner.setInputFile(self._inputFileName, "gff3")
-        combiner.setOutputFile(self._outputFileName)
-        combiner.setTags(self._tag1, self._tag2, self._outputTag, None)
-        combiner.setOperation("div")
-        combiner.run()
-        self._testOutputTag(2)
-
-    def test_default(self):
-        transcript = self._createDummyTranscript()
-        transcript.setTagValue(self._tag1, 1)
-        self._writeTranscript(transcript)
-        combiner = CombineTags(0)
-        combiner.setInputFile(self._inputFileName, "gff3")
-        combiner.setOutputFile(self._outputFileName)
-        combiner.setTags(self._tag1, self._tag2, self._outputTag, 0)
-        combiner.setOperation("plus")
-        combiner.run()
-        self._testOutputTag(0)
-
-    def _createDummyTranscript(self):
-        transcript = Transcript()
-        transcript.setChromosome("chr1")
-        transcript.setName("test1")
-        transcript.setEnd(200)
-        transcript.setStart(100)
-        transcript.setDirection("+")
-        return transcript
-    
-    def _testOutputTag(self, value):
-        parser = GffParser(self._outputFileName, 0)
-        for transcript in parser.getIterator():
-            self.assertEquals(float(transcript.getTagValue(self._outputTag)), value)
-        parser.close()
-
-    def _writeTranscript(self, transcript):
-        f = open(self._inputFileName, "w")
-        f.write(transcript.printGff3("test"))
-        f.close()
-        
-
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_FindOverlapsOptim.py
--- a/SMART/Java/Python/test/Test_F_FindOverlapsOptim.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,500 +0,0 @@\n-import unittest\n-import os\n-from SMART.Java.Python.misc import Utils\n-from commons.core.utils.FileUtils import FileUtils\n-from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim\n-from SMART.Java.Python.ncList.test.MockFindOverlapsWithSeveralIntervals import *\n-\n-class Test_F_FindOverlapsOptim(unittest.TestCase):\n-\n-    def setUp(self):\n-        self._inputRefGff3FileName = \'sorted_Ref.gff3\'\n-        self._writeGFF3File(self._inputRefGff3FileName)\n-        self._inputQueryGff3FileName = \'sorted_Query.gff3\'\n-        self._writeQueryGff3File(self._inputQueryGff3FileName)\n-        self._outputGff3FileName = \'output.gff3\'\n-        self._expOutputFileName = \'expOutGff3.gff3\'\n-        \n-    def tearDown(self):\n-        for fileName in (self._inputRefGff3FileName, self._inputQueryGff3FileName, self._outputGff3FileName, self._expOutputFileName):\n-            if os.path.exists(fileName):\n-                os.remove(fileName)\n-        \n-    def test_run_general_asScript(self):\n-        cmd = \'python ../FindOverlapsOptim.py -i %s -f gff -j %s -g gff -o %s -v 0\' % (self._inputQueryGff3FileName, self._inputRefGff3FileName, self._outputGff3FileName)      \n-        os.system(cmd)\n-        self._writeExpOutFile_general(self._expOutputFileName)\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))         \n-\n-    def test_run_overlap_special_case(self):\n-        inputQueryGff3FileName = \'query2.gff3\'\n-        self._writeQueryGff3File2(inputQueryGff3FileName)\n-        iFOO = FindOverlapsOptim(0)\n-        iFOO.setRefFileName(self._inputRefGff3FileName, "gff")\n-        iFOO.setQueryFileName(inputQueryGff3FileName, "gff")\n-        iFOO.setOutputFileName(self._outputGff3FileName)\n-        iFOO.run()\n-        iFOO.close()\n-        self._writeExpOutFile_special_case(self._expOutputFileName)        \n-        self.assertTrue(Utils.diff(self._expOutputFileName, 
self._outputGff3FileName))   \n-        os.remove(inputQueryGff3FileName) \n-        \n-    def test_run_overlap_special_case_asScript(self):\n-        inputQueryGff3FileName = \'query2.gff3\'\n-        self._writeQueryGff3File2(inputQueryGff3FileName)\n-        cmd = \'python ../FindOverlapsOptim.py -i %s -f gff -j %s -g gff -o %s -v 0\' % (inputQueryGff3FileName, self._inputRefGff3FileName, self._outputGff3FileName)      \n-        os.system(cmd) \n-        self._writeExpOutFile_special_case(self._expOutputFileName)        \n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))         \n-        os.remove(inputQueryGff3FileName) \n-        \n-    def test_case_2(self):\n-        inputRefGff3FileName = \'ref_case2.gff3\'\n-        iMock = MockFindOverlapsWithServeralIntervals_case2()\n-        iMock.write(inputRefGff3FileName)\n-        inputQueryGff3FileName = \'query_case2.gff3\'\n-        self._writeQueryGff3File_case2(inputQueryGff3FileName)  \n-        iFOO = FindOverlapsOptim(0)\n-        iFOO.setRefFileName(inputRefGff3FileName, "gff")\n-        iFOO.setQueryFileName(inputQueryGff3FileName, "gff")\n-        iFOO.setOutputFileName(self._outputGff3FileName)\n-        iFOO.run()\n-        iFOO.close()\n-        self._writeExpOutFile_case2(self._expOutputFileName)        \n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputGff3FileName))   \n-        os.remove(inputQueryGff3FileName) \n-        os.remove(inputRefGff3FileName) \n-        \n-    def test_case_3(self):\n-        inputRefGff3FileName = \'ref_case3.gff3\'\n-        iMock = MockFindOverlapsWithServeralIntervals_case3()\n-        iMock.write(inputRefGff3FileName)\n-        inputQueryGff3FileName = \'query_case3.gff3\'\n-        self._writeQueryGff3File_case3(inputQueryGff3FileName)  \n-        iFOO = FindOverlapsOptim(0)\n-        iFOO.setRefFileName(inputRefGff3FileName, "gff")\n-        iFOO.setQueryFileName(inputQueryGff3FileName, "gff")\n-   
     iFOO.setOutputFileName(self._outputGff3FileName)\n-        iFOO.run()\n-        iFOO.close()\n-        self._writeExpOu'..b'ry_4;Name=test3.4\\n")\n-        f.write("chr1\\tquery\\ttest3.5\\t900\\t950\\t51\\t+\\t.\\tID=query_5;Name=test3.5\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case4(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest4.1\\t400\\t500\\t101\\t+\\t.\\tID=query_1;Name=test4.1\\n")\n-        f.write("chr1\\tquery\\ttest4.2\\t450\\t600\\t151\\t+\\t.\\tID=query_2;Name=test4.2\\n")\n-        f.write("chr1\\tquery\\ttest4.3\\t700\\t800\\t101\\t+\\t.\\tID=query_3;Name=test4.3\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case5(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest5.1\\t850\\t950\\t101\\t+\\t.\\tID=query_1;Name=test5.1\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case6(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest6.1\\t200\\t300\\t101\\t+\\t.\\tID=query_1;Name=test6.1\\n")\n-        f.write("chr1\\tquery\\ttest6.2\\t800\\t900\\t101\\t+\\t.\\tID=query_2;Name=test6.2\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case7(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest7.1\\t530\\t550\\t21\\t+\\t.\\tID=query_1;Name=test7.1\\n")\n-        f.write("chr1\\tquery\\ttest7.2\\t600\\t700\\t101\\t+\\t.\\tID=query_2;Name=test7.2\\n")\n-        f.write("chr1\\tquery\\ttest7.3\\t650\\t900\\t251\\t+\\t.\\tID=query_3;Name=test7.3\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case8(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest8.1\\t500\\t600\\t101\\t+\\t.\\tID=query_1;Name=test8.1\\n")\n-        f.write("chr1\\tquery\\ttest8.2\\t700\\t800\\t101\\t+\\t.\\tID=query_2;Name=test8.2\\n")\n-        
f.write("chr1\\tquery\\ttest8.3\\t900\\t1100\\t201\\t+\\t.\\tID=query_3;Name=test8.3\\n")\n-        f.write("chr1\\tquery\\ttest8.4\\t1200\\t1300\\t101\\t+\\t.\\tID=query_4;Name=test8.4\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case9(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest9.1\\t400\\t500\\t101\\t+\\t.\\tID=query_1;Name=test9.1\\n")\n-        f.write("chr1\\tquery\\ttest9.2\\t550\\t650\\t101\\t+\\t.\\tID=query_2;Name=test9.2\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case10(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest10.1\\t700\\t800\\t101\\t+\\t.\\tID=query_1;Name=test10.1\\n")\n-        f.write("chr1\\tquery\\ttest10.2\\t900\\t1000\\t101\\t+\\t.\\tID=query_2;Name=test10.2\\n")\n-        f.write("chr1\\tquery\\ttest10.3\\t1100\\t1300\\t201\\t+\\t.\\tID=query_3;Name=test10.3\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case11(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest11.1\\t420\\t480\\t61\\t+\\t.\\tID=query_1;Name=test11.1\\n")\n-        f.write("chr1\\tquery\\ttest11.2\\t450\\t715\\t266\\t+\\t.\\tID=query_2;Name=test11.2\\n")\n-        f.close()\n-        \n-    def _writeQueryGff3File_case12(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest12.1\\t200\\t400\\t.\\t+\\t.\\tID=query_1;Name=test12.1\\n")\n-        f.write("chr1\\tquery\\ttest12.2\\t600\\t900\\t.\\t+\\t.\\tID=query_2;Name=test12.2\\n")\n-        f.write("chr1\\tquery\\ttest12.3\\t700\\t1000\\t.\\t+\\t.\\tID=query_3;Name=test12.3\\n")\n-        f.close()\n-        \n-    def _writeGFF3File(self, fileName):\n-        f = open(fileName, "w")\n-        f.write("chr1\\ttest\\ttest2.1\\t9\\t1000\\t1001\\t+\\t.\\tID=test2.1;Name=test2.1\\n")\n-        f.write("chr1\\ttest\\ttest2.2\\t50\\t350\\t301\\t+\\t.\\tID=test2.2;Name=test2.2\\n")\n-        
f.write("chr1\\ttest\\ttest2.3\\t100\\t600\\t501\\t+\\t.\\tID=test2.3;Name=test2.3\\n")\n-        f.write("chr1\\ttest\\ttest2.4\\t200\\t450\\t251\\t+\\t.\\tID=test2.4;Name=test2.4\\n")\n-        f.write("chr1\\ttest\\ttest2.5\\t700\\t950\\t251\\t+\\t.\\tID=test2.5;Name=test2.5\\n")\n-        f.write("chr1\\ttest\\ttest2.6\\t800\\t900\\t101\\t+\\t.\\tID=test2.6;Name=test2.6\\n")\n-        f.write("chr1\\ttest\\ttest2.7\\t1200\\t1300\\t101\\t+\\t.\\tID=test2.7;Name=test2.7\\n")\n-        f.close()\n-        \n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_GetDifferentialExpression.py
--- a/SMART/Java/Python/test/Test_F_GetDifferentialExpression.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,50 +0,0 @@
-import unittest
-import os, os.path, glob
-from SMART.Java.Python.GetDifferentialExpression import GetDifferentialExpression
-
-
-class Test_F_GetDifferentialExpression(unittest.TestCase):
-
-    def setUp(self):
-        self.inputFileName1 = "inputFile1.gff3"
-        self.inputFileName2 = "inputFile2.gff3"
-        self.refFileName    = "refFile.gff3"
-        self.outputFileName = "outputFile.gff3"
-        self.plotFileName   = "outputFile.png"
-         
-    def tearDown(self):
-        for fileRoot in (self.inputFileName1, self.inputFileName2, self.refFileName, self.outputFileName, self.plotFileName):
-            for file in glob.glob("%s*" % (fileRoot)):
-                os.remove(file)
-        os.remove(".RData")
-
-    def test_run_simple(self):
-        handle = open(self.inputFileName1, "w")
-        handle.write("chr1\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test1;Name=test1;nbElements=100\n")
-        handle.write("chr2\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test2;Name=test2;nbElements=1000\n")
-        handle.close()
-        handle = open(self.inputFileName2, "w")
-        handle.write("chr1\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test1;Name=test1;nbElements=200\n")
-        handle.write("chr2\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test2;Name=test2;nbElements=1000\n")
-        handle.close()
-        handle = open(self.refFileName, "w")
-        handle.write("chr1\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test1;Name=test1")
-        handle.close()
-        differentialExpression = GetDifferentialExpression(0)
-        differentialExpression.setInputFile(0, self.inputFileName1, "gff3")
-        differentialExpression.setInputFile(1, self.inputFileName2, "gff3")
-        differentialExpression.setReferenceFile(self.refFileName, "gff3")
-        differentialExpression.setOutputFile(self.outputFileName)
-        differentialExpression.setPlotterName(self.plotFileName)
-        differentialExpression.setPlotter()
-        differentialExpression.getDifferentialExpression()
-        differentialExpression.mySqlConnection.deleteDatabase()
-        handle = open(self.outputFileName)
-        lines = handle.readlines()
-        self.assertEquals(len(lines), 1)
-        self.assertEquals(lines[0], "chr1\tS-MART\ttranscript\t1000\t2000\t.\t+\t.\tnbReadsCond1=100;nbReadsCond2=200;ID=test1;regulation=up;nbUnnormalizedReadsCond2=200;nbUnnormalizedReadsCond1=100;pValue=6.010045e-08;Name=test1\n")
-        self.assertTrue(os.path.exists(self.plotFileName))
-        
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_GetFlanking.py
--- a/SMART/Java/Python/test/Test_F_GetFlanking.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,238 +0,0 @@\n-import unittest\n-import os, os.path, glob\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.GetFlanking import GetFlanking\n-from commons.core.writer.Gff3Writer import Gff3Writer\n-from commons.core.parsing.GffParser import GffParser\n-\n-class Test_F_GetFlanking(unittest.TestCase):\n-\n-    def setUp(self):\n-        self.queryFileName     = "testQuery.gff3"\n-        self.referenceFileName = "testReference.gff3"\n-        self.outputFileName    = "testOutput.gff3"\n-         \n-    def tearDown(self):\n-        for fileRoot in (self.queryFileName, self.referenceFileName, self.outputFileName):\n-            for file in glob.glob("%s*" % (fileRoot)):\n-                os.remove(file)\n-\n-    def test_run_simple(self):\n-        #return\n-        reference1 = self._createTranscript("chr1", 1000, 1100, "+", "ref1")\n-        reference2 = self._createTranscript("chr1", 2000, 2100, "+", "ref2")\n-        reference3 = self._createTranscript("chr1", 1000000, 1200000, "+", "ref3")\n-        writer = Gff3Writer(self.referenceFileName, 0)\n-        writer.addTranscript(reference1)\n-        writer.addTranscript(reference2)\n-        writer.addTranscript(reference3)\n-        writer.close()\n-        query1 = self._createTranscript("chr1", 100, 200, "+", "query1")\n-        query2 = self._createTranscript("chr1", 10000, 10100, "+", "query2")\n-        writer = Gff3Writer(self.queryFileName, 0)\n-        writer.addTranscript(query1)\n-        writer.addTranscript(query2)\n-        writer.close()\n-        gf = GetFlanking(0)\n-        gf.setInputFile(self.queryFileName, \'gff3\', 0)\n-        gf.setInputFile(self.referenceFileName, \'gff3\', 1)\n-        gf.setOutputFile(self.outputFileName)\n-        gf.run()\n-        parser = GffParser(self.outputFileName)\n-        self.assertEqual(parser.getNbTranscripts(), 2)\n-        for i, transcript in enumerate(sorted(parser.getIterator(), key = lambda t: 
t.getStart())):\n-            if i == 0:\n-                self._checkTranscript(transcript, "chr1", 100, 200, "+", "query1")\n-                self.assertEqual(transcript.getTagValue("flanking"), "ref1")\n-                self.assertEqual(transcript.getTagValue("_region_flanking"), "downstream")\n-                self.assertEqual(transcript.getTagValue("_sense_flanking"), "collinear")\n-            else:\n-                self._checkTranscript(transcript, "chr1", 10000, 10100, "+", "query2")\n-                self.assertEqual(transcript.getTagValue("flanking"), "ref2")\n-                self.assertEqual(transcript.getTagValue("_region_flanking"), "upstream")\n-                self.assertEqual(transcript.getTagValue("_sense_flanking"), "collinear")\n-\n-    def test_run_simple_downstream(self):\n-        return\n-        reference1 = self._createTranscript("chr1", 300, 400, "+", "ref1")\n-        reference2 = self._createTranscript("chr1", 1000, 1100, "+", "ref2")\n-        writer = Gff3Writer(self.referenceFileName, 0)\n-        writer.addTranscript(reference1)\n-        writer.addTranscript(reference2)\n-        writer.close()\n-        query1 = self._createTranscript("chr1", 100, 200, "+", "query1")\n-        query2 = self._createTranscript("chr1", 1200, 1300, "+", "query2")\n-        query3 = self._createTranscript("chr1", 1400, 1500, "+", "query3")\n-        writer = Gff3Writer(self.queryFileName, 0)\n-        writer.addTranscript(query1)\n-        writer.addTranscript(query2)\n-        writer.addTranscript(query3)\n-        writer.close()\n-        gf = GetFlanking(0)\n-        gf.setInputFile(self.queryFileName, \'gff3\', 0)\n-        gf.setInputFile(self.referenceFileName, \'gff3\', 1)\n-        gf.setOutputFile(self.outputFileName)\n-        gf.addDownstreamDirection(True)\n-        gf.run()\n-        parser = GffParser(self.outputFileName)\n-        self.assertEqual(parser.getNbTranscripts(), 3)\n-        for i, transcript in 
enumerate(sorted(parser.getIterator(), key = lambda t: t.getStart())):\n-            if i == 0:\n-                self._checkTranscript'..b' "chr1", 100, 200, "+", "query1")\n-                self.assertIsNone(transcript.getTagValue("flanking_upstream"))\n-            if i == 1:\n-                self._checkTranscript(transcript, "chr1", 300, 400, "+", "query2")\n-                self.assertIsNone(transcript.getTagValue("flanking_upstream"))\n-            if i == 2:\n-                self._checkTranscript(transcript, "chr1", 1200, 1300, "+", "query3")\n-                self.assertEqual(transcript.getTagValue("flanking_upstream"), "ref2")\n-\n-    def test_run_simple_colinear(self):\n-        return\n-        reference1 = self._createTranscript("chr1", 100, 200, "+", "ref1")\n-        reference2 = self._createTranscript("chr1", 1000, 1100, "+", "ref2")\n-        reference3 = self._createTranscript("chr1", 1600, 1700, "+", "ref3")\n-        writer = Gff3Writer(self.referenceFileName, 0)\n-        writer.addTranscript(reference1)\n-        writer.addTranscript(reference2)\n-        writer.addTranscript(reference3)\n-        writer.close()\n-        query1 = self._createTranscript("chr1", 1200, 1300, "-", "query1")\n-        query2 = self._createTranscript("chr1", 1400, 1500, "+", "query2")\n-        writer = Gff3Writer(self.queryFileName, 0)\n-        writer.addTranscript(query1)\n-        writer.addTranscript(query2)\n-        writer.close()\n-        gf = GetFlanking(0)\n-        gf.setInputFile(self.queryFileName, \'gff3\', 0)\n-        gf.setInputFile(self.referenceFileName, \'gff3\', 1)\n-        gf.setOutputFile(self.outputFileName)\n-        gf.addUpstreamDirection(True)\n-        gf.setColinear(True)\n-        gf.run()\n-        parser = GffParser(self.outputFileName)\n-        self.assertEqual(parser.getNbTranscripts(), 2)\n-        for i, transcript in enumerate(sorted(parser.getIterator(), key = lambda t: t.getStart())):\n-            if i == 0:\n-          
      self._checkTranscript(transcript, "chr1", 1200, 1300, "-", "query1")\n-                self.assertIsNone(transcript.getTagValue("flanking"))\n-            if i == 1:\n-                self._checkTranscript(transcript, "chr1", 1400, 1500, "+", "query2")\n-                self.assertEqual(transcript.getTagValue("flanking_upstream"), "ref2")\n-\n-    def test_run_simple_max_distance(self):\n-        return\n-        reference = self._createTranscript("chr1", 1000, 1100, "+", "ref")\n-        writer = Gff3Writer(self.referenceFileName, 0)\n-        writer.addTranscript(reference)\n-        writer.close()\n-        query1 = self._createTranscript("chr1", 2000, 2100, "-", "query1")\n-        writer = Gff3Writer(self.queryFileName, 0)\n-        writer.addTranscript(query1)\n-        writer.close()\n-        gf = GetFlanking(0)\n-        gf.setInputFile(self.queryFileName, \'gff3\', 0)\n-        gf.setInputFile(self.referenceFileName, \'gff3\', 1)\n-        gf.setOutputFile(self.outputFileName)\n-        gf.setMaxDistance(100)\n-        gf.run()\n-        parser = GffParser(self.outputFileName)\n-        self.assertEqual(parser.getNbTranscripts(), 1)\n-        for i, transcript in enumerate(sorted(parser.getIterator(), key = lambda t: t.getStart())):\n-            if i == 0:\n-                self._checkTranscript(transcript, "chr1", 2000, 2100, "-", "query1")\n-                self.assertIsNone(transcript.getTagValue("flanking"))\n-\n-    def _createTranscript(self, chromosome, start, end, strand, name):\n-        transcript = Transcript()\n-        transcript.setChromosome(chromosome)\n-        transcript.setStart(start)\n-        transcript.setEnd(end)\n-        transcript.setDirection(strand)\n-        transcript.setName(name)\n-        return transcript\n-\n-    def _checkTranscript(self, transcript, chromosome, start, end, strand, name):\n-        self.assertEqual(transcript.getChromosome(), chromosome)\n-        self.assertEqual(transcript.getStart(), 
start)\n-        self.assertEqual(transcript.getEnd(), end)\n-        self.assertEqual(transcript.getStrand(), strand)\n-        self.assertEqual(transcript.getName(), name)\n-\n-        \n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_GetRandomSubset.py
--- a/SMART/Java/Python/test/Test_F_GetRandomSubset.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,55 +0,0 @@
-import unittest
-import os
-from SMART.Java.Python.GetRandomSubset import GetRandomSubset
-from commons.core.parsing.BedParser import BedParser
-from commons.core.parsing.GffParser import GffParser
-
-class Test_F_GetRandomSubset(unittest.TestCase):
-
-    def setUp(self):
-        self._inputFileName = "inputFileTest.bed"
-        self._writeInputFile()
-        self._outputFileName = "output.gff3"
-        
-    def tearDown(self):
-        os.remove(self._inputFileName)
-        os.remove(self._outputFileName)
-
-    def test_run_simple(self):
-        nbElements = 2
-        inputTranscripts = []
-        parser = BedParser(self._inputFileName, 0)
-        for transcript in parser.getIterator():
-            inputTranscripts.append(transcript)
-        grs = GetRandomSubset(0)
-        grs.setInputFile(self._inputFileName, "bed")
-        grs.setNumber(nbElements, None)
-        grs.setOutputFile(self._outputFileName)
-        grs.run()
-        outputTranscripts = []
-        parser = GffParser(self._outputFileName, 0)
-        for transcript in parser.getIterator():
-            outputTranscripts.append(transcript)
-        self.assertEqual(len(outputTranscripts), nbElements)
-        for outputTranscript in outputTranscripts:
-            for inputTranscript in inputTranscripts:
-                if outputTranscript.getChromosome() == inputTranscript.getChromosome() and outputTranscript.getDirection() == inputTranscript.getDirection() and outputTranscript.getStart() == inputTranscript.getStart() and outputTranscript.getEnd() == inputTranscript.getEnd() and outputTranscript.getName() == inputTranscript.getName():
-                    break
-            else:
-                self.fail()
-
-    def _writeInputFile(self):
-        f = open(self._inputFileName, "w")
-        f.write("arm_X\t10000100\t10000200\ttest1.1\t100\t+\t10000100\t10000200\t0\t1\t100,\t0,\n")
-        f.write("arm_X\t10000100\t10000200\ttest1.2\t100\t-\t10000100\t10000200\t0\t1\t100,\t0,\n")
-        f.write("arm_2R\t10000100\t10000200\ttest1.3\t100\t+\t10000100\t10000200\t0\t1\t100,\t0,\n")
-        f.write("arm_X\t10000000\t10000100\ttest1.4\t100\t+\t10000000\t10000100\t0\t1\t100,\t0,\n")
-        f.write("arm_X\t10000200\t10000300\ttest1.5\t100\t+\t10000200\t10000300\t0\t1\t100,\t0,\n")
-        f.write("arm_X\t9999900\t9999950\ttest1.6\t100\t+\t9999900\t9999950\t0\t1\t50,\t0,\n")
-        f.write("arm_X\t10000000\t10000050\ttest1.7\t100\t-\t10000000\t10000050\t0\t1\t50,\t0,\n")
-        f.close()
-        
-                
-
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_GetSizes.py
--- a/SMART/Java/Python/test/Test_F_GetSizes.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,78 +0,0 @@
-import unittest
-import os, glob
-import subprocess
-from SMART.Java.Python.getSizes import GetSizes
-
-class Test_F_GetSizes(unittest.TestCase):
-
-    def setUp(self):
-        self._inputFastaFileName =  "inputFile.fa"
-        self._inputFastqFileName =  "inputFile.fq"
-         
-    def tearDown(self):
-        try :
-            os.remove(self._inputFastaFileName)
-        except:pass
-    
-    def test_run(self):
-        self.writeInputFasta(self._inputFastaFileName)
-        format = "fasta"
-        iGetSizes = GetSizes(inFileName = self._inputFastaFileName, inFormat=format)
-        iGetSizes.run()
-        
-        self.assertEquals(iGetSizes.items, 3)
-        self.assertEquals(iGetSizes.subItems, 3)
-        self.assertEquals(iGetSizes.nucleotides, 108)
-        self.assertEquals(iGetSizes.minAvgMedMax, (36, 36.0, 36, 36))
-    
-    def test_run_as_script(self):
-        self.writeInputFasta(self._inputFastaFileName)
-        format = "fasta"
-        cmd = "%s/SMART/Java/Python/getSizes.py -i %s -f %s -v 0" % (os.environ["REPET_PATH"], self._inputFastaFileName, format)
-        process = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        obsOutput = process.communicate()[0]
-        expOutput = """3 items\n3 sub-items\n108 nucleotides\nmin/avg/med/max transcripts: 36/36.00/36.0/36\n"""
-        
-        self.assertEquals(expOutput, obsOutput)
-    
-    def writeInputFile(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tS-MART\ttest2.1\t9\t1000\t1001\t+\t.\tName=test2.1;overlapsWith=query_3;ID=test2.1\n")
-        f.write("chr1\tS-MART\ttest2.3\t100\t600\t501\t+\t.\tName=test2.3;overlapsWith=query_3;ID=test2.3\n")
-        f.write("chr1\tS-MART\ttest2.5\t700\t950\t251\t+\t.\tName=test2.5;overlapsWith=query_3;ID=test2.5\n")
-        f.write("chr1\tS-MART\ttest2.6\t800\t900\t101\t+\t.\tName=test2.6;overlapsWith=query_3;ID=test2.6\n")
-        f.close()     
-        
-    
-    def writeInputFasta(self,inFileName):
-        f = open(inFileName,'w')
-        f.write('>HWI-EAS337_3:7:1:415:1217/1\n')
-        f.write('GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n')
-        f.write('>HWI-EAS337_3:7:1:208:1489/1\n')
-        f.write('GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n')
-        f.write('>HWI-EAS337_3:7:1:278:1153\n')
-        f.write('GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n')
-        f.close()
-
-
-    def writeInputFastq(self,inFileName):
-        f = open(inFileName,'w')
-        f.write('@HWI-EAS337_3:7:1:415:1217/1\n')
-        f.write('GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n')
-        f.write('+HWI-EAS337_3:7:1:415:1217/1\n')
-        f.write('WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQ\n')
-        f.write('@HWI-EAS337_3:7:1:208:1489/1\n')
-        f.write('GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n')
-        f.write('+HWI-EAS337_3:7:1:208:1489/1\n')
-        f.write('WWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU\n')
-        f.write('@HWI-EAS337_3:7:1:278:1153/1\n')
-        f.write('GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n')
-        f.write('+HWI-EAS337_3:7:1:278:1153/1\n')
-        f.write('WWWWWWWWWWWWWWWWWWWWWWVWVVVWWVUUUUUR\n')
-        f.close()
-        
-            
-        
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_RestrictFromCoverage.py
--- a/SMART/Java/Python/test/Test_F_RestrictFromCoverage.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,133 +0,0 @@
-import unittest
-import os, os.path
-from optparse import OptionParser
-from SMART.Java.Python.misc import Utils
-from SMART.Java.Python.structure.Transcript import Transcript
-from commons.core.writer.Gff3Writer import Gff3Writer
-from commons.core.parsing.GffParser import GffParser
-from SMART.Java.Python.RestrictFromCoverage import RestrictFromCoverage
-
-REFERENCE = 0
-QUERY     = 1
-
-class Test_F_Clusterize(unittest.TestCase):
-
-    def setUp(self):
-        self._queryFileName     = "testQuery.gff3"
-        self._refFileName       = "testRef.gff3"
-        self._outputFileName    = "output.gff3"
-        self._parsers           = {QUERY: Gff3Writer(self._queryFileName, 0), REFERENCE: Gff3Writer(self._refFileName, 0)}
-        self._writeQuery()
-        
-    def tearDown(self):
-        for file in (self._queryFileName, self._refFileName, self._outputFileName):
-            if os.path.exists(file):
-                os.remove(file)
-
-    def _writeQuery(self):
-        self._addTranscript(QUERY, 1, 1000, 2000, "+")
-        self._parsers[QUERY].close()
-
-    def _writeReferences(self, values):
-        for value in values:
-            self._addTranscript(REFERENCE, value["cpt"], value["start"], value["end"], value["strand"])
-        self._parsers[REFERENCE].close()
-
-    def _addTranscript(self, type, cpt, start, end, strand):
-        t = Transcript()
-        t.setChromosome("chr1")
-        t.setName("test%d" % (cpt))
-        t.setStart(start)
-        t.setEnd(end)
-        t.setDirection(strand)
-        self._parsers[type].addTranscript(t)
-
-    def _checkTranscript(self, transcript, start, end, strand):
-        self.assertEquals(transcript.getStart(),     start)
-        self.assertEquals(transcript.getEnd(),       end)
-        self.assertEquals(transcript.getDirection(), strand)
-
-    def _startTool(self, minNucleotides = None, maxNucleotides = None, minPercent = None, maxPercent = None, minOverlap = None, maxOverlap = None, strands = False):
-        rfc = RestrictFromCoverage(0)
-        rfc.setInputFileName(self._queryFileName, "gff3", QUERY)
-        rfc.setInputFileName(self._refFileName,   "gff3", REFERENCE)
-        rfc.setOutputFileName(self._outputFileName)
-        rfc.setNbNucleotides(minNucleotides, maxNucleotides)
-        rfc.setPercent(minPercent, maxPercent)
-        rfc.setOverlap(minOverlap, maxOverlap)
-        rfc.setStrands(strands)
-        rfc.run()
-        return GffParser(self._outputFileName, 0)
-
-    def test_simple(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 2000, "strand": "+"}])
-        parser = self._startTool()
-        self.assertEquals(parser.getNbTranscripts(), 1)
-        for transcript in parser.getIterator():
-            self._checkTranscript(transcript, 1000, 2000, 1)
-
-    def test_nbOverlapsMin_pos(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 2000, "strand": "+"}, {"cpt": 2, "start": 1000, "end": 2000, "strand": "+"}])
-        parser = self._startTool(1, None, None, None, 2)
-        self.assertEquals(parser.getNbTranscripts(), 1)
-        for transcript in parser.getIterator():
-            self._checkTranscript(transcript, 1000, 2000, 1)
-
-    def test_nbOverlapsMin_neg(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 2000, "strand": "+"}])
-        parser = self._startTool(1, None, None, None, 2)
-        self.assertEquals(parser.getNbTranscripts(), 0)
-
-    def test_nbOverlapsMax_pos(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 2000, "strand": "+"}])
-        parser = self._startTool(1, None, None, None, None, 1)
-        self.assertEquals(parser.getNbTranscripts(), 1)
-        for transcript in parser.getIterator():
-            self._checkTranscript(transcript, 1000, 2000, 1)
-
-    def test_nbOverlapsMax_neg(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 2000, "strand": "+"}, {"cpt": 2, "start": 1000, "end": 2000, "strand": "+"}])
-        parser = self._startTool(1, None, None, None, None, 1)
-        self.assertEquals(parser.getNbTranscripts(), 0)
-
-    def test_nbNucleotidesMin_pos(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1100, "strand": "+"}])
-        parser = self._startTool(100, None, None, None, 1)
-        self.assertEquals(parser.getNbTranscripts(), 1)
-        for transcript in parser.getIterator():
-            self._checkTranscript(transcript, 1000, 2000, 1)
-
-    def test_nbNucleotidesMin_neg(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1100, "strand": "+"}])
-        parser = self._startTool(200, None, None, None, 1)
-        self.assertEquals(parser.getNbTranscripts(), 0)
-
-    def test_PercentMin_pos(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1500, "strand": "+"}])
-        parser = self._startTool(None, None, 50, None, 1)
-        self.assertEquals(parser.getNbTranscripts(), 1)
-        for transcript in parser.getIterator():
-            self._checkTranscript(transcript, 1000, 2000, 1)
-
-    def test_PercentMin_neg(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1500, "strand": "+"}])
-        parser = self._startTool(None, None, 100, None, 1)
-        self.assertEquals(parser.getNbTranscripts(), 0)
-
-    def test_NoStrand_neg(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1500, "strand": "-"}])
-        parser = self._startTool(1, None, None, None, 1)
-        self.assertEquals(parser.getNbTranscripts(), 1)
-
-    def test_strand_pos(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1500, "strand": "+"}])
-        parser = self._startTool(1, None, None, None, 1, None, True)
-        self.assertEquals(parser.getNbTranscripts(), 1)
-
-    def test_strand_neg(self):
-        self._writeReferences([{"cpt": 1, "start": 1000, "end": 1500, "strand": "-"}])
-        parser = self._startTool(1, None, None, None, 1, None, True)
-        self.assertEquals(parser.getNbTranscripts(), 0)
-
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_clusterizeBySlidingWindows.py
--- a/SMART/Java/Python/test/Test_F_clusterizeBySlidingWindows.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,79 +0,0 @@
-import unittest
-import os
-from commons.core.utils.FileUtils import FileUtils
-from SMART.Java.Python.misc import Utils
-
-
-class Test_F_clusterizeBySlidingWindows(unittest.TestCase):
-
-    def setUp(self):
-        self._outputFileName         = 'output.gff3'
-        self._outputCsvFileName      = 'output.csv'
-        self._outputPngFileName      = 'output.png'
-        self._expectedOutputFileName = 'expOut.png'
-        
-    def tearDown(self):
-        for fileName in (self._outputFileName, self._outputCsvFileName, self._outputPngFileName, self._expectedOutputFileName):
-            if os.path.exists(fileName):
-                os.remove(fileName)
-        
-    def test_run_default_option(self):
-        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerExpected.gff3 -f gff3 -o %s -s 50000 -e 50 -v 0" % (self._outputFileName)
-        os.system(cmd)
-        exp = '../TestFiles/clusterize_default_expected.gff3' 
-        self.assertTrue(Utils.diff(exp, self._outputFileName))
-        
-    def test_run_default_option_map_output(self):
-        self._outputFileName         = 'output.map'
-        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerExpected.gff3 -f gff3 -o %s -s 50000 -e 50 -u map -v 0" % (self._outputFileName)
-        os.system(cmd)
-        exp = '../TestFiles/clusterize_default_expected.map' 
-        self.assertTrue(Utils.diff(exp, self._outputFileName))
-        
-    def test_run_newTag_option(self):
-        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerExpected.gff3 -f gff3 -o %s -s 50000 -e 50 -w newTag -v 0" % (self._outputFileName)
-        os.system(cmd)
-        exp = '../TestFiles/clusterize_output_tag_expected.gff3' 
-        self.assertTrue(Utils.diff(exp, self._outputFileName))
-        
-    def test_run_normalize_option(self):
-        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerOutput.gff3 -f gff3 -o %s -s 50000 -e 50 -m -v 0" % (self._outputFileName)
-        os.system(cmd)
-        exp = '../TestFiles/clusterize_normalize_expected.gff3' 
-        self.assertTrue(Utils.diff(exp, self._outputFileName))
-
-    def test_run_strand_option(self):
-        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerOutput.gff3 -f gff3 -o %s -s 50000 -e 50 -2 -v 0"  % (self._outputFileName)
-        os.system(cmd)
-        exp = '../TestFiles/clusterize_strands_expected.gff3'
-        self.assertTrue(Utils.diff(exp, self._outputFileName))
-
-    def test_run_excel_option(self):
-        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerOutput.gff3 -f gff3 -o %s -s 50000 -e 50 -x %s -v 0" % (self._outputFileName, self._outputCsvFileName)
-        os.system(cmd)
-        self.assertTrue(FileUtils.isRessourceExists(self._outputCsvFileName))
-        
-        
-    def test_run_normalize_strand_excel_option(self):
-        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/mapperAnalyzerOutput.gff3 -f gff3 -o %s -s 50000 -e 50 -m -2 -x %s -v 0" % (self._outputFileName, self._outputCsvFileName)
-        os.system(cmd)
-        self.assertTrue(FileUtils.isRessourceExists(self._outputCsvFileName))
-      
-    def test_run_tag_operation_options(self):
-        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/testSW.gff3 -f gff -s 100 -e 0 -g value1 -r min -o %s -v 0" % (self._outputFileName)
-        os.system(cmd)
-        exp = open(self._expectedOutputFileName, 'w')
-        exp.write("chr1\tS-MART\ttranscript\t1\t100\t.\t+\t.\tnbElements=0;minValue1=0;ID=region1;Name=region1\n")
-        exp.write("chr1\tS-MART\ttranscript\t101\t200\t.\t+\t.\tnbElements=2.000000;minValue1=1.0;ID=region2;Name=region2\n")
-        exp.write("chr1\tS-MART\ttranscript\t201\t300\t.\t+\t.\tnbElements=2.000000;minValue1=10.0;ID=region3;Name=region3\n")
-        exp.close()   
-        self.assertTrue(Utils.diff(self._outputFileName, self._expectedOutputFileName))
-        
-    def test_run_plot_options(self):
-        cmd = "python ../clusterizeBySlidingWindows.py -i ../TestFiles/testSW.gff3 -f gff -s 100 -e 0 -g value1 -r min -o %s -p %s -v 0" % (self._outputFileName, self._outputPngFileName)
-        os.system(cmd)     
-        self.assertTrue(FileUtils.isRessourceExists(self._outputPngFileName))
-        
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_compareOverlapping.py
--- a/SMART/Java/Python/test/Test_F_compareOverlapping.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,199 +0,0 @@\n-import unittest\n-import os, os.path\n-from SMART.Java.Python.misc import Utils\n-\n-SMART_PATH = os.environ["REPET_PATH"] + "/SMART"\n-\n-class Test_F_compareOverlapping(unittest.TestCase):\n-\n-    def setUp(self):\n-        self._inputFileQuery     = "inputFileTest1.bed"\n-        self._inputFileReference = "inputFileTest2.bed"\n-        self._inputFileName3     = "inputFileTest3.bed"\n-        self._expOutputFileName  = "expOutput.gff3"  \n-        self._outputFileName     = "output.gff3"   \n-        self._writeInputFileQuery()\n-        self._writeInputFileRefence()\n-        self._writeInputFile3()  \n-        \n-    def tearDown(self):\n-        for fileName in (self._inputFileQuery, self._inputFileReference, self._inputFileName3, self._expOutputFileName, self._outputFileName):\n-            if os.path.exists(fileName):\n-                os.remove(fileName)\n-\n-    def test_runAsScript_withoutOption(self):\n-        self._writeOutputFile_withoutOption(self._expOutputFileName) \n-        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-            \n-    def test_runAsScript_optionNFirstNucleotide(self):\n-        self._writeOutputFile_NFirstNucleotide(self._expOutputFileName) \n-        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -S 200 -s 200 -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-\n-    def test_runAsScript_optionNLastNucleotide(self):\n-        self._writeOutputFile_NLastNucleotide(self._expOutputFileName) \n-        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -U 200 -u 200 -v 0" % (SMART_PATH, 
self._inputFileQuery, self._inputFileReference, self._outputFileName))\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-\n-    def test_runAsScript_option_5PrimeExtension(self):\n-        self._writeOutputFile_5PrimeExtension(self._expOutputFileName) \n-        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -E 110 -e 110 -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-\n-    def test_runAsScript_option_3PrimeExtension(self):\n-        self._writeOutputFile_3PrimeExtension(self._expOutputFileName) \n-        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -N 110 -n 110 -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-        \n-    def test_runAsScript_colinear(self):\n-        self._writeOutputFile_colinear(self._expOutputFileName) \n-        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -c -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-\n-    def test_runAsScript_antisense(self):\n-        self._writeOutputFile_antisense(self._expOutputFileName) \n-        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed -o %s -a -v 0" % (SMART_PATH, self._inputFileQuery, self._inputFileReference, self._outputFileName))\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-\n-    def test_runAsScript_minOverlap(self):\n-        self._writeOutputFile_minOverlap(self._expOutputFileName) \n-        os.system("python %s/Java/Python/CompareOverlapping.py -i %s -f bed -j %s -g bed 
-o %s -m 51 -v 0" % (SMART_PATH, self._inputFileQuery, self._inputF'..b'890\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n-        f.close()   \n-        \n-    def _writeOutputFile_colinear(self, outputFileName):\n-        f = open(outputFileName, "w")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t3199\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.4;Name=test1.4\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n-        f.close()    \n-        \n-    def _writeOutputFile_antisense(self, outputFileName):\n-        f = open(outputFileName, "w")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n-        f.close()  \n-           \n-    def _writeOutputFile_minOverlap(self, outputFileName):\n-        f = open(outputFileName, "w")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t3199\\t.\\t+\\t.\\toverlapWith=test2.3,test2.2,test2.1;nbOverlaps=3.000000;ID=test1.4;Name=test1.4\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n-        f.close() \n-        \n-    def _writeOutputFile_pcOverlap(self, outputFileName):\n-        f = open(outputFileName, "w")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n-        f.close() \n-        \n-    def _writeOutputFile_includeNotOverlap(self, outputFileName):\n-        f = 
open(outputFileName, "w")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t3199\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.4;Name=test1.4\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t199\\t.\\t+\\t.\\tnbOverlaps=0;ID=test1.3;Name=test1.3\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n-        f.close()\n-\n-    def _writeOutputFile_exclude(self, outputFileName):\n-        f = open(outputFileName, "w")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t199\\t.\\t+\\t.\\tID=test1.3;Name=test1.3\\n")\n-        f.close()\n-        \n-    def _writeOutputFile_included(self, outputFileName):\n-        f = open(outputFileName, "w")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n-        f.close()\n-\n-    def _writeOutputFile_including(self, outputFileName):\n-        f = open(outputFileName, "w")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t100\\t3199\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.4;Name=test1.4\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.1;Name=test1.1\\n")\n-        f.write("arm_X\\tS-MART\\ttranscript\\t1000\\t1999\\t.\\t-\\t.\\toverlapWith=test2.1;nbOverlaps=1.000000;ID=test1.2;Name=test1.2\\n")\n-        f.close()\n-\n-    def _writeInputFileQuery_withExon(self):\n-        f = open(self._inputFileQuery, "w")\n-        f.write("arm_X\\t1000\\t2000\\ttest1.1\\t1000\\t+\\t1000\\t2000\\t0\\t1\\t1000,\\t0,\\n")\n-        
f.write("arm_X\\t1000\\t2000\\ttest1.2\\t1000\\t-\\t1000\\t2000\\t0\\t1\\t1000,\\t0,\\n")\n-        f.write("arm_X\\t100\\t200\\ttest1.3\\t1000\\t+\\t100\\t200\\t0\\t1\\t100,\\t0,\\n")\n-        f.write("arm_X\\t100\\t3200\\ttest1.4\\t1000\\t+\\t100\\t3200\\t0\\t2\\t100,100,\\t0,3000,\\n")\n-        f.close()\n-        \n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_convertTranscriptFile.py
--- a/SMART/Java/Python/test/Test_F_convertTranscriptFile.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,125 +0,0 @@\n-import unittest\n-import os\n-from SMART.Java.Python.misc import Utils\n-from commons.core.utils.FileUtils import FileUtils\n-\n-\n-class Test_F_convertTranscriptFile(unittest.TestCase):\n-\n-    def setUp(self):\n-        self._inputFileName     = None\n-        self._expOutputFileName = None\n-        self._outputFileName    = None\n-\n-    def tearDown(self):\n-        for fileName in (self._expOutputFileName, self._outputFileName):\n-            if fileName != None and os.path.exists(fileName):\n-                os.remove(fileName)\n-\n-    def test_run_SAMtoGFF3(self):\n-        self._inputFileName = "%s/SMART/Java/Python/test/input.sam" % (os.environ["REPET_PATH"])\n-        self._writeInputSam(self._inputFileName)\n-        self._outputFileName = \'%s/SMART/Java/Python/test/obsOutput.gff3\' % (os.environ["REPET_PATH"])\n-        cmd =  \'python %s/SMART/Java/Python/convertTranscriptFile.py -i %s -f sam -o %s -g gff3 -v 0\' % (os.environ["REPET_PATH"], self._inputFileName, self._outputFileName)\n-        os.system(cmd)\n-        self._expOutputFileName = \'%s/SMART/Java/Python/test/expOutput.gff3\' % (os.environ["REPET_PATH"])\n-        self._writeExpGff3File_test1(self._expOutputFileName)\n-        \n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-        os.remove(self._inputFileName)\n-        \n-    def test_run_BEDtoGFF3(self):\n-        self._inputFileName = "%s/SMART/Java/Python/TestFiles/test_distance.bed" % (os.environ["REPET_PATH"])\n-        self._outputFileName = \'%s/SMART/Java/Python/test/obsOutput.gff3\' % (os.environ["REPET_PATH"])\n-        cmd =  \'python %s/SMART/Java/Python/convertTranscriptFile.py -i %s -f bed -o %s -g gff3 -v 0\' % (os.environ["REPET_PATH"], self._inputFileName, self._outputFileName)\n-        os.system(cmd)\n-        self._expOutputFileName = \'%s/SMART/Java/Python/test/expOutput.gff3\' % (os.environ["REPET_PATH"])\n-        
self._writeExpGff3File_test2(self._expOutputFileName)\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-              \n-    def test_run_GFF3toCSV(self):\n-        self._inputFileName = "%s/SMART/Java/Python/TestFiles/mapperAnalyzerExpected.gff3" % (os.environ["REPET_PATH"])\n-        self._outputFileName = \'%s/SMART/Java/Python/test/obsOutput.csv\' % (os.environ["REPET_PATH"])\n-        cmd =  \'python %s/SMART/Java/Python/convertTranscriptFile.py -i %s -f gff3 -o %s -g csv -v 0\' % (os.environ["REPET_PATH"], self._inputFileName, self._outputFileName)\n-        os.system(cmd)\n-        self._expOutputFileName = \'%s/SMART/Java/Python/test/expOutput.csv\' % (os.environ["REPET_PATH"])\n-        self._writeExpCsvFile(self._expOutputFileName)\n-        \n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-              \n-    def test_run_GFF3toSam(self):\n-        self._inputFileName = "%s/SMART/Java/Python/TestFiles/mapperAnalyzerExpected.gff3" % (os.environ["REPET_PATH"])\n-        self._outputFileName = \'%s/SMART/Java/Python/test/obsOutput.sam\' % (os.environ["REPET_PATH"])\n-        cmd =  \'python %s/SMART/Java/Python/convertTranscriptFile.py -i %s -f gff3 -o %s -g sam -v 0\' % (os.environ["REPET_PATH"], self._inputFileName, self._outputFileName)\n-        os.system(cmd)\n-        self._expOutputFileName = \'%s/SMART/Java/Python/test/expOutput.sam\' % (os.environ["REPET_PATH"])\n-        self._writeExpSamFile(self._expOutputFileName)\n-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))\n-\n-    def test_run_Gff3toWig(self):\n-        self._inputFileName = \'%s/SMART/Java/Python/TestFiles/sorted_query.gff3\' % (os.environ["REPET_PATH"])\n-        self._outputFileName = \'%s/SMART/Java/Python/test/obsOutput.wig\' % (os.environ["REPET_PATH"])\n-        cmd =  \'python %s/SMART/Java/Python/convertTranscriptFile.py -i %s -f gff3 -o %s -g wig -v 0\' % 
(os.environ["REPET_PATH"], self._inputFileName, self._outputFileName)\n-        os.system(cmd) \n-        outputFile = \'%s/SMART/Java/Python/Test'..b'WWWWVWWWWVVWWWWWVVWWWWVVWWTTTTTR\\tXT:A:U\\tNM:i:0\\tSM:i:37\\tAM:i:37\\tX0:i:1\\tX1:i:0\\tXM:i:0\\tXO:i:0\\tXG:i:0\\tMD:Z:36\\n\' )\n-        file.close()\n-        \n-    def _writeExpGff3File_test1(self, fileName):\n-        file = open(fileName, \'w\')\n-        file.write( \'C02HBa0185P07_LR40\\tSMART\\ttranscript\\t3889\\t3924\\t.\\t-\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1217;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1217\\n\')\n-        file.write( \'C02HBa0185P07_LR40\\tSMART\\ttranscript\\t3830\\t3865\\t.\\t+\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:415:1217;identity=100.000000;Name=HWI-EAS337_3:7:1:415:1217\\n\')\n-        file.write( \'C11SLe0053P22_LR298\\tSMART\\ttranscript\\t2130\\t2165\\t.\\t-\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1178:755;identity=100.000000;Name=HWI-EAS337_3:7:1:1178:755\\n\')\n-        file.write( \'C11SLe0053P22_LR298\\tSMART\\ttranscript\\t1980\\t2015\\t.\\t+\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:1178:755;identity=100.000000;Name=HWI-EAS337_3:7:1:1178:755\\n\')\n-        file.write( \'C06HBa0144J05_LR355\\tSMART\\ttranscript\\t1\\t36\\t.\\t+\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:277:1259;identity=100.000000;Name=HWI-EAS337_3:7:1:277:1259\\n\')\n-        file.write( \'C06HBa0144J05_LR355\\tSMART\\ttranscript\\t101\\t136\\t.\\t-\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:277:1259;identity=100.000000;Name=HWI-EAS337_3:7:1:277:1259\\n\')\n-        file.write( 
\'C08HBa0165B06_LR218\\tSMART\\ttranscript\\t3619\\t3654\\t.\\t-\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:1231;identity=100.000000;Name=HWI-EAS337_3:7:1:447:1231\\n\')\n-        file.write( \'C08HBa0165B06_LR218\\tSMART\\ttranscript\\t3575\\t3610\\t.\\t+\\t.\\tquality=60;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0.000000;ID=HWI-EAS337_3:7:1:447:1231;identity=100.000000;Name=HWI-EAS337_3:7:1:447:1231\\n\')\n-        file.close()\n-        \n-    def _writeExpGff3File_test2(self, fileName):\n-        file = open(fileName, \'w\')\n-        file.write( \'arm_X\\tSMART\\ttranscript\\t1000\\t1999\\t.\\t+\\t.\\tID=test2.1;Name=test2.1\\n\' )\n-        file.write( \'arm_X\\tSMART\\ttranscript\\t250\\t349\\t.\\t+\\t.\\tID=test2.2;Name=test2.2\\n\' )\n-        file.write( \'arm_X\\tSMART\\ttranscript\\t150\\t249\\t.\\t+\\t.\\tID=test2.3;Name=test2.3\\n\' )\n-        file.close()\n-        \n-    def _writeExpCsvFile(self, fileName):\n-        file = open(fileName, \'w\')      \n-        file.write( \'chromosome,start,end,strand,exons,ID,bestRegion,feature,identity,nbGaps,nbMismatches,nbOccurrences,occurrence,rank,score\\n\' )\n-        file.write( \'chr1,6155418,6155441,"+",None,test1/1,(self),match,100,0,0,1,1,1,24\\n\' )\n-        file.write( \'chr2,26303950,26303981,"+",None,test2/1-1,(self),match,93,0,2,3,1,1,32\\n\' )\n-        file.write( \'chr3,28320540,28320574,"+",None,test2/1-2,chr2:26303950-26303981,match,94,0,2,3,2,None,35\\n\' )\n-        file.write( \'chr4,28565007,28565041,"+",None,test2/1-3,chr2:26303950-26303981,match,88,0,4,3,3,3,35\\n\' )\n-        file.close()\n-        \n-    def _writeExpSamFile(self, fileName):\n-        file = open(fileName, \'w\')\n-        file.write(\'@SQ\\tSN:chr4\\tLN:28565041\\n\')\n-        file.write(\'@SQ\\tSN:chr3\\tLN:28320574\\n\')\n-        file.write(\'@SQ\\tSN:chr2\\tLN:26303981\\n\')\n-        file.write(\'@SQ\\tSN:chr1\\tLN:6155441\\n\')\n- 
       file.write(\'test1/1\\t0\\tchr1\\t6155418\\t255\\t24M\\t*\\t0\\t0\\t*\\t*\\tNM:i:0\\n\')\n-        file.write(\'test2/1\\t0\\tchr2\\t26303950\\t255\\t32M\\t*\\t0\\t0\\t*\\t*\\tNM:i:0\\n\')\n-        file.write(\'test2/1\\t0\\tchr3\\t28320540\\t255\\t35M\\t*\\t0\\t0\\t*\\t*\\tNM:i:0\\n\')\n-        file.write(\'test2/1\\t0\\tchr4\\t28565007\\t255\\t35M\\t*\\t0\\t0\\t*\\t*\\tNM:i:0\\n\')\n-        file.close()\n-\n-\n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_coordinatesToSequence.py
--- a/SMART/Java/Python/test/Test_F_coordinatesToSequence.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,31 +0,0 @@
-import unittest
-import os
-from commons.core.utils.FileUtils import FileUtils
-
-TESTFILES_PATH = os.environ['REPET_PATH'] + '/SMART/Java/Python/TestFiles'
-
-class Test_F_coordinatesToSequence(unittest.TestCase):
-
-    def test_run(self):
-        cmd = "python ../coordinatesToSequence.py -i %s/testC2S.gff3 -f gff3 -s %s/testC2S.fa -o testOut.fa -v 10 " % (TESTFILES_PATH, TESTFILES_PATH)
-        os.system(cmd)
-        obs = 'testOut.fa'
-        exp = 'expOut.fa'
-        self._writeExpOut(exp)
-        self.assertTrue(FileUtils.isRessourceExists(obs))
-        self.assertTrue(FileUtils.are2FilesIdentical(obs, exp))
-        os.remove(obs)
-        os.remove(exp)       
-
-    def _writeExpOut(self, outputFileName):
-        f = open(outputFileName, "w")
-        f.write(">region0\n")
-        f.write("CAACATTAGC\n")
-        f.write(">region1\n")
-        f.write("TTAGCCGGCC\n")
-        f.write(">region2\n")
-        f.write("GGCCGGCTAA\n")
-        f.close()
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_findTss.py
--- a/SMART/Java/Python/test/Test_F_findTss.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,93 +0,0 @@
-import unittest
-import os, os.path
-from SMART.Java.Python.misc import Utils
-
-
-TestF_PATH = os.environ['REPET_PATH'] + '/SMART/Java/Python/TestFiles'
-
-
-class Test_F_findTss(unittest.TestCase):
-    
-    def setUp(self):
-        self._outputFileName = 'output.gff3'
-        self._expOutputFileName = 'expOutput.gff3'
-        
-    def tearDown(self):
-        os.remove(self._outputFileName)
-        os.remove(self._expOutputFileName)
-
-    def test_run_default_option(self):
-        cmd = "python ../findTss.py -i %s/mapperAnalyzerExpected.gff3 -f gff3 -o output.gff3 -v 0" % TestF_PATH
-        os.system(cmd)
-        self._writeExpDefaultOption(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-        
-    def test_run_normalize_option(self):
-        cmd = "python ../findTss.py -i %s/mapperAnalyzerExpected.gff3 -f gff3 -o output.gff3 -n -v 0" % TestF_PATH
-        os.system(cmd)
-        self._writeExpNormalizeOption(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-
-    def test_run_distance_option(self):
-        cmd = "python ../findTss.py -i %s/mapperAnalyzerExpected.gff3 -f gff3 -o output.gff3 -d 20 -v 0" % TestF_PATH
-        os.system(cmd)
-        self._writeExpDistance_option(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-
-    def test_run_colinear_option(self):
-        cmd = "python ../findTss.py -i %s/mapperAnalyzerExpected.gff3 -f gff3 -o output.gff3 -e -v 0" % TestF_PATH
-        os.system(cmd)
-        self._writeExpColinearOption(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-        
-    def test_run_excel_option(self):
-        cmd = "python ../findTss.py -i %s/mapperAnalyzerExpected.gff3 -f gff3 -o output.gff3 -c output.csv -v 0" % TestF_PATH
-        os.system(cmd)
-        obsCsv = 'output.csv'
-        self.assertTrue(os.path.exists(obsCsv))
-        os.remove(obsCsv)
-        self._writeExpExcelOption(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-
-    def _writeExpDefaultOption(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tS-MART\tmatch\t6155418\t6155418\t24\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
-        f.write("chr2\tS-MART\tmatch\t26303950\t26303950\t32\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
-        f.write("chr3\tS-MART\tmatch\t28320540\t28320540\t35\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
-        f.write("chr4\tS-MART\tmatch\t28565007\t28565007\t35\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
-        f.close()
-        
-    def _writeExpNormalizeOption(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tS-MART\tmatch\t6155418\t6155418\t24\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
-        f.write("chr2\tS-MART\tmatch\t26303950\t26303950\t32\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
-        f.write("chr3\tS-MART\tmatch\t28320540\t28320540\t35\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
-        f.write("chr4\tS-MART\tmatch\t28565007\t28565007\t35\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
-        f.close()
-        
-    def _writeExpDistance_option(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tS-MART\tmatch\t6155418\t6155418\t24\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
-        f.write("chr2\tS-MART\tmatch\t26303950\t26303950\t32\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
-        f.write("chr3\tS-MART\tmatch\t28320540\t28320540\t35\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
-        f.write("chr4\tS-MART\tmatch\t28565007\t28565007\t35\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
-        f.close()
-        
-    def _writeExpColinearOption(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tS-MART\tmatch\t6155418\t6155418\t24\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
-        f.write("chr2\tS-MART\tmatch\t26303950\t26303950\t32\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
-        f.write("chr3\tS-MART\tmatch\t28320540\t28320540\t35\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
-        f.write("chr4\tS-MART\tmatch\t28565007\t28565007\t35\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
-        f.close()
-        
-    def _writeExpExcelOption(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tS-MART\tmatch\t6155418\t6155418\t24\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1\n")
-        f.write("chr2\tS-MART\tmatch\t26303950\t26303950\t32\t+\t.\toccurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1\n")
-        f.write("chr3\tS-MART\tmatch\t28320540\t28320540\t35\t+\t.\toccurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1\n")
-        f.write("chr4\tS-MART\tmatch\t28565007\t28565007\t35\t+\t.\toccurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1\n")
-        f.close()
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_getExons.py
--- a/SMART/Java/Python/test/Test_F_getExons.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,145 +0,0 @@
-import unittest
-import os
-from SMART.Java.Python.getExons import GetExons
-from commons.core.parsing.GffParser import GffParser
-
-class Test_F_GetExons(unittest.TestCase):
-
-    def setUp(self):
-        self._inputFileName = "inputFileTest.bed"
-        self._writeInputFile()
-        self._outputFileName = "output.gff3"
-        
-    def tearDown(self):
-        os.remove(self._inputFileName)
-        os.remove(self._outputFileName)
-
-    def test_run_simple(self):
-        ge = GetExons(0)
-        ge.setInputFile(self._inputFileName, "bed")
-        ge.setOutputFile(self._outputFileName)
-        ge.run()
-        parser = GffParser(self._outputFileName, 0)
-        self.assertEqual(parser.getNbTranscripts(), 5)
-        for cpt, transcript in enumerate(parser.getIterator()):
-            self.assertEqual(transcript.getChromosome(), "arm_X")
-            self.assertEqual(transcript.getDirection(), 1)
-            self.assertEqual(transcript.getNbExons(), 1)
-            if cpt == 0:
-                self.assertEqual(transcript.getStart(), 10000001)
-                self.assertEqual(transcript.getEnd(),   10000100)
-            elif cpt == 1:
-                self.assertEqual(transcript.getStart(), 10000201)
-                self.assertEqual(transcript.getEnd(),   10000300)
-            elif cpt == 2:
-                self.assertEqual(transcript.getStart(), 10000401)
-                self.assertEqual(transcript.getEnd(),   10000500)
-            elif cpt == 3:
-                self.assertEqual(transcript.getStart(), 10000601)
-                self.assertEqual(transcript.getEnd(),   10000700)
-            elif cpt == 4:
-                self.assertEqual(transcript.getStart(), 10000801)
-                self.assertEqual(transcript.getEnd(),   10000900)
-
-    def test_run_firstExon(self):
-        ge = GetExons(0)
-        ge.setInputFile(self._inputFileName, "bed")
-        ge.setOutputFile(self._outputFileName)
-        ge.setSelection("1")
-        ge.run()
-        parser = GffParser(self._outputFileName, 0)
-        self.assertEqual(parser.getNbTranscripts(), 1)
-        for cpt, transcript in enumerate(parser.getIterator()):
-            self.assertEqual(transcript.getChromosome(), "arm_X")
-            self.assertEqual(transcript.getDirection(), 1)
-            self.assertEqual(transcript.getNbExons(), 1)
-            self.assertEqual(transcript.getStart(), 10000001)
-            self.assertEqual(transcript.getEnd(),   10000100)
-
-    def test_run_lastExon(self):
-        ge = GetExons(0)
-        ge.setInputFile(self._inputFileName, "bed")
-        ge.setOutputFile(self._outputFileName)
-        ge.setSelection("-1")
-        ge.run()
-        parser = GffParser(self._outputFileName, 0)
-        self.assertEqual(parser.getNbTranscripts(), 1)
-        for cpt, transcript in enumerate(parser.getIterator()):
-            self.assertEqual(transcript.getChromosome(), "arm_X")
-            self.assertEqual(transcript.getDirection(), 1)
-            self.assertEqual(transcript.getNbExons(), 1)
-            self.assertEqual(transcript.getStart(), 10000801)
-            self.assertEqual(transcript.getEnd(),   10000900)
-
-    def test_run_first_lastExon(self):
-        ge = GetExons(0)
-        ge.setInputFile(self._inputFileName, "bed")
-        ge.setOutputFile(self._outputFileName)
-        ge.setSelection("1,-1")
-        ge.run()
-        parser = GffParser(self._outputFileName, 0)
-        self.assertEqual(parser.getNbTranscripts(), 2)
-        for cpt, transcript in enumerate(parser.getIterator()):
-            self.assertEqual(transcript.getChromosome(), "arm_X")
-            self.assertEqual(transcript.getDirection(), 1)
-            self.assertEqual(transcript.getNbExons(), 1)
-            if cpt == 0:
-                self.assertEqual(transcript.getStart(), 10000001)
-                self.assertEqual(transcript.getEnd(),   10000100)
-            elif cpt == 1:
-                self.assertEqual(transcript.getStart(), 10000801)
-                self.assertEqual(transcript.getEnd(),   10000900)
-
-    def test_run_interval(self):
-        ge = GetExons(0)
-        ge.setInputFile(self._inputFileName, "bed")
-        ge.setOutputFile(self._outputFileName)
-        ge.setSelection("2..3")
-        ge.run()
-        parser = GffParser(self._outputFileName, 0)
-        self.assertEqual(parser.getNbTranscripts(), 2)
-        for cpt, transcript in enumerate(parser.getIterator()):
-            self.assertEqual(transcript.getChromosome(), "arm_X")
-            self.assertEqual(transcript.getDirection(), 1)
-            self.assertEqual(transcript.getNbExons(), 1)
-            if cpt == 0:
-                self.assertEqual(transcript.getStart(), 10000201)
-                self.assertEqual(transcript.getEnd(),   10000300)
-            elif cpt == 1:
-                self.assertEqual(transcript.getStart(), 10000401)
-                self.assertEqual(transcript.getEnd(),   10000500)
-
-    def test_run_interval_element(self):
-        ge = GetExons(0)
-        ge.setInputFile(self._inputFileName, "bed")
-        ge.setOutputFile(self._outputFileName)
-        ge.setSelection("2..3,-1")
-        ge.run()
-        parser = GffParser(self._outputFileName, 0)
-        self.assertEqual(parser.getNbTranscripts(), 3)
-        for cpt, transcript in enumerate(parser.getIterator()):
-            self.assertEqual(transcript.getChromosome(), "arm_X")
-            self.assertEqual(transcript.getDirection(), 1)
-            self.assertEqual(transcript.getNbExons(), 1)
-            if cpt == 0:
-                self.assertEqual(transcript.getStart(), 10000201)
-                self.assertEqual(transcript.getEnd(),   10000300)
-            elif cpt == 1:
-                self.assertEqual(transcript.getStart(), 10000401)
-                self.assertEqual(transcript.getEnd(),   10000500)
-            elif cpt == 2:
-                self.assertEqual(transcript.getStart(), 10000801)
-                self.assertEqual(transcript.getEnd(),   10000900)
-
-
-
-    def _writeInputFile(self):
-        f = open(self._inputFileName, "w")
-        f.write("arm_X\t10000001\t10000900\ttest1.1\t100\t+\t10000100\t10000200\t0\t5\t100,100,100,100,100,\t0,200,400,600,800,\n")
-        f.close()
-        
-                
-
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_getLetterDistribution.py
--- a/SMART/Java/Python/test/Test_F_getLetterDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,118 +0,0 @@
-import os
-import sys
-import unittest
-from SMART.Java.Python.test.MockGetLetterDistribution import MockGetLetterDistributionFasta
-from SMART.Java.Python.test.MockGetLetterDistribution import MockGetLetterDistributionFastq
-from SMART.Java.Python.test.MockGetLetterDistribution import MockGetLetterDistributionExpectedCSV
-
-class Test_F_getLetterDistribution(unittest.TestCase):
-
- def tearDown(self):
- os.system("rm tmp*.*")
-
- def test_getLetterDistributionWithFasta(self):
- iFastaMock = MockGetLetterDistributionFasta()
- fastaFileName = "MockFasta_GetLetterDistribution.fa"
- iFastaMock.write(fastaFileName)
-
- outputName = "dummy_result_fasta"
- os.system("python %s/SMART/Java/Python/getLetterDistribution.py -i %s -f fasta -o %s" % (os.environ["REPET_PATH"], fastaFileName, outputName))
-
- self.assertTrue (os.path.exists(outputName + ".png"))
- self.assertTrue (os.path.exists(outputName + "PerNt.png"))
-
- os.remove(outputName + ".png")
- os.remove(outputName + "PerNt.png")
- os.remove(fastaFileName)
-
- def test_getLetterDistributionWithFastq(self):
- iFastqMock = MockGetLetterDistributionFastq()
- fastqFileName = "MockFastq_GetLetterDistribution.fastq"
- iFastqMock.write(fastqFileName)
-
- outputName = "dummy_result_fastq"
- os.system("python %s/SMART/Java/Python/getLetterDistribution.py -i %s -f fastq -o %s" % (os.environ["REPET_PATH"], fastqFileName, outputName))
-
- self.assertTrue (os.path.exists(outputName + ".png"))
- self.assertTrue (os.path.exists(outputName + "PerNt.png"))
-
- os.remove(fastqFileName)
- os.remove(outputName + ".png")
- os.remove(outputName + "PerNt.png")
-
- def test_getLetterDistributionWithFastaCSVOutput(self):
- iFastaMock = MockGetLetterDistributionFasta()
- fastaFileName = "MockFasta_GetLetterDistribution.fa"
- iFastaMock.write(fastaFileName)
-
- iCSVMock = MockGetLetterDistributionExpectedCSV()
- expCSVFileName = "expCSV.csv"
- iCSVMock.write(expCSVFileName)
-
- outputName = "dummy_result_fasta"
- os.system("python %s/SMART/Java/Python/getLetterDistribution.py -i %s -f fasta -o %s -c" % (os.environ["REPET_PATH"], fastaFileName, outputName))
-
- obsCSVFileName = outputName + ".csv"
-
- self.assertTrue (os.path.exists(outputName + ".png"))
- self.assertTrue (os.path.exists(outputName + "PerNt.png"))
- self.assertTrue (self._are2FilesIdentical(expCSVFileName, obsCSVFileName))
-
- os.remove(outputName + ".png")
- os.remove(outputName + "PerNt.png")
- os.remove(fastaFileName)
- os.remove(expCSVFileName)
- os.remove(obsCSVFileName)
-
- def test_getLetterDistributionWithFastqCVSOutput(self):
- iFastqMock = MockGetLetterDistributionFastq()
- fastqFileName = "MockFastq_GetLetterDistribution.fastq"
- iFastqMock.write(fastqFileName)
-
- iCSVMock = MockGetLetterDistributionExpectedCSV()
- expCSVFileName = "expCSV.csv"
- iCSVMock.write(expCSVFileName)
-
- outputName = "dummy_result_fastq"
- os.system("python %s/SMART/Java/Python/getLetterDistribution.py -i %s -f fastq -o %s -c" % (os.environ["REPET_PATH"], fastqFileName, outputName))
-
- obsCSVFileName = outputName + ".csv"
-
- self.assertTrue (os.path.exists(outputName + ".png"))
- self.assertTrue (os.path.exists(outputName + "PerNt.png"))
- self.assertTrue (self._are2FilesIdentical(expCSVFileName, obsCSVFileName))
-
- os.remove(fastqFileName)
- os.remove(outputName + ".png")
- os.remove(outputName + "PerNt.png")
- os.remove(expCSVFileName)
- os.remove(obsCSVFileName)
-
- def _are2FilesIdentical(self, file1, file2 ):
- tmpFile = "diff_%s_%s" % ( os.path.basename(file1), os.path.basename(file2) )
- cmd = "diff %s %s >> %s" % ( file1, file2, tmpFile )
- returnStatus = os.system( cmd )
- if returnStatus != 0:
- msg = "ERROR: 'diff' returned '%i'" % ( returnStatus )
- sys.stderr.write( "%s\n" % msg )
- sys.stderr.flush()
- os.remove( tmpFile )
- return False
- if self.isEmpty( tmpFile ):
- os.remove( tmpFile )
- return True
- else:
- os.remove( tmpFile )
- return False
-
- def getNbLinesInSingleFile(self, fileName):
- fileHandler = open(fileName, "r" )
- lines = fileHandler.readlines()
- fileHandler.close()
- return len(lines)
-
- def isEmpty(self, fileName):
- return 0 == self.getNbLinesInSingleFile( fileName )
-
-if __name__ == "__main__":
- unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_getRandomRegions.py
--- a/SMART/Java/Python/test/Test_F_getRandomRegions.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,66 +0,0 @@
-import unittest
-import os
-from commons.core.parsing.GffParser import GffParser
-from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator
-
-MIN_SIZE = 36
-MAX_SIZE = 100
-CHR      = "chr1"
-CHR_SIZE = 1000000
-NB_READS = 1000
-
-class Test_F_getRandomRegions(unittest.TestCase):
-
-    def setUp(self):
-        self._outputFileName = 'output.gff3'
-        
-    def tearDown(self):
-        for fileName in [self._outputFileName]:
-            if os.path.exists(fileName):
-                os.remove(fileName)
-        
-    def test_simple(self):
-        iRR = RandomRegionsGenerator(0)
-        iRR.setMinSize(MIN_SIZE)
-        iRR.setMaxSize(MAX_SIZE)
-        iRR.setGenomeSize(CHR_SIZE)
-        iRR.setChromosomeName(CHR)
-        iRR.setStrands(False)
-        iRR.setNumber(NB_READS)
-        iRR.setOutputFile(self._outputFileName)
-        iRR.run()
-        parser = GffParser(self._outputFileName, 0)
-        starts = set()
-        self.assertTrue(parser.getNbTranscripts(), CHR_SIZE)
-        for transcript in parser.getIterator():
-            start      = transcript.getStart()
-            end        = transcript.getEnd()
-            size       = transcript.getSize()
-            chromosome = transcript.getChromosome()
-            strand     = transcript.getDirection()
-            self.assertTrue(start not in starts)
-            self.assertTrue(start >= 1)
-            self.assertTrue(end <= CHR_SIZE)
-            self.assertEquals(chromosome, CHR)
-            self.assertEquals(strand, 1)
-            starts.add(start)
-
-    def test_both_strands(self):
-        iRR = RandomRegionsGenerator(0)
-        iRR.setMinSize(MIN_SIZE)
-        iRR.setMaxSize(MAX_SIZE)
-        iRR.setGenomeSize(CHR_SIZE)
-        iRR.setChromosomeName(CHR)
-        iRR.setStrands(True)
-        iRR.setNumber(NB_READS)
-        iRR.setOutputFile(self._outputFileName)
-        iRR.run()
-        parser = GffParser(self._outputFileName, 0)
-        strands = set()
-        for transcript in parser.getIterator():
-            strands.add(transcript.getDirection())
-        self.assertTrue(1 in strands)
-        self.assertTrue(-1 in strands)
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_getReadDistribution.py
--- a/SMART/Java/Python/test/Test_F_getReadDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,37 +0,0 @@
-import unittest
-import os, glob
-
-
-class Test_F_getReadDistribution(unittest.TestCase):
-
-    def setUp(self):
-        self.inputFileName  = "inputFile.fasta"
-        self.outputFileName = "outputFile"
-         
-    def tearDown(self):
-        for fileRoot in (self.inputFileName, self.outputFileName):
-            for file in glob.glob("%s*" % (fileRoot)):
-                os.remove(file)
-        os.system("rm .RData")
-
-    def test_run_simple(self):
-        handle = open(self.inputFileName, "w")
-        handle.write(">test1\n")
-        handle.write("AAAAAA\n")
-        handle.write(">test2\n")
-        handle.write("AAAAAA\n")
-        handle.write(">test3\n")
-        handle.write("CCCCCC\n")
-        handle.close()
-        handle.close()
-        os.system("python ../getReadDistribution.py -i %s -f fasta -n 1 -o %s -v 0" % (self.inputFileName, self.outputFileName))
-        self.assertTrue(os.path.exists("%s.png" % (self.outputFileName)))
-        handle = open("%s.txt" % (self.outputFileName))
-        lines = handle.readlines()
-        self.assertEquals(len(lines), 1)
-        self.assertEquals(lines[0], "AAAAAA\t2\n")
-
-        
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_getWigData.py
--- a/SMART/Java/Python/test/Test_F_getWigData.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,71 +0,0 @@
-import unittest
-import os, os.path
-from SMART.Java.Python.misc import Utils
-from SMART.Java.Python.getWigData import *
-
-class Test_F_getWigData(unittest.TestCase):
-
-
-    def setUp(self):
-        self._inputGffFileName = 'inputGff.gff3'
-        self._writeInputGff(self._inputGffFileName)
-        self._inputWigFileName = '../TestFiles/sorted_query_wig.wig'
-        self._expOutFileName = 'expOut.gff3'
-        self._outFileName = 'outGffWig.gff3'
-
-
-    def tearDown(self):
-        os.remove(self._inputGffFileName)
-        os.remove(self._expOutFileName)
-        os.remove(self._outFileName)
-
-
-    def test_getWigData_DefaultOption_asScript(self):
-        cmd = 'python ../getWigData.py -i %s -f gff3 -w %s -t wigValue -o %s -v 0' % (self._inputGffFileName, self._inputWigFileName, self._outFileName)
-        os.system(cmd)
-        self._writeExpDefaultOption(self._expOutFileName)
-        self.assertTrue(Utils.diff(self._expOutFileName, self._outFileName))
-        
-    def test_getWigData_strandsOption(self):
-        cmd = 'python ../getWigData.py -i %s -f gff3 -w %s -t wigValue -o %s -s -v 0' % (self._inputGffFileName, self._inputWigFileName, self._outFileName)
-        os.system(cmd)
-        self._writeExpStransOption(self._expOutFileName)
-        self.assertTrue(Utils.diff(self._expOutFileName, self._outFileName))  
-        
-    
-    def _writeInputGff(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tquery\ttest1.1\t25\t150\t126\t+\t.\tID=query_1;Name=test1.1\n")
-        f.write("chr1\tquery\ttest1.2\t70\t850\t781\t-\t.\tID=query_2;Name=test1.2\n")
-        f.write("chr1\tquery\ttest1.3\t550\t850\t201\t-\t.\tID=query_3;Name=test1.3\n")
-        f.write("chr1\tquery\ttest1.4\t925\t1025\t101\t+\t.\tID=query_4;Name=test1.4\n")
-        f.write("chr1\tquery\ttest1.5\t1201\t1210\t10\t+\t.\tID=query_5;Name=test1.5\n")
-        f.write("chr1\tquery\ttest1.6\t1500\t1600\t101\t+\t.\tID=query_6;Name=test1.6\n")
-        f.close()
-        
-    def _writeExpDefaultOption(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tS-MART\ttest1.1\t25\t150\t126\t+\t.\twigValue=1.64285714286;ID=query_1;Name=test1.1\n")
-        f.write("chr1\tS-MART\ttest1.2\t70\t850\t781\t-\t.\twigValue=1.48911651729;ID=query_2;Name=test1.2\n")
-        f.write("chr1\tS-MART\ttest1.3\t550\t850\t201\t-\t.\twigValue=2.0;ID=query_3;Name=test1.3\n")
-        f.write("chr1\tS-MART\ttest1.4\t925\t1025\t101\t+\t.\twigValue=1.0;ID=query_4;Name=test1.4\n")
-        f.write("chr1\tS-MART\ttest1.5\t1201\t1210\t10\t+\t.\twigValue=1.0;ID=query_5;Name=test1.5\n")
-        f.write("chr1\tS-MART\ttest1.6\t1500\t1600\t101\t+\t.\twigValue=1.0;ID=query_6;Name=test1.6\n")
-        f.close()
-        
-    def _writeExpStransOption(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tS-MART\ttest1.1\t25\t150\t126\t+\t.\twigValue=0.0;ID=query_1;Name=test1.1\n")
-        f.write("chr1\tS-MART\ttest1.2\t70\t850\t781\t-\t.\twigValue=0.0;ID=query_2;Name=test1.2\n")
-        f.write("chr1\tS-MART\ttest1.3\t550\t850\t201\t-\t.\twigValue=0.0;ID=query_3;Name=test1.3\n")
-        f.write("chr1\tS-MART\ttest1.4\t925\t1025\t101\t+\t.\twigValue=0.0;ID=query_4;Name=test1.4\n")
-        f.write("chr1\tS-MART\ttest1.5\t1201\t1210\t10\t+\t.\twigValue=0.0;ID=query_5;Name=test1.5\n")
-        f.write("chr1\tS-MART\ttest1.6\t1500\t1600\t101\t+\t.\twigValue=0.0;ID=query_6;Name=test1.6\n")
-        f.close() 
-        
-    
-        
-
-
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_getWigDistance.py
--- a/SMART/Java/Python/test/Test_F_getWigDistance.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,45 +0,0 @@
-import unittest
-import os
-from commons.core.utils.FileUtils import FileUtils
-from SMART.Java.Python.getWigDistance import *
-
-class Test_F_getWigDistance(unittest.TestCase):
-
-
-    def setUp(self):
-        self._inputGffFileName = 'inputGff.gff3'
-        self._writeInputGff(self._inputGffFileName)
-        self._inputWigFileName = '../TestFiles/sorted_query_wig.wig'
-        self._outFileName = 'outGffWig.png'
-
-    def tearDown(self):
-        os.remove(self._inputGffFileName)
-        os.remove(self._outFileName)        
-
-    def test_getWigDistance_defaultOption(self):
-        cmd = 'python ../getWigDistance.py -i %s -f gff3 -w %s -d 10 -o %s' % (self._inputGffFileName, self._inputWigFileName, self._outFileName)
-        os.system(cmd)
-
-    def test_getWigDistance_strandsOption(self):
-        cmd = 'python ../getWigDistance.py -i %s -f gff3 -w %s -d 10 -o %s -s' % (self._inputGffFileName, self._inputWigFileName, self._outFileName)
-        os.system(cmd)      
-          
-    def test_getWigDistance_logOption(self):
-        cmd = 'python ../getWigDistance.py -i %s -f gff3 -w %s -d 10 -o %s -l' % (self._inputGffFileName, self._inputWigFileName, self._outFileName)
-        os.system(cmd)
-    
-    def _writeInputGff(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tquery\ttest1.1\t25\t150\t126\t+\t.\tID=query_1;Name=test1.1\n")
-        f.write("chr1\tquery\ttest1.2\t70\t850\t781\t-\t.\tID=query_2;Name=test1.2\n")
-        f.write("chr1\tquery\ttest1.3\t550\t850\t201\t-\t.\tID=query_3;Name=test1.3\n")
-        f.write("chr1\tquery\ttest1.4\t925\t1025\t101\t+\t.\tID=query_4;Name=test1.4\n")
-        f.write("chr1\tquery\ttest1.5\t1201\t1210\t10\t+\t.\tID=query_5;Name=test1.5\n")
-        f.write("chr1\tquery\ttest1.6\t1500\t1600\t101\t+\t.\tID=query_6;Name=test1.6\n")
-        f.close()
-
-
-
-if __name__ == "__main__":
-    #import sys;sys.argv = ['', 'Test.testName']
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_getWigProfile.py
--- a/SMART/Java/Python/test/Test_F_getWigProfile.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,65 +0,0 @@
-import unittest
-import os, glob
-from SMART.Java.Python.getWigProfile import GetWigProfile
-
-
-class Test_F_GetWigProfile(unittest.TestCase):
-
-    def setUp(self):
-        self.transcriptFileName = "transcriptFile.gff3"
-        self.wigFileName        = "file.wig"
-        self.outputFileName     = "outputFile.png"
-         
-    def tearDown(self):
-        for fileRoot in (self.transcriptFileName, self.wigFileName, self.outputFileName):
-            for file in glob.glob("%s*" % (fileRoot)):
-                os.remove(file)
-        os.system("rm .RData .chr1.index ")
-
-    def test_run_simple(self):
-        handle = open(self.transcriptFileName, "w")
-        handle.write("chr1\tSMART\tmRNA\t10\t20\t.\t+\t.\tID=test1;Name=test1")
-        handle.close()
-        handle = open(self.wigFileName, "w")
-        handle.write("variableStep chrom=chr1\n")
-        handle.write("1 1\n")
-        handle.write("2 1\n")
-        handle.write("3 1\n")
-        handle.write("4 1\n")
-        handle.write("5 1\n")
-        handle.write("6 1\n")
-        handle.write("7 1\n")
-        handle.write("8 1\n")
-        handle.write("9 1\n")
-        handle.write("10 1\n")
-        handle.write("11 2\n")
-        handle.write("12 3\n")
-        handle.write("13 4\n")
-        handle.write("14 5\n")
-        handle.write("15 5\n")
-        handle.write("16 5\n")
-        handle.write("17 5\n")
-        handle.write("18 5\n")
-        handle.write("19 5\n")
-        handle.write("20 5\n")
-        handle.write("21 1\n")
-        handle.write("21 1\n")
-        handle.close()
-        wigProfile = GetWigProfile(0)
-        wigProfile.strands        = False
-        wigProfile.inputFileName  = self.transcriptFileName
-        wigProfile.inputFormat    = "gff3"
-        wigProfile.wig            = self.wigFileName
-        wigProfile.nbPoints       = 11
-        wigProfile.distance       = 1
-        wigProfile.smoothenForce  = None
-        wigProfile.log            = False
-        wigProfile.outputFileName = self.outputFileName
-        wigProfile.readTranscripts()
-        wigProfile.smoothen()
-        wigProfile.plot()
-        self.assertTrue(os.path.exists(self.outputFileName))
-        
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_mapperAnalyzer.py
--- a/SMART/Java/Python/test/Test_F_mapperAnalyzer.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,55 +0,0 @@
-import unittest
-import os, glob
-from SMART.Java.Python.mapperAnalyzer import MapperAnalyzer
-from commons.core.parsing.GffParser import GffParser
-
-class Test_F_mapperAnalyzer(unittest.TestCase):
-
-    def setUp(self):
-        self.readsFileName   = "inputFile.fastq"
-        self.mappingFileName = "inputFile.sam"
-        self.outputFileName  = "outputFile.gff3"
-         
-    def tearDown(self):
-        for fileRoot in (self.readsFileName, self.mappingFileName, self.outputFileName):
-            for file in glob.glob("%s*" % (fileRoot)):
-                os.remove(file)
-        os.system("rm tmpNames_* tmpSequences_* smartdb*")
-
-    def test_run_simple(self):
-        handle = open(self.readsFileName, "w")
-        handle.write("@read1\n")
-        handle.write("AAAAAAAAAAAAAAAAAAAA\n")
-        handle.write("+\n")
-        handle.write("AAAAAAAAAAAAAAAAAAAA\n")
-        handle.write("@read2\n")
-        handle.write("CCCCCCCCCCCCCCCCCCCC\n")
-        handle.write("+\n")
-        handle.write("AAAAAAAAAAAAAAAAAAAA\n")
-        handle.close()
-        handle = open(self.mappingFileName, "w")
-        handle.write("read1\t0\tchr1\t1\t30\t20M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAA\tAAAAAAAAAAAAAAAAAAAA\tNM:i:0\n")
-        handle.write("read2\t0\tchr2\t1\t30\t20M\t*\t0\t0\tCCCCCCCCCCCCCCCCCCCC\tAAAAAAAAAAAAAAAAAAAA\tNM:i:0\n")
-        handle.write("read2\t0\tchr3\t1\t30\t20M\t*\t0\t0\tCCCCCCCCCCCCCCCCCCCC\tAAAAAAAAAAAAAAAAAAAA\tNM:i:0\n")
-        handle.close()
-        analyzer = MapperAnalyzer(0)
-        analyzer.setMappingFile(self.mappingFileName, "sam")
-        analyzer.setSequenceFile(self.readsFileName, "fastq")
-        analyzer.setOutputFile(self.outputFileName, "S-MART")
-        analyzer.setMaxMappings(1)
-        analyzer.mergeExons(True)
-        analyzer.analyze()
-
-        parser = GffParser(self.outputFileName)
-        self.assertEqual(parser.getNbTranscripts(), 1)
-        for transcript in parser.getIterator():
-            self.assertEqual(transcript.getName(), "read1")
-            self.assertEqual(transcript.getChromosome(), "chr1")
-            self.assertEqual(transcript.getStart(), 1)
-            self.assertEqual(transcript.getEnd(), 20)
-            self.assertEqual(transcript.getDirection(), 1)
-            
-        
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_mappingToCoordinates.py
--- a/SMART/Java/Python/test/Test_F_mappingToCoordinates.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,22 +0,0 @@
-import unittest
-import os
-from SMART.Java.Python.misc import Utils
-
-class Test_F_mappingToCoordinates(unittest.TestCase):
-
-
-    def setUp(self):
-        self._inputFileName = 'inputMTC.sam'
-        self._outputFileName = 'outputGff.gff3'
-        self._expOutputFileName = '../TestFiles/expOutputGff.gff3'
-        
-    def tearDown(self):
-        os.remove(self._outputFileName)
-
-    def test_run_default_option(self):
-        cmd = 'python ../mappingToCoordinates.py -i ../TestFiles/%s -f sam -o %s -v 0' % (self._inputFileName, self._outputFileName)
-        os.system(cmd)
-        self.assertTrue(Utils.diff(self._outputFileName, self._expOutputFileName))
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_mergeSlidingWindowsClusters.py
--- a/SMART/Java/Python/test/Test_F_mergeSlidingWindowsClusters.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,80 +0,0 @@
-import unittest
-import os
-from SMART.Java.Python.mergeSlidingWindowsClusters import MergeSlidingWindowsClusters
-from SMART.Java.Python.misc import Utils
-
-class Test_F_mergeSlidingWindowsClusters(unittest.TestCase):
-
-
-    def setUp(self):
-        self._outputFileName = 'outputMSWC'
-        self._inputFileName1 = 'inputMSWC1.gff3'
-        self._inputFileName2 = 'inputMSWC2.gff3'
-        self._writeInput1(self._inputFileName1)
-        self._writeInput2(self._inputFileName2)
-        self._expOutput = 'expOutputMSWC.gff3'
-        self._writeExpOutput(self._expOutput)
-
-    def tearDown(self):
-        os.remove(self._inputFileName1)
-        os.remove(self._inputFileName2)
-        os.remove(self._outputFileName+'.gff3')
-        os.remove(self._expOutput)
-
-
-    def test_run(self):
-        iMSWC = MergeSlidingWindowsClusters(0)
-        iMSWC.addInput(self._inputFileName1, 'gff3')
-        iMSWC.addInput(self._inputFileName2, 'gff3')
-        iMSWC.setOutput(self._outputFileName)
-        iMSWC.merge()
-        self.assertTrue(Utils.diff(self._outputFileName+'.gff3', self._expOutput))
-        
-    def test_run_asScript(self):
-        cmd = 'python ../mergeSlidingWindowsClusters.py -i %s -f gff3 -j %s -g gff3 -o outputMSWC.gff3 --galaxy -v 0' % (self._inputFileName1, self._inputFileName2)
-        os.system(cmd)
-        self.assertTrue(Utils.diff(self._outputFileName+'.gff3', self._expOutput)) 
-        
-    def _writeInput1(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\ttest\tmatch\t6155418\t6155441\t24\t+\t.\tName=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100\n")
-        f.write("chr2\ttest\tmatch\t26303950\t26303981\t32\t+\t.\tName=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93\n")
-        f.write("chr3\ttest\tmatch\t28320540\t28320574\t35\t+\t.\tName=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94\n")
-        f.write("chr4\ttest\tmatch\t28565007\t28565041\t35\t+\t.\tName=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88\n")
-        f.write("chr6\ttest\tmatch\t48565007\t48565041\t35\t+\t.\tName=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80\n")
-        f.close()
-
-    def _writeInput2(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\ttest\tmatch\t6155418\t6155441\t24\t+\t.\tName=test1/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100\n")
-        f.write("chr2\ttest\tmatch\t26303990\t26304021\t32\t+\t.\tName=test2/1;occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93\n")
-        f.write("chr3\ttest\tmatch\t28320540\t28320574\t35\t+\t.\tName=test2/1;occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94\n")
-        f.write("chr4\ttest\tmatch\t28565017\t28565051\t35\t+\t.\tName=test2/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88\n")
-        f.write("chr5\ttest\tmatch\t30000000\t30000050\t50\t+\t.\tName=test3/1;occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=1;ID=test3/1-4;identity=50\n")
-        f.close()   
-        
-    def _writeExpOutput(self, fileName):
-        f = open(fileName, 'w')
-        f.write("""chr6 S-MART match 48565007 48565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80;Name=region_1
-chr5 S-MART match 30000000 30000050 50 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=1;ID=test3/1-4;identity=50;Name=region_2
-chr4 S-MART match 28565017 28565051 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=region_3
-chr4 S-MART match 28565007 28565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=region_4
-chr3 S-MART match 28320540 28320574 35 + . occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=region_5
-chr2 S-MART match 26303990 26304021 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=region_6
-chr2 S-MART match 26303950 26303981 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=region_7
-chr1 S-MART match 6155418 6155441 24 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=region_8
-""")
-#       f.write("chr6\tS-MART\tmatch\t48565007\t48565041\t35\t+\t.\tName=region_1;occurrence=3;feature=match;rank=3;score=35;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80\n")
-#       f.write("chr5\tS-MART\tmatch\t30000000\t30000050\t50\t+\t.\tName=region_2;occurrence=3;feature=match;rank=3;score=50;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=1;ID=test3/1-4;identity=50\n")
-#       f.write("chr4\tS-MART\tmatch\t28565017\t28565051\t35\t+\t.\tName=region_3;occurrence=3;feature=match;rank=3;score=35;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88\n")
-#       f.write("chr4\tS-MART\tmatch\t28565007\t28565041\t35\t+\t.\tName=region_4;occurrence=3;feature=match;rank=3;score=35;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88\n")
-#       f.write("chr3\tS-MART\tmatch\t28320540\t28320574\t35\t+\t.\tName=region_5;occurrence=2;feature=match;score=35;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94\n")
-#       f.write("chr2\tS-MART\tmatch\t26303990\t26304021\t32\t+\t.\tName=region_6;occurrence=1;feature=match;rank=1;score=32;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93\n")
-#       f.write("chr2\tS-MART\tmatch\t26303950\t26303981\t32\t+\t.\tName=region_7;occurrence=1;feature=match;rank=1;score=32;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93\n")
-#       f.write("chr1\tS-MART\tmatch\t6155418\t6155441\t24\t+\t.\tName=region_8;occurrence=1;feature=match;rank=1;score=24;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100\n")
-        f.close()
-    
-            
-
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_mergeTranscriptLists.py
--- a/SMART/Java/Python/test/Test_F_mergeTranscriptLists.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,92 +0,0 @@
-import unittest
-import os, os.path, glob
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.mergeTranscriptLists import MergeLists
-from commons.core.writer.Gff3Writer import Gff3Writer
-from commons.core.parsing.GffParser import GffParser
-
-class Test_F_mergeTranscriptLists(unittest.TestCase):
-
-    def setUp(self):
-        self.queryFileName     = "testQuery.gff3"
-        self.referenceFileName = "testReference.gff3"
-        self.outputFileName    = "testOutput.gff3"
-         
-    def tearDown(self):
-        for fileRoot in (self.queryFileName, self.referenceFileName, self.outputFileName):
-            for file in glob.glob("%s*" % (fileRoot)):
-                os.remove(file)
-        for file in glob.glob("tmp_*.gff3"):
-            os.remove(file)
-
-    def test_run_simple(self):
-        reference1 = self._createTranscript("chr1", 1000, 2000, "+", "ref1")
-        reference2 = self._createTranscript("chr1", 3000, 4000, "+", "ref2")
-        reference3 = self._createTranscript("chr1", 5000, 6000, "+", "ref3")
-        writer = Gff3Writer(self.referenceFileName, 0)
-        writer.addTranscript(reference1)
-        writer.addTranscript(reference2)
-        writer.addTranscript(reference3)
-        writer.close()
-        query1 = self._createTranscript("chr1", 1500, 3500, "+", "query1")
-        writer = Gff3Writer(self.queryFileName, 0)
-        writer.addTranscript(query1)
-        writer.close()
-        ml = MergeLists(0)
-        ml.setInputFileName(self.queryFileName, 'gff3', 0)
-        ml.setInputFileName(self.referenceFileName, 'gff3', 1)
-        ml.setOutputFileName(self.outputFileName)
-        ml.run()
-        parser = GffParser(self.outputFileName)
-        self.assertEqual(parser.getNbTranscripts(), 1)
-        for transcript in parser.getIterator():
-            self._checkTranscript(transcript, "chr1", 1000, 4000, "+", None)
-
-    def test_run_simple_aggregate(self):
-        reference1 = self._createTranscript("chr1", 1000, 2000, "+", "ref1")
-        reference2 = self._createTranscript("chr1", 3000, 4000, "+", "ref2")
-        reference3 = self._createTranscript("chr1", 5000, 6000, "+", "ref3")
-        writer = Gff3Writer(self.referenceFileName, 0)
-        writer.addTranscript(reference1)
-        writer.addTranscript(reference2)
-        writer.addTranscript(reference3)
-        writer.close()
-        query1 = self._createTranscript("chr1", 1500, 3500, "+", "query1")
-        writer = Gff3Writer(self.queryFileName, 0)
-        writer.addTranscript(query1)
-        writer.close()
-        ml = MergeLists(0)
-        ml.setInputFileName(self.queryFileName, 'gff3', 0)
-        ml.setInputFileName(self.referenceFileName, 'gff3', 1)
-        ml.setOutputFileName(self.outputFileName)
-        ml.setAggregate(True)
-        ml.run()
-        parser = GffParser(self.outputFileName)
-        self.assertEqual(parser.getNbTranscripts(), 2)
-        for cpt, transcript in enumerate(parser.getIterator()):
-            if cpt == 0:
-                self._checkTranscript(transcript, "chr1", 1000, 4000, "+", None)
-            else:
-                self._checkTranscript(transcript, "chr1", 5000, 6000, "+", None)
-
-    def _createTranscript(self, chromosome, start, end, strand, name):
-        transcript = Transcript()
-        transcript.setChromosome(chromosome)
-        transcript.setStart(start)
-        transcript.setEnd(end)
-        transcript.setDirection(strand)
-        transcript.setName(name)
-        return transcript
-
-    def _checkTranscript(self, transcript, chromosome, start, end, strand, name):
-        self.assertEqual(transcript.getChromosome(), chromosome)
-        self.assertEqual(transcript.getStart(), start)
-        self.assertEqual(transcript.getEnd(), end)
-        self.assertEqual(transcript.getStrand(), strand)
-        if name != None:
-            self.assertEqual(transcript.getName(), name)
-
-        
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_plot.py
--- a/SMART/Java/Python/test/Test_F_plot.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,42 +0,0 @@
-import unittest
-import os
-from commons.core.utils.FileUtils import FileUtils
-
-TestF_PATH = os.environ['REPET_PATH'] + '/SMART/Java/Python/TestFiles'
-
-
-class Test_F_plot(unittest.TestCase):
-    
-    def setUp(self):
-        self.outputFileName = "testOut.png"
-
-    def tearDown(self):
-        os.remove(self.outputFileName)
-
-    def test_run_default_option(self):
-        cmd = "python ../plot.py -i %s/mapperAnalyzerOutput.gff3 -f gff3 -x identity -y nbMismatches -s line -o %s " % (TestF_PATH, self.outputFileName)
-        os.system(cmd)
-        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
-
-    def test_run_log_option(self):
-        cmd = "python ../plot.py -i %s/mapperAnalyzerOutput.gff3 -f gff3 -x identity -y nbMismatches -s line -o %s -l xy" % (TestF_PATH, self.outputFileName)
-        os.system(cmd)
-        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
-        
-    def test_run_z_xDefault_yDefault_heatPoints_option(self):
-        cmd = "python ../plot.py -i %s/testPlot.gff3 -f gff3 -x value1 -X 1 -y value2 -Y 1 -z value3 -s heatPoints -o %s -v 10 " % (TestF_PATH, self.outputFileName)
-        os.system(cmd)
-        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
-        
-    def test_points_option(self):
-        cmd = "python ../plot.py -i %s/testPlot.gff3 -f gff3 -x value1 -X 1 -y value2 -Y 1 -s points -o %s -v 10 " % (TestF_PATH, self.outputFileName)
-        os.system(cmd)
-        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
-        
-    def test_xDefault_points_option(self):
-        cmd = "python ../plot.py -i %s/testPlot.gff3 -f gff3 -x value1 -X 1 -n 2 -s barplot -o %s -v 10 " % (TestF_PATH, self.outputFileName)
-        os.system(cmd)
-        self.assertTrue(FileUtils.isRessourceExists(self.outputFileName))
-    
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_plotCoverage.py
--- a/SMART/Java/Python/test/Test_F_plotCoverage.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,39 +0,0 @@
-import unittest
-import os, glob
-from SMART.Java.Python.plotCoverage import PlotParser
-
-
-class Test_F_PlotCoverage(unittest.TestCase):
-
-    def setUp(self):
-        self.queryFileName  = "queryFile.gff3"
-        self.refFileName    = "refFile.gff3"
-        self.outputFileName = "outputFile"
-         
-    def tearDown(self):
-        for fileRoot in (self.queryFileName, self.refFileName, self.outputFileName):
-            for file in glob.glob("%s*" % (fileRoot)):
-                os.remove(file)
-        os.remove(".RData")
-
-    def test_run_simple(self):
-        handle = open(self.refFileName, "w")
-        handle.write("chr1\tSMART\tmRNA\t1000\t2000\t.\t+\t.\tID=test1;Name=test1")
-        handle.close()
-        handle = open(self.queryFileName, "w")
-        handle.write("chr1\tSMART\tmRNA\t1100\t1200\t.\t+\t.\tID=test2.1;Name=test2.1\n")
-        handle.write("chr1\tSMART\tmRNA\t1300\t1400\t.\t+\t.\tID=test2.2;Name=test2.2\n")
-        handle.close()
-        pp = PlotParser(0)
-        pp.addInput(0, self.queryFileName, "gff3")
-        pp.addInput(1, self.refFileName, "gff3")
-        pp.setLabels("x", "y")
-        pp.setPlotSize(1000, 500)
-        pp.setOutput(self.outputFileName)
-        pp.start()
-        self.assertTrue(os.path.exists("%s_test1_overlap.png" % (self.outputFileName)))
-        self.assertTrue(os.path.exists("%s_test1_coverage.png" % (self.outputFileName)))
-        
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_qualToFastq.py
--- a/SMART/Java/Python/test/Test_F_qualToFastq.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,38 +0,0 @@
-import unittest
-import os, glob
-
-
-class Test_F_qualToFastq(unittest.TestCase):
-
-    def setUp(self):
-        self.fastaFileName  = "file.fasta"
-        self.qualFileName   = "file.qual"
-        self.outputFileName = "outputFile.fastq"
-         
-    def tearDown(self):
-        for fileRoot in (self.fastaFileName, self.qualFileName, self.outputFileName):
-            for file in glob.glob("%s*" % (fileRoot)):
-                os.remove(file)
-
-    def test_run_simple(self):
-        handle = open(self.fastaFileName, "w")
-        handle.write(">test1\n")
-        handle.write("AAAAAA")
-        handle.close()
-        handle = open(self.qualFileName, "w")
-        handle.write(">test1\n")
-        handle.write("32\t32\t32\t32\t32\t32")
-        handle.close()
-        os.system("python ../qualToFastq.py -f %s -q %s -o %s -v 0" % (self.fastaFileName, self.qualFileName, self.outputFileName))
-        handle = open(self.outputFileName)
-        lines = handle.readlines()
-        self.assertEquals(len(lines), 4)
-        self.assertEquals(lines[0], "@test1\n")
-        self.assertEquals(lines[1], "AAAAAA\n")
-        self.assertEquals(lines[2], "+\n")
-        self.assertEquals(lines[3], "AAAAAA\n")
-
-        
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_restrictSequenceList.py
--- a/SMART/Java/Python/test/Test_F_restrictSequenceList.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,37 +0,0 @@
-import unittest
-import os, glob
-
-class Test_F_restrictSequenceList(unittest.TestCase):
-
-    def setUp(self):
-        self.sequencesFileName = "inputFile.fasta"
-        self.namesFileName     = "names.txt"
-        self.outputFileName    = "outputFile.fasta"
-         
-    def tearDown(self):
-        for fileRoot in (self.sequencesFileName, self.namesFileName, self.outputFileName):
-            for file in glob.glob("%s*" % (fileRoot)):
-                os.remove(file)
-
-    def test_run_simple(self):
-        handle = open(self.sequencesFileName, "w")
-        handle.write(">sequence1\n")
-        handle.write("AAAAAAAAAAAAAAAAAAAA\n")
-        handle.write(">sequence2\n")
-        handle.write("CCCCCCCCCCCCCCCCCCCC\n")
-        handle.close()
-        handle = open(self.namesFileName, "w")
-        handle.write("""sequence1""")
-        handle.close()
-        os.system("python ../restrictSequenceList.py -i %s -f fasta -n %s -o %s -v 0" % (self.sequencesFileName, self.namesFileName, self.outputFileName))
-        handle = open(self.outputFileName)
-        lines = handle.readlines()
-        handle.close()
-        self.assertEqual(len(lines), 2)
-        self.assertEqual(lines[0], ">sequence1\n")
-        self.assertEqual(lines[1], "AAAAAAAAAAAAAAAAAAAA\n")
-
-        
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_selectByTag.py
--- a/SMART/Java/Python/test/Test_F_selectByTag.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,86 +0,0 @@
-import unittest
-import os
-from SMART.Java.Python.misc import Utils
-
-class Test_F_selectByTag(unittest.TestCase):
-
-    def setUp(self):
-        self._inputFileName = "%s/SMART/Java/Python/TestFiles/inputMSWC1.gff3" % os.environ["REPET_PATH"]
-        self._outputFileName = "outputSBT.gff3"
-        self._expOutputFileName = "expSBT.gff3"
-
-    def tearDown(self):
-        os.remove(self._outputFileName)
-        os.remove(self._expOutputFileName)
-
-    def test_run_compulsory_option(self):
-        cmd = 'python ../SelectByTag.py -i %s -f gff3 -g occurrence -o %s -v 0' % (self._inputFileName, self._outputFileName)
-        os.system(cmd)
-        self._writeExpDefault(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-        
-    def test_run_value_option(self):
-        cmd = 'python ../SelectByTag.py -i %s -f gff3 -a 1 -g occurrence -o %s -v 0' % (self._inputFileName, self._outputFileName)
-        os.system(cmd)
-        self._writeExpValueOption(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-        
-    def test_run_min_option(self):
-        cmd = 'python ../SelectByTag.py -i %s -f gff3 -m 3 -g occurrence -o %s -v 0' % (self._inputFileName, self._outputFileName)
-        os.system(cmd)
-        self._writeExpMinOption(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-    
-    def test_run_max_option(self):
-        cmd = 'python ../SelectByTag.py -i %s -f gff3 -M 1 -g occurrence -o %s -v 0' % (self._inputFileName, self._outputFileName)
-        os.system(cmd)
-        self._writeExpMaxOption(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-
-    def test_run_max_min_option(self):
-        cmd = 'python ../SelectByTag.py -i %s -f gff3 -M 2 -m 1 -g occurrence -o %s -v 0' % (self._inputFileName, self._outputFileName)
-        os.system(cmd)
-        self._writeExpMaxMinOption(self._expOutputFileName)
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._outputFileName))
-        
-    def _writeExpDefault(self, fileName):
-        f = open(fileName, 'w')
-        f.write("""chr1 S-MART match 6155418 6155441 24 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1
-chr2 S-MART match 26303950 26303981 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
-chr3 S-MART match 28320540 28320574 35 + . occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1
-chr4 S-MART match 28565007 28565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1
-chr6 S-MART match 48565007 48565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80;Name=test2/1
-""")
-        f.close()
-        
-    def _writeExpValueOption(self, fileName):
-        f = open(fileName, 'w')
-        f.write("""chr1 S-MART match 6155418 6155441 24 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1
-chr2 S-MART match 26303950 26303981 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
-""")
-        f.close()
-        
-    def _writeExpMinOption(self, fileName):
-        f = open(fileName, 'w')
-        f.write("""chr4 S-MART match 28565007 28565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=4;ID=test2/1-3;identity=88;Name=test2/1
-chr6 S-MART match 48565007 48565041 35 + . occurrence=3;rank=3;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=3;ID=test2/1-4;identity=80;Name=test2/1
-""")
-        f.close()
-        
-    def _writeExpMaxOption(self, fileName):
-        f = open(fileName, 'w')
-        f.write("""chr1 S-MART match 6155418 6155441 24 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1
-chr2 S-MART match 26303950 26303981 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
-""")
-        f.close()        
-        
-    def _writeExpMaxMinOption(self, fileName):
-        f = open(fileName, 'w')
-        f.write("""chr1 S-MART match 6155418 6155441 24 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=test1/1;identity=100;Name=test1/1
-chr2 S-MART match 26303950 26303981 32 + . occurrence=1;rank=1;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-1;identity=93;Name=test2/1
-chr3 S-MART match 28320540 28320574 35 + . occurrence=2;bestRegion=chr2:26303950-26303981;nbGaps=0;nbOccurrences=3;nbMismatches=2;ID=test2/1-2;identity=94;Name=test2/1
-""")
-        f.close()
-        
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_F_trimSequences.py
--- a/SMART/Java/Python/test/Test_F_trimSequences.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,28 +0,0 @@
-import unittest, os, os.path
-from SMART.Java.Python.misc import Utils
-SMART_PATH = os.environ["REPET_PATH"] + "/SMART"
-
-class Test_F_trimSequences(unittest.TestCase):
-
-    def setUp(self):
-        self._expOutputFileName = "expOutputFile.fasta"
-        self._obsOutputFileName = "obsOutputFile.mfa"
-        self._inputFileName     = "inputFile.fasta"
-
-    def tearDown(self):
-        for fileName in (self._expOutputFileName, self._obsOutputFileName, self._inputFileName):
-            if os.path.exists(fileName):
-                os.remove(fileName)
-
-    def test_simple(self):
-        expOutputFile = open(self._expOutputFileName, "w")
-        expOutputFile.write(">sequence1\nTTGCATAGCGCTACGTA\n")
-        expOutputFile.close()
-        inputFile = open(self._inputFileName, "w")
-        inputFile.write(">sequence1\nAGCTCGGGTATTGCATAGCGCTACGTACCCTTTATATC\n")
-        inputFile.close()
-        os.system("python %s/Java/Python/trimSequences.py -i %s -f fasta -3 CCCTTTATATC -5 AGCTCGGGTA -o %s -v 0" % (SMART_PATH, self._inputFileName, self._obsOutputFileName))
-        self.assertTrue(Utils.diff(self._expOutputFileName, self._obsOutputFileName))
-
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_FindOverlapsOptim.py
--- a/SMART/Java/Python/test/Test_FindOverlapsOptim.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,174 +0,0 @@\n-import unittest\n-import os\n-from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim\n-from SMART.Java.Python.ncList.NCListCursor import NCListCursor\n-from SMART.Java.Python.ncList.test.MockFindOverlapsWithSeveralIntervals import *\n-\n-REFERENCE = 0\n-QUERY = 1\n-\n-class Test_FindOverlapsOptim(unittest.TestCase):\n-\n-    def setUp(self):\n-        self._inputRefGff3FileName = \'sorted_Ref.gff3\'\n-        self._inputQueryGff3FileName = \'sorted_Query.gff3\'\n-        self._writeQueryGff3File(self._inputQueryGff3FileName)\n-        self._outputGff3FileName = \'overlaps.gff3\'\n-        iMock = MockFindOverlapsWithServeralIntervals_case1()\n-        iMock.write(self._inputRefGff3FileName)\n-        self._iFOO = FindOverlapsOptim(0)\n-        self._iFOO.setRefFileName(self._inputRefGff3FileName, "gff3")\n-        self._iFOO.setQueryFileName(self._inputQueryGff3FileName, "gff3")\n-        self._iFOO.setOutputFileName(self._outputGff3FileName)\n-        self._iFOO.prepareIntermediateFiles()\n-        self._iFOO.createNCLists()\n-        self._queryNcList = self._iFOO._ncLists[QUERY]["chr1"]\n-        self._refNcList   = self._iFOO._ncLists[REFERENCE]["chr1"]\n-        \n-    def tearDown(self):\n-        os.remove(self._inputRefGff3FileName)\n-        os.remove(self._inputQueryGff3FileName)\n-        os.remove(self._outputGff3FileName)\n-    \n-    def test_isOverlapping_true(self):\n-        queryCursor = NCListCursor(None, self._queryNcList, 5, 0)\n-        refCursor   = NCListCursor(None, self._refNcList,   4, 0)\n-        obs = self._iFOO.isOverlapping(queryCursor, refCursor)\n-        exp = 0\n-        self.assertEquals(exp, obs)\n-        \n-    def test_isOverlapping_false_left(self):\n-        queryCursor = NCListCursor(None, self._queryNcList, 5, 0)\n-        refCursor   = NCListCursor(None, self._refNcList,   2, 0)\n-        obs = self._iFOO.isOverlapping(queryCursor, refCursor)\n-        exp = -1\n-        
self.assertEquals(exp, obs)\n-    \n-    def test_isOverlapping_false_right(self):\n-        queryCursor = NCListCursor(None, self._queryNcList, 5, 0)\n-        refCursor   = NCListCursor(None, self._refNcList,   1, 0)\n-        obs = self._iFOO.isOverlapping(queryCursor, refCursor)\n-        exp = 1\n-        self.assertEquals(exp, obs) \n-           \n-    def test_isLastElement_true(self):\n-        refCursor = NCListCursor(None, self._refNcList, 4, 0)\n-        obsBool   = refCursor.isLast()\n-        expBool   = True\n-        self.assertEquals(expBool, obsBool)\n-    \n-    def test_isLastElement_false(self):\n-        refCursor = NCListCursor(None, self._refNcList, 3, 0)\n-        obsBool   = refCursor.isLast()\n-        expBool   = False\n-        self.assertEquals(expBool, obsBool)  \n-        \n-    def test_isLastElement_highestLevel_true(self):\n-        refCursor = NCListCursor(None, self._refNcList, 1, 0)\n-        obsBool   = refCursor.isLast()\n-        expBool   = True\n-        self.assertEquals(expBool, obsBool)\n-    \n-    def test_isLastElement_highestLevel_false(self):\n-        refCursor = NCListCursor(None, self._refNcList, 0, 0)\n-        obsBool   = refCursor.isLast()\n-        expBool   = False\n-        self.assertEquals(expBool, obsBool)           \n-\n-    def test_findOverlapIter(self):\n-        queryCursor           = NCListCursor(None, self._queryNcList, 2, 0)\n-        refCursor             = NCListCursor(None, self._refNcList,   0, 0)\n-        queryTranscript       = queryCursor.getTranscript()\n-        done                  = False\n-        (cursor, done, empty) = self._iFOO.findOverlapIter(queryTranscript, refCursor, done)\n-        obsFirstOverlapLAddr  = (cursor._lIndex, done, empty)\n-        expFirstOverlapLAddr  = 4, True, False\n-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)\n-        \n-    def test_not_findOverlapIter(self):\n-        queryCursor           = NCListCursor(None, 
self._queryNcList, 4, 0)\n-        refCursor             = NCListCursor(None, self._refNcList,   1, 0)\n-        queryTranscript   '..b' done, empty)\n-        expFirstOverlapLAddr  = -1, False, True\n-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)\n-        \n-    def test_findOverlapIter_not_the_first_RefOverlap(self):\n-        queryCursor           = NCListCursor(None, self._queryNcList, 3, 0)\n-        refCursor             = NCListCursor(None, self._refNcList,   4, 0)\n-        queryTranscript       = queryCursor.getTranscript()\n-        done                  = True\n-        (cursor, done, empty) = self._iFOO.findOverlapIter(queryTranscript, refCursor, done)\n-        obsFirstOverlapLAddr  = (cursor._lIndex, done, empty)\n-        expFirstOverlapLAddr  = 1, True, False\n-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)\n-        \n-    def test_moveDown(self):\n-        refCursor = NCListCursor(None, self._refNcList, 0, 0)\n-        refCursor.moveDown()\n-        expFirstChildLAddr = 2\n-        self.assertEquals(expFirstChildLAddr, refCursor._lIndex) \n-    \n-    def test_moveUp(self):\n-        refCursor = NCListCursor(None, self._refNcList, 4, 0)\n-        refCursor.moveUp()\n-        expFirstChildLAddr = 0\n-        self.assertEquals(expFirstChildLAddr, refCursor._lIndex) \n-    \n-    def test_moveRight(self):\n-        refCursor = NCListCursor(None, self._refNcList, 3, 0)\n-        refCursor.moveRight()\n-        expFirstChildLAddr = 4\n-        self.assertEquals(expFirstChildLAddr, refCursor._lIndex) \n-       \n-    def test_moveNext(self):\n-        refCursor = NCListCursor(None, self._refNcList, 6, 0)\n-        refCursor.moveNext()\n-        expFirstChildLAddr = 1\n-        self.assertEquals(expFirstChildLAddr, refCursor._lIndex) \n-\n-    def test_not_findOverlapIter_between2RefIntervals(self):\n-        inputQueryGff3FileName = \'query2.gff3\'\n-        
self._writeQueryGff3File2(inputQueryGff3FileName)\n-        self._outputGff3FileName = \'overlaps.gff3\'\n-        iMock = MockFindOverlapsWithServeralIntervals_case1()\n-        iMock.write(self._inputRefGff3FileName)\n-        _iFOO = FindOverlapsOptim(0)\n-        _iFOO.setRefFileName(self._inputRefGff3FileName, "gff3")\n-        _iFOO.setQueryFileName(inputQueryGff3FileName, "gff3")\n-        _iFOO.setOutputFileName(self._outputGff3FileName)\n-        _iFOO.prepareIntermediateFiles()\n-        _iFOO.createNCLists()\n-        _queryNcList          = _iFOO._ncLists[QUERY]["chr1"]\n-        _refNcList            = _iFOO._ncLists[REFERENCE]["chr1"]\n-        queryCursor           = NCListCursor(None, _queryNcList, 0, 0)\n-        refCursor             = NCListCursor(None, _refNcList,   0, 0)\n-        queryTranscript       = queryCursor.getTranscript()\n-        done                  = True\n-        (cursor, done, empty) = _iFOO.findOverlapIter(queryTranscript, refCursor, done)\n-        lIndex                = cursor._lIndex\n-        obsFirstOverlapLAddr  = (lIndex, done, empty)\n-        expFirstOverlapLAddr  = 1, False, True\n-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)\n-        os.remove(inputQueryGff3FileName) \n-\n-    def _writeQueryGff3File2(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest1\\t1100\\t1150\\t126\\t+\\t.\\tID=test1.1;Name=test1.1\\n")\n-        f.write("chr1\\tquery\\ttest2\\t1250\\t1300\\t781\\t+\\t.\\tID=test1.2;Name=test1.2\\n")\n-        f.close()  \n-        \n-    def _writeQueryGff3File(self, fileName):\n-        f = open(fileName, \'w\')\n-        f.write("chr1\\tquery\\ttest1.1\\t25\\t150\\t126\\t+\\t.\\tID=test1.1;Name=test1.1\\n")\n-        f.write("chr1\\tquery\\ttest1.2\\t70\\t850\\t781\\t+\\t.\\tID=test1.2;Name=test1.2\\n")\n-        f.write("chr1\\tquery\\ttest1.3\\t550\\t850\\t201\\t+\\t.\\tID=test1.3;Name=test1.3\\n")\n-        
f.write("chr1\\tquery\\ttest1.4\\t925\\t1025\\t101\\t+\\t.\\tID=test1.4;Name=test1.4\\n")\n-        f.write("chr1\\tquery\\ttest1.5\\t1201\\t1210\\t10\\t+\\t.\\tID=test1.5;Name=test1.5\\n")\n-        f.write("chr1\\tquery\\ttest1.6\\t1500\\t1600\\t101\\t+\\t.\\tID=test1.6;Name=test1.6\\n")\n-        f.close()\n-        \n-if __name__ == "__main__":\n-    unittest.main()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/Test_FindOverlaps_optim.py
--- a/SMART/Java/Python/test/Test_FindOverlaps_optim.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,149 +0,0 @@
-import unittest
-import os
-from SMART.Java.Python.FindOverlapsOptim import FindOverlapsOptim
-from SMART.Java.Python.ncList.test.MockFindOverlapsWithSeveralIntervals import *
-
-class Test_FindOverlaps_optim(unittest.TestCase):
-
-    def setUp(self):
-        self._inputRefGff3FileName = 'sorted_Ref.gff3'
-        iMock = MockFindOverlapsWithServeralIntervals_case1()
-        iMock.write(self._inputRefGff3FileName)
-        self._inputQueryGff3FileName = 'sorted_Query.gff3'
-        self._writeQueryGff3File(self._inputQueryGff3FileName)
-        self._outputGff3FileName = 'overlaps.gff3'
-        self._iFOO = FindOverlaps_optim(self._inputRefGff3FileName, self._inputQueryGff3FileName)
-        self._iFOO.prepareIntermediateFiles_sorted()
-        self._iFOO.setOutputGff3FileName(self._outputGff3FileName)
-        
-    def tearDown(self):
-        os.remove(self._inputRefGff3FileName)
-        os.remove(self._inputQueryGff3FileName)
-        os.remove(self._outputGff3FileName)
-        self._iFOO.deletIntermediateFiles()
-    
-    def test_isOverlapping_true(self):
-        queryGff3Addr = 116
-        RefGff3Addr = 231
-        obs = self._iFOO.isOverlapping(queryGff3Addr, RefGff3Addr)
-        exp = 0
-        self.assertEquals(exp, obs)
-        
-    def test_isOverlapping_false_left(self):
-        queryGff3Addr = 116
-        RefGff3Addr = 58
-        obs = self._iFOO.isOverlapping(queryGff3Addr, RefGff3Addr)
-        exp = -1
-        self.assertEquals(exp, obs)
-    
-    def test_isOverlapping_false_right(self):
-        queryGff3Addr = 116
-        RefGff3Addr = 347
-        obs = self._iFOO.isOverlapping(queryGff3Addr, RefGff3Addr)
-        exp = 1
-        self.assertEquals(exp, obs) 
-           
-    def test_getHisFirstChild(self):
-        firstRefLAddr = 0
-        obsFirstChildLAddr = self._iFOO.getHisFirstChild(firstRefLAddr)
-        expFirstChildLAddr = 48
-        self.assertEquals(expFirstChildLAddr, obsFirstChildLAddr) 
-    
-    def test_isLastElement_true(self):
-        refLAddr = 96
-        obsBool = self._iFOO.isLastElement(refLAddr)
-        expBool = True
-        self.assertEquals(expBool, obsBool)
-    
-    def test_isLastElement_false(self):
-        refLAddr = 72
-        obsBool = self._iFOO.isLastElement(refLAddr)
-        expBool = False
-        self.assertEquals(expBool, obsBool)  
-        
-    def test_isLastElement_highestLevel_true(self):
-        refLAddr = 24
-        obsBool = self._iFOO.isLastElement(refLAddr)
-        expBool = True
-        self.assertEquals(expBool, obsBool)
-    
-    def test_isLastElement_highestLevel_false(self):
-        refLAddr = 0
-        obsBool = self._iFOO.isLastElement(refLAddr)
-        expBool = False
-        self.assertEquals(expBool, obsBool)           
-
-    def test_findOverlapIter(self):
-        queryGff3Addr = 175
-        firstRefLAddr = 0 
-        done = False
-        obsFirstOverlapLAddr = self._iFOO.findOverlapIter(queryGff3Addr, firstRefLAddr, done)
-        expFirstOverlapLAddr = 96, True
-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)
-        
-    def test_not_findOverlapIter(self):
-        queryGff3Addr = 295
-        firstRefLAddr = 24 
-        done = False
-        obsFirstOverlapLAddr = self._iFOO.findOverlapIter(queryGff3Addr, firstRefLAddr, done)
-        expFirstOverlapLAddr = None, False
-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)   
-        
-    def test_findOverlapIter_not_the_first_RefOverlap(self):
-        queryGff3Addr = 235
-        firstRefLAddr = 96 
-        done = True
-        obsFirstOverlapLAddr = self._iFOO.findOverlapIter(queryGff3Addr, firstRefLAddr, done)
-        expFirstOverlapLAddr = 24, False
-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr)  
-        
-    def test_changeToNewSubEndLAddr(self):
-        firstChildLAddr = 48
-        subEndLAddr = 48
-        expSubEndLAddr = 120
-        obsSubEndLAddr = self._iFOO.changeToNewSubEndLAddr(firstChildLAddr, subEndLAddr)
-        self.assertEquals(expSubEndLAddr, obsSubEndLAddr) 
-        
-    def test_defineSubEndLaddr(self):
-        parentLAddr = -1
-        expSubEndLAddr = 48
-        obsSubEndLAddr = self._iFOO.defineSubEndLaddr(parentLAddr)
-        self.assertEquals(expSubEndLAddr, obsSubEndLAddr)
-        
-    def test_getNextRefIntervalInCaseNotOverLap(self):
-        firstRefLAddr = 96
-        expRefLAddr = 24
-        obsRefLAddr = self._iFOO.getNext(firstRefLAddr)
-        self.assertEquals(expRefLAddr, obsRefLAddr)
-        
-
-    def test_not_findOverlapIter_between2RefIntervals(self):
-        inputQueryGff3FileName = 'query2.gff3'
-        self._writeQueryGff3File2(inputQueryGff3FileName)
-        self._iFOO.setQueryGff3FileName(inputQueryGff3FileName)
-        queryGff3Addr = 0
-        firstRefLAddr = 0
-        done = False
-        obsFirstOverlapLAddr = self._iFOO.findOverlapIter(queryGff3Addr, firstRefLAddr, done)
-        expFirstOverlapLAddr = 24, False
-        self.assertEquals(expFirstOverlapLAddr, obsFirstOverlapLAddr) 
-        os.remove(inputQueryGff3FileName) 
-
-    def _writeQueryGff3File2(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tquery\ttest1\t1100\t1150\t126\t+\t.\tID=test1.1;Name=test1.1\n")
-        f.write("chr1\tquery\ttest2\t1250\t1300\t781\t+\t.\tID=test1.2;Name=test1.2\n")
-        f.close()  
-        
-    def _writeQueryGff3File(self, fileName):
-        f = open(fileName, 'w')
-        f.write("chr1\tquery\ttest1.1\t25\t150\t126\t+\t.\tID=test1.1;Name=test1.1\n")
-        f.write("chr1\tquery\ttest1.2\t70\t850\t781\t+\t.\tID=test1.2;Name=test1.2\n")
-        f.write("chr1\tquery\ttest1.3\t550\t850\t201\t+\t.\tID=test1.3;Name=test1.3\n")
-        f.write("chr1\tquery\ttest1.4\t925\t1025\t101\t+\t.\tID=test1.4;Name=test1.4\n")
-        f.write("chr1\tquery\ttest1.5\t1201\t1210\t10\t+\t.\tID=test1.5;Name=test1.5\n")
-        f.write("chr1\tquery\ttest1.6\t1500\t1600\t101\t+\t.\tID=test1.6;Name=test1.6\n")
-        f.close()
-        
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test/timeResults.R
--- a/SMART/Java/Python/test/timeResults.R Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-data <- read.table("timeResults.dat", header=TRUE)
-attach(data)
-plot(numberOfReads, time, xlab="number of reads", ylab="time used")
-title("7 overlaps and random reference input fixed")
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test3.gff3
--- a/SMART/Java/Python/test3.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,501 +0,0 @@\n-chr1\tS-MART\tgene\t658657\t659771\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G02920;nbOverlaps=1;ID=AT1G02920;Name=AT1G02920\n-chr1\tS-MART\tgene\t306384\t306456\t.\t+\t.\tNote=tRNA;overlapWith=AT1G01870;nbOverlaps=1;ID=AT1G01870;Name=AT1G01870\n-chr1\tS-MART\tgene\t28500\t28706\t.\t+\t.\tNote=miRNA;overlapWith=AT1G01046,AT1G01040;nbOverlaps=2;ID=AT1G01046;Name=AT1G01046\n-chr1\tS-MART\tgene\t78932\t79032\t.\t-\t.\tNote=miRNA;overlapWith=AT1G01183;nbOverlaps=1;ID=AT1G01183;Name=AT1G01183\n-chr1\tS-MART\tgene\t31170\t33153\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01050,AT1G01040;nbOverlaps=2;ID=AT1G01050;Name=AT1G01050\n-chr1\tS-MART\tgene\t38752\t40944\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01070;nbOverlaps=1;ID=AT1G01070;Name=AT1G01070\n-chr1\tS-MART\tgene\t47485\t49286\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01090;nbOverlaps=1;ID=AT1G01090;Name=AT1G01090\n-chr1\tS-MART\tgene\t56624\t56740\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01115;nbOverlaps=1;ID=AT1G01115;Name=AT1G01115\n-chr1\tS-MART\tgene\t72339\t74096\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01160,AT1G01170;nbOverlaps=2;ID=AT1G01160;Name=AT1G01160\n-chr1\tS-MART\tgene\t73931\t74737\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01160,AT1G01170;nbOverlaps=2;ID=AT1G01170;Name=AT1G01170\n-chr1\tS-MART\tgene\t75583\t76758\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01180;nbOverlaps=1;ID=AT1G01180;Name=AT1G01180\n-chr1\tS-MART\tgene\t111890\t111961\t.\t-\t.\tNote=tRNA;overlapWith=AT1G01270;nbOverlaps=1;ID=AT1G01270;Name=AT1G01270\n-chr1\tS-MART\tgene\t88898\t89745\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01210;nbOverlaps=1;ID=AT1G01210;Name=AT1G01210\n-chr1\tS-MART\tgene\t91376\t95651\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01220;nbOverlaps=1;ID=AT1G01220;Name=AT1G01220\n-chr1\tS-MART\tgene\t95987\t97407\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01225;nbOverlaps=1;ID=AT1G01225;Name=AT1G012
25\n-chr1\tS-MART\tgene\t97456\t99240\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01230;nbOverlaps=1;ID=AT1G01230;Name=AT1G01230\n-chr1\tS-MART\tgene\t99894\t101834\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01240;nbOverlaps=1;ID=AT1G01240;Name=AT1G01240\n-chr1\tS-MART\tgene\t104491\t105330\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01250;nbOverlaps=1;ID=AT1G01250;Name=AT1G01250\n-chr1\tS-MART\tgene\t108946\t111609\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01260;nbOverlaps=1;ID=AT1G01260;Name=AT1G01260\n-chr1\tS-MART\tgene\t163419\t166239\t.\t+\t.\tNote=other_RNA;overlapWith=AT1G01448,AT1G01450;nbOverlaps=2;ID=AT1G01448;Name=AT1G01448\n-chr1\tS-MART\tgene\t114286\t116108\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01290;nbOverlaps=1;ID=AT1G01290;Name=AT1G01290\n-chr1\tS-MART\tgene\t116943\t118764\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01300;nbOverlaps=1;ID=AT1G01300;Name=AT1G01300\n-chr1\tS-MART\tgene\t119397\t119997\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01305;nbOverlaps=1;ID=AT1G01305;Name=AT1G01305\n-chr1\tS-MART\tgene\t120154\t121130\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01310;nbOverlaps=1;ID=AT1G01310;Name=AT1G01310\n-chr1\tS-MART\tgene\t132328\t135831\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01340;nbOverlaps=1;ID=AT1G01340;Name=AT1G01340\n-chr1\tS-MART\tgene\t136124\t138162\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01350;nbOverlaps=1;ID=AT1G01350;Name=AT1G01350\n-chr1\tS-MART\tgene\t141971\t143183\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01360;nbOverlaps=1;ID=AT1G01360;Name=AT1G01360\n-chr1\tS-MART\tgene\t143564\t145684\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01370;nbOverlaps=1;ID=AT1G01370;Name=AT1G01370\n-chr1\tS-MART\tgene\t147153\t147942\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01380;nbOverlaps=1;ID=AT1G01380;Name=AT1G01380\n-chr1\tS-MART\tgene\t148120\t149806\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01390;nbOverlaps=1;ID=
AT1G01390;Name=AT1G01390\n-chr1\tS-MART\tgene\t150689\t152210\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G01400;nbOverlaps=1;ID=AT1G01400;Name=AT1G01400\n-chr1\tS-MART\tgene\t153113\t154198\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G01410;nbOverlaps=1;ID=AT1G01410;Name=AT1G'..b'1G07050;nbOverlaps=1;ID=AT1G07050;Name=AT1G07050\n-chr1\tS-MART\tgene\t2177885\t2177958\t.\t+\t.\tNote=tRNA;overlapWith=AT1G07100;nbOverlaps=1;ID=AT1G07100;Name=AT1G07100\n-chr1\tS-MART\tgene\t2167107\t2168397\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07060;nbOverlaps=1;ID=AT1G07060;Name=AT1G07060\n-chr1\tS-MART\tgene\t2168564\t2169851\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07070;nbOverlaps=1;ID=AT1G07070;Name=AT1G07070\n-chr1\tS-MART\tgene\t2169982\t2172194\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07080;nbOverlaps=1;ID=AT1G07080;Name=AT1G07080\n-chr1\tS-MART\tgene\t2173952\t2174894\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07090;nbOverlaps=1;ID=AT1G07090;Name=AT1G07090\n-chr1\tS-MART\tgene\t2184347\t2186539\t.\t+\t.\tNote=other_RNA;overlapWith=AT1G07119,AT1G07120;nbOverlaps=2;ID=AT1G07119;Name=AT1G07119\n-chr1\tS-MART\tgene\t2187621\t2188417\t.\t-\t.\tNote=other_RNA;overlapWith=AT1G07128;nbOverlaps=1;ID=AT1G07128;Name=AT1G07128\n-chr1\tS-MART\tgene\t2184759\t2186580\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07119,AT1G07120;nbOverlaps=2;ID=AT1G07120;Name=AT1G07120\n-chr1\tS-MART\tgene\t2193941\t2195798\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07150;nbOverlaps=1;ID=AT1G07150;Name=AT1G07150\n-chr1\tS-MART\tgene\t2200123\t2201265\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07170;nbOverlaps=1;ID=AT1G07170;Name=AT1G07170\n-chr1\tS-MART\tgene\t2202330\t2202774\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07175;nbOverlaps=1;ID=AT1G07175;Name=AT1G07175\n-chr1\tS-MART\tgene\t2204320\t2206934\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07180;nbOverlaps=1;ID=AT1G07180;Name=AT1G07180\n-chr1\tS-MART\tgene\t220801
3\t2208177\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07190;nbOverlaps=1;ID=AT1G07190;Name=AT1G07190\n-chr1\tS-MART\tgene\t2208719\t2212546\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07200;nbOverlaps=1;ID=AT1G07200;Name=AT1G07200\n-chr1\tS-MART\tgene\t2215223\t2216982\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07210;nbOverlaps=1;ID=AT1G07210;Name=AT1G07210\n-chr1\tS-MART\tgene\t2245758\t2246492\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07300;nbOverlaps=1;ID=AT1G07300;Name=AT1G07300\n-chr1\tS-MART\tgene\t2249133\t2250529\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07320;nbOverlaps=1;ID=AT1G07320;Name=AT1G07320\n-chr1\tS-MART\tgene\t2260389\t2262865\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07360;nbOverlaps=1;ID=AT1G07360;Name=AT1G07360\n-chr1\tS-MART\tgene\t2263140\t2264551\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07370;nbOverlaps=1;ID=AT1G07370;Name=AT1G07370\n-chr1\tS-MART\tgene\t2290201\t2290977\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07460;nbOverlaps=1;ID=AT1G07460;Name=AT1G07460\n-chr1\tS-MART\tgene\t2382251\t2382331\t.\t-\t.\tNote=snoRNA;overlapWith=AT1G07702;nbOverlaps=1;ID=AT1G07702;Name=AT1G07702\n-chr1\tS-MART\tgene\t2338904\t2339321\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07600,AT1G07590;nbOverlaps=2;ID=AT1G07600;Name=AT1G07600\n-chr1\tS-MART\tgene\t2349097\t2351692\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07630;nbOverlaps=1;ID=AT1G07630;Name=AT1G07630\n-chr1\tS-MART\tgene\t2354354\t2356227\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07640;nbOverlaps=1;ID=AT1G07640;Name=AT1G07640\n-chr1\tS-MART\tgene\t2367437\t2368385\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07645;nbOverlaps=1;ID=AT1G07645;Name=AT1G07645\n-chr1\tS-MART\tgene\t2408203\t2409580\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07770;nbOverlaps=1;ID=AT1G07770;Name=AT1G07770\n-chr1\tS-MART\tgene\t2410056\t2412677\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07780;nbOverlaps=1;ID=A
T1G07780;Name=AT1G07780\n-chr1\tS-MART\tgene\t2412980\t2413708\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07790;nbOverlaps=1;ID=AT1G07790;Name=AT1G07790\n-chr1\tS-MART\tgene\t2414286\t2414967\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07795;nbOverlaps=1;ID=AT1G07795;Name=AT1G07795\n-chr1\tS-MART\tgene\t2421216\t2421947\t.\t-\t.\tNote=protein_coding_gene;overlapWith=AT1G07820;nbOverlaps=1;ID=AT1G07820;Name=AT1G07820\n-chr1\tS-MART\tgene\t2416265\t2420757\t.\t+\t.\tNote=protein_coding_gene;overlapWith=AT1G07810;nbOverlaps=1;ID=AT1G07810;Name=AT1G07810\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test3.png
b
Binary file SMART/Java/Python/test3.png has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/test3.png_I.png
b
Binary file SMART/Java/Python/test3.png_I.png has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/testInstall.py
--- a/SMART/Java/Python/testInstall.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,103 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""
-Test if the configuration is sound
-"""
-
-import sys
-import os
-import subprocess
-
-# Test Python files
-try :
-    from SMART.Java.Python.misc.RPlotter import *
-except:
-    print "Cannot find Python scripts! Update PYTHONPATH (currently %s) environment variable and see configuration in the documentation!" % (os.environ["PYTHONPATH"] if "PYTHONPATH" in os.environ else "empty")
-    sys.exit(3)
-
-try :
-    from SMART.Java.Python.mySql.MySqlTranscriptTable import *
-    from SMART.Java.Python.mySql.MySqlConnection import *
-except:
-    print "SQLite is not installed ! Please read the documentation!"
-    sys.exit(4)
-
-
-if __name__ == "__main__":
-    
-    print "Python scripts are correctly read."
-    
-    # Test mySQL
-    connection = MySqlConnection()
-    table = MySqlTranscriptTable(connection)
-
-    try:
-        table.createTranscriptTable()
-    except:
-        print "Cannot connect to the SQLite database! See configuration in the documentation!"
-        sys.exit(5)
-        
-    print "SQLite database is correctly set up."
-
-        
-    # Test R
-    fileName = "tmpFile.R"
-    file = open(fileName, "w")
-    file.write("?licence\n")
-    file.close()
-    rCommand = "R"
-    if "SMARTRPATH" in os.environ:
-        rCommand = os.environ["SMARTRPATH"]
-    command = "\"%s\" CMD BATCH %s" % (rCommand, fileName)
-    status    = subprocess.call(command, shell=True)
-    os.remove(fileName)
-    outputFileName = "%sout" % (fileName)
-    if os.path.exists(outputFileName):
-        os.remove(outputFileName)
-
-    if status != 0:
-        print "Problem with the execution of R script (command '%s' did not work, current directory is %s, status is %d)! See configuration in the documentation!" % (command, os.getcwd(), status)
-        sys.exit(6)
-
-    line = {0: 1, 1: 2}
-    pngFileName = "tmpFile.png"
-    plotter = RPlotter(pngFileName)
-    plotter.addLine(line)
-    try:
-        plotter.plot()
-    except:
-        print "Problem with the execution of R script: library 'RColorBrewer' is missing! See configuration in the documentation!"
-        sys.exit(7)
-    os.remove(pngFileName)
-
-    print "R is available."
-
-    print "Set up is fine! Enjoy S-MART!"
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/testOut.gff3
--- a/SMART/Java/Python/testOut.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,7949 +0,0 @@\n-X\tS-MART\ttranscript\t559416\t566828\t.\t+\t.\tseqedit=false;p_id=P3233;gene_id=YJR066W;tss_id=TSS6291;nbOverlaps=0;ID=YJR066W;Name=TOR1\n-X\tS-MART\ttranscript\t567019\t567444\t.\t-\t.\tseqedit=false;p_id=P5345;gene_id=YJR067C;tss_id=TSS1719;nbOverlaps=0;ID=YJR067C;Name=YAE1\n-X\tS-MART\ttranscript\t567643\t568704\t.\t+\t.\tseqedit=false;p_id=P3144;gene_id=YJR068W;tss_id=TSS5398;nbOverlaps=0;ID=YJR068W;Name=RFC2\n-X\tS-MART\ttranscript\t568806\t569399\t.\t-\t.\tseqedit=false;p_id=P860;gene_id=YJR069C;tss_id=TSS4309;nbOverlaps=0;ID=YJR069C;Name=HAM1\n-X\tS-MART\ttranscript\t569621\t570598\t.\t-\t.\tseqedit=false;p_id=P6450;gene_id=YJR070C;tss_id=TSS1494;nbOverlaps=0;ID=YJR070C;Name=LIA1\n-X\tS-MART\ttranscript\t570967\t572124\t.\t-\t.\tseqedit=false;p_id=P5586;gene_id=YJR072C;tss_id=TSS6083;nbOverlaps=0;ID=YJR072C;Name=NPA3\n-X\tS-MART\ttranscript\t570405\t570773\t.\t+\t.\tseqedit=false;p_id=P6019;gene_id=YJR071W;tss_id=TSS738;nbOverlaps=0;ID=YJR071W;Name=YJR071W\n-XV\tS-MART\ttranscript\t80348\t81190\t.\t+\t.\tseqedit=false;p_id=P4151;gene_id=YOL127W;tss_id=TSS4223;nbOverlaps=0;ID=YOL127W;Name=RPL25\n-XV\tS-MART\texon\t80348\t80360\t.\t+\t.\tID=YOL127W-exon1;Name=RPL25-exon1;Parent=YOL127W\n-XV\tS-MART\texon\t80775\t81190\t.\t+\t.\tID=YOL127W-exon2;Name=RPL25-exon2;Parent=YOL127W\n-X\tS-MART\ttranscript\t572315\t572935\t.\t-\t.\tseqedit=false;p_id=P2012;gene_id=YJR073C;tss_id=TSS710;nbOverlaps=0;ID=YJR073C;Name=OPI3\n-VI\tS-MART\ttranscript\t157916\t158007\t.\t-\t.\tseqedit=false;overlapsWith=HWUSI-EAS1656_0009_FC:3:74:18104:5842#0--HWUSI-EAS1656_0009_FC:3:56:11954:3071#0--HWUSI-EAS1656_0009;gene_id=tF(GAA)F;tss_id=TSS3380;nbOverlaps=184;ID=tF(GAA)F;Name=tF(GAA)F\n-VI\tS-MART\texon\t157916\t157951\t.\t-\t.\tID=tF(GAA)F-exon1;Name=tF(GAA)F-exon1;Parent=tF(GAA)F\n-VI\tS-MART\texon\t157971\t158007\t.\t-\t.\tID=tF(GAA)F-exon2;Name=tF(GAA)F-exon2;Parent=tF(GAA)F\n-X\tS-MART\ttranscript\t573095\t573751\t.\t+\t.\tseqedit=false;p_id=P5576;gene_id=YJ
R074W;tss_id=TSS2929;nbOverlaps=0;ID=YJR074W;Name=MOG1\n-X\tS-MART\ttranscript\t573981\t575171\t.\t+\t.\tseqedit=false;p_id=P1330;gene_id=YJR075W;tss_id=TSS857;nbOverlaps=0;ID=YJR075W;Name=HOC1\n-X\tS-MART\ttranscript\t575354\t576601\t.\t-\t.\tseqedit=false;p_id=P1404;gene_id=YJR076C;tss_id=TSS1037;nbOverlaps=0;ID=YJR076C;Name=CDC11\n-X\tS-MART\ttranscript\t577255\t578190\t.\t-\t.\tseqedit=false;p_id=P1673;gene_id=YJR077C;tss_id=TSS2801;nbOverlaps=0;ID=YJR077C;Name=MIR1\n-X\tS-MART\ttranscript\t578860\t580221\t.\t+\t.\tseqedit=false;p_id=P2480;gene_id=YJR078W;tss_id=TSS3796;nbOverlaps=0;ID=YJR078W;Name=BNA2\n-X\tS-MART\ttranscript\t580432\t581616\t.\t-\t.\tseqedit=false;p_id=P3568;gene_id=YJR080C;tss_id=TSS3503;nbOverlaps=0;ID=YJR080C;Name=AIM24\n-X\tS-MART\ttranscript\t580205\t581239\t.\t+\t.\tseqedit=false;p_id=P2074;gene_id=YJR079W;tss_id=TSS3170;nbOverlaps=0;ID=YJR079W;Name=YJR079W\n-X\tS-MART\texon\t580205\t580347\t.\t+\t.\tID=YJR079W-exon1;Name=YJR079W-exon1;Parent=YJR079W\n-X\tS-MART\texon\t581053\t581239\t.\t+\t.\tID=YJR079W-exon2;Name=YJR079W-exon2;Parent=YJR079W\n-X\tS-MART\ttranscript\t581914\t582255\t.\t-\t.\tseqedit=false;p_id=P5333;gene_id=YJR082C;tss_id=TSS407;nbOverlaps=0;ID=YJR082C;Name=EAF6\n-X\tS-MART\ttranscript\t582608\t583537\t.\t-\t.\tseqedit=false;p_id=P1588;gene_id=YJR083C;tss_id=TSS3054;nbOverlaps=0;ID=YJR083C;Name=ACF4\n-X\tS-MART\ttranscript\t586068\t586400\t.\t+\t.\tseqedit=false;p_id=P636;gene_id=YJR086W;tss_id=TSS1801;nbOverlaps=0;ID=YJR086W;Name=STE18\n-X\tS-MART\ttranscript\t583733\t585004\t.\t+\t.\tseqedit=false;p_id=P6689;gene_id=YJR084W;tss_id=TSS4164;nbOverlaps=0;ID=YJR084W;Name=YJR084W\n-X\tS-MART\ttranscript\t585120\t585437\t.\t-\t.\tseqedit=false;p_id=P4342;gene_id=YJR085C;tss_id=TSS1798;nbOverlaps=0;ID=YJR085C;Name=YJR085C\n-X\tS-MART\ttranscript\t586495\t587373\t.\t-\t.\tseqedit=false;p_id=P5189;gene_id=YJR088C;tss_id=TSS5912;nbOverlaps=0;ID=YJR088C;Name=EMC2\n-X\tS-MART\ttranscript\t586400\t586750\t.\t+\t.\tseqedit=false;p_
id=P5459;gene_id=YJR087W;tss_id=TSS2614;nbOverlaps=0;ID=YJR087W;Name=YJR087W\n-X\tS-MART\ttranscript\t587718\t590582\t.\t+\t.\tseqedit=false;p_id=P2527;gene_id=YJR089W;tss_id=TSS2068;nbOverlaps=0;ID=YJR089W;Name=BIR1\n-XV\tS-MART\ttranscript\t78352\t79479\t.\t-\t.\tseqedit=false;p_id=P637'..b'0;ID=YML081W;Name=TDA9\n-III\tS-MART\ttranscript\t186489\t190169\t.\t+\t.\tseqedit=false;p_id=P5337;gene_id=YCR033W;tss_id=TSS5862;nbOverlaps=0;ID=YCR033W;Name=SNT1\n-XIII\tS-MART\ttranscript\t97941\t99400\t.\t-\t.\tseqedit=false;p_id=P3499;gene_id=YML085C;tss_id=TSS4259;nbOverlaps=0;ID=YML085C;Name=TUB1\n-XIII\tS-MART\texon\t97941\t99259\t.\t-\t.\tID=YML085C-exon1;Name=TUB1-exon1;Parent=YML085C\n-XIII\tS-MART\texon\t99376\t99400\t.\t-\t.\tID=YML085C-exon2;Name=TUB1-exon2;Parent=YML085C\n-XIII\tS-MART\ttranscript\t101862\t103811\t.\t+\t.\tseqedit=false;p_id=P2203;gene_id=YML082W;tss_id=TSS1709;nbOverlaps=0;ID=YML082W;Name=YML082W\n-XIII\tS-MART\ttranscript\t99794\t101050\t.\t-\t.\tseqedit=false;p_id=P5692;gene_id=YML083C;tss_id=TSS948;nbOverlaps=0;ID=YML083C;Name=YML083C\n-XIII\tS-MART\ttranscript\t99489\t99797\t.\t+\t.\tseqedit=false;p_id=P3429;gene_id=YML084W;tss_id=TSS5738;nbOverlaps=0;ID=YML084W;Name=YML084W\n-VII\tS-MART\ttranscript\t790459\t793053\t.\t-\t.\tseqedit=false;p_id=P4647;gene_id=YGR150C;tss_id=TSS3036;nbOverlaps=0;ID=YGR150C;Name=CCM1\n-VII\tS-MART\ttranscript\t789031\t790329\t.\t+\t.\tseqedit=false;p_id=P267;gene_id=YGR149W;tss_id=TSS6312;nbOverlaps=0;ID=YGR149W;Name=YGR149W\n-XVI\tS-MART\ttranscript\t919381\t920487\t.\t+\t.\tseqedit=false;p_id=P2858;gene_id=YPR191W;tss_id=TSS1183;nbOverlaps=0;ID=YPR191W;Name=QCR2\n-XIV\tS-MART\ttranscript\t340352\t340858\t.\t+\t.\tseqedit=false;p_id=P3270;gene_id=YNL157W;tss_id=TSS5800;nbOverlaps=0;ID=YNL157W;Name=IGO1\n-XIII\tS-MART\ttranscript\t95791\t97371\t.\t-\t.\tseqedit=false;p_id=P392;gene_id=YML086C;tss_id=TSS3973;nbOverlaps=0;ID=YML086C;Name=ALO1\n-XIII\tS-MART\ttranscript\t94431\t95369\t.\t-\t.\tseqedit=false;p_id=
P6540;gene_id=YML087C;tss_id=TSS643;nbOverlaps=0;ID=YML087C;Name=AIM33\n-XIII\tS-MART\ttranscript\t92235\t94241\t.\t+\t.\tseqedit=false;p_id=P4927;gene_id=YML088W;tss_id=TSS3372;nbOverlaps=0;ID=YML088W;Name=UFO1\n-XIII\tS-MART\ttranscript\t87123\t90731\t.\t-\t.\tseqedit=false;p_id=P927;gene_id=YML091C;tss_id=TSS632;nbOverlaps=0;ID=YML091C;Name=RPM2\n-XIII\tS-MART\ttranscript\t91041\t91409\t.\t-\t.\tseqedit=false;p_id=P4844;gene_id=YML089C;tss_id=TSS911;nbOverlaps=0;ID=YML089C;Name=YML089C\n-XIII\tS-MART\ttranscript\t90744\t91130\t.\t+\t.\tseqedit=false;p_id=P849;gene_id=YML090W;tss_id=TSS5516;nbOverlaps=0;ID=YML090W;Name=YML090W\n-XIII\tS-MART\ttranscript\t85987\t86739\t.\t-\t.\tseqedit=false;p_id=P5314;gene_id=YML092C;tss_id=TSS3012;nbOverlaps=0;ID=YML092C;Name=PRE8\n-XIII\tS-MART\ttranscript\t83090\t85789\t.\t+\t.\tseqedit=false;p_id=P3589;gene_id=YML093W;tss_id=TSS794;nbOverlaps=0;ID=YML093W;Name=UTP14\n-XIII\tS-MART\ttranscript\t82219\t82620\t.\t-\t.\tseqedit=false;p_id=P3718;gene_id=YML094C-A;tss_id=TSS4585;nbOverlaps=0;ID=YML094C-A;Name=YML094C-A\n-VII\tS-MART\ttranscript\t817747\t823015\t.\t-\t.\tseqedit=false;overlapsWith=HWUSI-EAS1656_0009_FC:3:49:2195:2756#0--HWUSI-EAS1656_0009_FC:3:12:10443:12335#0--HWUSI-EAS1656_0009;p_id=P3114;gene_id=YGR161C-D;tss_id=TSS2033;nbOverlaps=431;ID=YGR161C-D;Name=YGR161C-D\n-VII\tS-MART\texon\t817747\t821709\t.\t-\t.\tID=YGR161C-D-exon1;Name=YGR161C-D-exon1;Parent=YGR161C-D\n-VII\tS-MART\texon\t821711\t823015\t.\t-\t.\tID=YGR161C-D-exon2;Name=YGR161C-D-exon2;Parent=YGR161C-D\n-XIII\tS-MART\ttranscript\t82275\t82849\t.\t+\t.\tseqedit=false;p_id=P1742;gene_id=YML094W;tss_id=TSS1864;nbOverlaps=0;ID=YML094W;Name=GIM5\n-XIII\tS-MART\texon\t82275\t82290\t.\t+\t.\tID=YML094W-exon1;Name=GIM5-exon1;Parent=YML094W\n-XIII\tS-MART\texon\t82374\t82849\t.\t+\t.\tID=YML094W-exon2;Name=GIM5-exon2;Parent=YML094W\n-VII\tS-MART\ttranscript\t807073\t807684\t.\t+\t.\tseqedit=false;p_id=P5320;gene_id=YGR160W;tss_id=TSS4839;nbOverlaps=0;ID=YGR160W
;Name=YGR160W\n-VII\tS-MART\ttranscript\t821693\t823015\t.\t-\t.\tseqedit=false;overlapsWith=HWUSI-EAS1656_0009_FC:3:69:10212:8264#0--HWUSI-EAS1656_0009_FC:3:26:11389:20176#0--HWUSI-EAS1656_000;p_id=P2676;gene_id=YGR161C-C;tss_id=TSS2033;nbOverlaps=12;ID=YGR161C-C;Name=YGR161C-C\n-XII\tS-MART\ttranscript\t660716\t662833\t.\t+\t.\tseqedit=false;p_id=P1153;gene_id=YLR258W;tss_id=TSS609;nbOverlaps=0;ID=YLR258W;Name=GSY2\n-XIII\tS-MART\ttranscript\t81481\t82113\t.\t-\t.\tseqedit=false;p_id=P6599;gene_id=YML095C;tss_id=TSS4095;nbOverlaps=0;ID=YML095C;Name=RAD10\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/toolLauncher/RnaFoldLauncher.py
--- a/SMART/Java/Python/toolLauncher/RnaFoldLauncher.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,379 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import os\n-import sys\n-import random\n-import subprocess\n-from SMART.Java.Python.structure.TranscriptList import TranscriptList\n-from SMART.Java.Python.structure.Transcript import Transcript\n-from SMART.Java.Python.misc.Progress import Progress\n-from commons.core.parsing.FastaParser import FastaParser\n-\n-\n-class RnaFoldStructure(object):\n-    """\n-    A structure to store the output of RNAFold\n-    @ivar name:         the name of the sequence\n-    @type name:         string\n-    @ivar sequence:     the sequence (with gaps)\n-    @type sequence:     string\n-    @ivar structure:    the bracket structure\n-    @type structure:    string\n-    @ivar energy:       the energy of the fold\n-    @type energy:       float\n-    @ivar interactions: the interactions inside the structure\n-    @ivar interactions: the interactions inside the structure\n-    """\n-\n-    def __init__(self, name, sequence, structure, energy):\n-        """\n-        Initialize the structure\n-        @param name       the name of the sequence\n-        @type  name:      string\n-        @param sequence:  the sequence (with gaps)\n-        @type  sequence:  string\n-        @param structure: the bracket structure\n-        @type  structure: string\n-        @param energy:    the energy of the fold\n-        @type  energy:    float\n-        """\n-        self.name         = name        \n-        self.sequence     = sequence\n-        self.structure    = structure\n-        self.energy       = energy\n-        self.interactions = None\n-\n-    \n-    
def analyze(self):\n-        """\n-        Analyze the output, assign the interactions\n-        """\n-        if len(self.sequence) != len(self.structure):\n-            sys.exit("Sizes of sequence and structure differ (\'%s\' and \'%s\')!\\n" % (self.sequence, self.structure))\n-        stack                         = []\n-        self.interactions = [None for i in range(len(self.sequence))]\n-        for i in range(len(self.sequence)):\n-            if self.structure[i] == "(":\n-                stack.append(i)\n-            elif self.structure[i] == ")":\n-                if not stack:\n-                    sys.exit("Something wrong in the interaction line \'%s\'!\\n" % (self.structure))\n-                otherI = stack.pop()\n-                self.interactions[i]      = otherI\n-                self.interactions[otherI] = i\n-        if stack:\n-            sys.exit("Something wrong in the interaction line \'%s\'!\\n" % (self.structure))\n-\n-\n-    def getNbBulges(self, sta'..b'\n-        @type rnaFoldOutput: class L{RnaFoldStructure<RnaFoldStructure>}\n-        @ivar reverse:       invert the extensions\n-        @type reverse:       bool\n-        @return:             a t-uple of energy, number of insertions, number of bulges, strand\n-        """\n-        rnaFoldOutput.analyze()\n-        transcriptSize     = transcript.end - transcript.start + 1\n-        start              = fivePrimeExtension if not reverse else threePrimeExtension\n-        end                = start + transcriptSize\n-        energy             = rnaFoldOutput.energy\n-        nbBulges           = rnaFoldOutput.getNbBulges(start, end)\n-        (minStar, maxStar) = rnaFoldOutput.getStar(start, end)\n-        minStar           += transcript.start - start\n-        maxStar           += transcript.start - start\n-        if self.verbosity > 100:\n-            print "Getting structure with energy %d, nbBulges %d, miRna* %d-%d, strand %s" % (energy, nbBulges, minStar, maxStar, "-" if 
reverse else "+")\n-        return (energy, nbBulges, minStar, maxStar, reverse)\n-\n-    \n-    def fold(self, transcript):\n-        """\n-        Fold a transcript (in each strand)\n-        @ivar transcript: a transcript\n-        @type transcript: class L{Transcript<Transcript>}\n-        @return:          a t-uple of energy, number of insertions, number of bulges, strand\n-        """\n-        results     = [None] * self.nbStrands\n-        strands     = [False, True] if self.nbStrands == 2 else [False]\n-        minNbBulges = 1000000\n-        for i, reverse in enumerate(strands):\n-            self.writeInputFile(transcript, reverse, self.fivePrimeExtension, self.threePrimeExtension)\n-            self.startRnaFold()\n-            output = self.parseRnaFoldOutput()\n-            results[i]  = self.analyzeRnaFoldOutput(transcript, output, reverse, self.fivePrimeExtension, self.threePrimeExtension)\n-            minNbBulges = min(minNbBulges, results[i][1])\n-        for result in results:\n-            if result[1] == minNbBulges:\n-                return result\n-        return None\n-\n-\n-    def refold(self, transcript):\n-        """\n-        Fold a transcript, knowing where the miRNA starts and end\n-        @ivar transcript: a transcript\n-        @type transcript: class L{Transcript<Transcript>}\n-        @return:          the energy\n-        """\n-        miStar              = transcript.getTagValue("miRnaStar")\n-        startMiStar         = int(miStar.split("-")[0])\n-        endMiStart          = int(miStar.split("-")[1])\n-        fivePrimeExtension  = max(0, transcript.start - startMiStar) + 5\n-        threePrimeExtension = max(0, endMiStart - transcript.end) + 5\n-        self.writeInputFile(transcript, False, fivePrimeExtension, threePrimeExtension)\n-        self.startRnaFold()\n-        output = self.parseRnaFoldOutput()\n-        result = self.analyzeRnaFoldOutput(transcript, output, False, fivePrimeExtension, threePrimeExtension)\n-  
      return result[0]\n-\n-\n-    def computeResults(self):\n-        """\n-        Fold all and fill an output transcript list with the values\n-        """\n-        progress = Progress(self.inputTranscriptList.getNbTranscripts(), "Handling transcripts", self.verbosity)\n-        self.outputTranscriptList = TranscriptList()\n-        for transcript in self.inputTranscriptList.getIterator():\n-            result = self.fold(transcript)\n-            transcript.setTagValue("nbBulges", result[1])\n-            transcript.setTagValue("miRnaStar", "%d-%d" % (result[2], result[3]))\n-            transcript.setTagValue("miRNAstrand", result[4])\n-            transcript.setTagValue("energy", self.refold(transcript))\n-            self.outputTranscriptList.addTranscript(transcript)\n-            progress.inc()\n-        progress.done()\n-\n-\n-    def getResults(self):\n-        """\n-        Get an output transcript list with the values\n-        """\n-        if self.outputTranscriptList == None:\n-            self.computeResults()\n-        return self.outputTranscriptList\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/trimAdaptor.py
--- a/SMART/Java/Python/trimAdaptor.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,107 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Trim the sequences from a 5' adaptor"""
-
-import sys
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import FastaParser
-from commons.core.parsing.FastqParser import FastqParser
-from commons.core.writer.FastaWriter import FastaWriter
-from commons.core.writer.FastqWriter import FastqWriter
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc import Utils
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Trim Adaptor v1.0.1: Remove the 3' adaptor of a list of reads. [Category: Data Modification]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",               type="string", help="input file [compulsory] [format: file in sequence format given by -f]")
-    parser.add_option("-f", "--format",    dest="format",         action="store",               type="string", help="format of file [compulsory] [format: sequence file format]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",               type="string", help="output file [compulsory] [format: output file in sequence format given by -f]")
-    parser.add_option("-a", "--adaptor",   dest="adaptor",        action="store",               type="string", help="adaptor [compulsory] [format: string]")
-    parser.add_option("-e", "--errors",    dest="errors",         action="store", default=0,    type="int" ,   help="number of errors in percent [format: int] [default: 0]")
-    parser.add_option("-n", "--noAdaptor", dest="noAdaptor",      action="store", default=None, type="string", help="file name where to print sequences with no adaptor [format: output file in sequence format given by -f]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,    type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    minSize = 2
-
-    if options.format == "fasta":
-        parser = FastaParser(options.inputFileName, options.verbosity)
-    elif options.format == "fastq":
-        parser = FastqParser(options.inputFileName, options.verbosity)
-    else:
-        sys.exit("Cannot handle files with '%s' format." % (options.format))
-
-    if options.format == "fasta":
-        writer = FastaWriter(options.outputFileName, options.verbosity)
-    elif options.format == "fastq":
-        writer = FastqWriter(options.outputFileName, options.verbosity)
-    else:
-        sys.exit("Cannot handle files with '%s' format." % (options.format))
-
-    writerNoAdaptor = None
-    if options.noAdaptor != None:
-        if options.format == "fasta":
-            writerNoAdaptor = FastaWriter(options.noAdaptor, options.verbosity)
-        elif options.format == "fastq":
-            writerNoAdaptor = FastqWriter(options.noAdaptor, options.verbosity)
-        else:
-            sys.exit("Cannot handle files with '%s' format." % (options.format))
-
-    nbFound = 0
-        
-    progress = Progress(parser.getNbSequences(), "Reading %s" % (options.inputFileName), options.verbosity)
-    for sequence in parser.getIterator():
-        progress.inc()
-        nucleotides = sequence.getSequence()
-        found       = False
-        for i in range(len(nucleotides) - minSize):
-            nucleotidesPart = nucleotides[i:]
-            adaptorPart     = options.adaptor if len(nucleotidesPart) >= len(options.adaptor) else options.adaptor[:len(nucleotidesPart)]
-            nucleotidesPart = nucleotidesPart if len(adaptorPart) == len(nucleotidesPart) else nucleotidesPart[:len(adaptorPart)]
-            if Utils.getHammingDistance(adaptorPart, nucleotidesPart) <= int(options.errors / 100.0 * len(adaptorPart)):
-                nbFound += 1
-                sequence.shrinkToFirstNucleotides(i)
-                writer.addSequence(sequence)
-                found = True
-                break
-        if not found:
-            writer.addSequence(sequence)
-            if writerNoAdaptor != None:
-                writerNoAdaptor.addSequence(sequence)
-    progress.done()
-
-    print "%d sequences with adaptors on %d (%.2f%%)" % (nbFound, parser.getNbSequences(), float(nbFound) / parser.getNbSequences() * 100)
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/trimSequence.py
--- a/SMART/Java/Python/trimSequence.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,102 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Remove sequences with low reliability"""
-
-from optparse import OptionParser
-from commons.core.parsing.SequenceListParser import *
-from commons.core.writer.FastaWriter import *
-from SMART.Java.Python.misc.Progress import *
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Trim Sequences v1.0.1: Remove sequences with low reliability: low occurrences and highly repeted. [Category: Personnal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",         dest="inputFileName",    action="store",                                         type="string", help="input file [compulsory] [format: file in FASTA format]")
-    parser.add_option("-o", "--output",        dest="outputFileName", action="store",                                         type="string", help="output file [compulsory] [format: output file in FASTA format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",            action="store",            default=1,         type="int",        help="trace level [format: int]")
-    parser.add_option("-l", "--log",             dest="log",                        action="store_true", default=False,                                help="write a log file [format: bool] [default: false]")
-    (options, args) = parser.parse_args()
-
-    parser            = SequenceListParser(options.inputFileName, options.verbosity)
-    nbSequences = parser.getNbSequences()
-    progress        = Progress(nbSequences, "Parsing file %s" % (options.inputFileName), options.verbosity)
-    
-    writer = FastaWriter(options.outputFileName, options.verbosity)
-    if options.log:
-        logHandle = open("log.txt", "w")
-    
-    letters                            = ("A", "C", "G", "T")
-    nbLowComplexity            = 0
-    nbTooManyOccurrences = 0
-    
-    for sequence in parser.getIteractor():
-        halfSize                     = len(sequence.sequence) / 2
-        occurrences                = set()
-        nbOccurrences            = dict(zip(letters, [0 for letter in letters]))
-        tooManyOccurrences = False
-        good                             = True
-        
-        for char in sequence.sequence:
-            if char in letters:
-                occurrences.add(char)
-                nbOccurrences[char] += 1
-                
-            
-        if len(occurrences) < 4:
-            nbLowComplexity += 1
-            if options.log:
-                logHandle.write("Low complexity for %s\n" % (sequence.sequence))
-            good = False
-
-        if good:
-            for letter, nbOccurrence in nbOccurrences.iteritems():
-                if nbOccurrence > halfSize:
-                    if not tooManyOccurrences:
-                        nbTooManyOccurrences += 1
-                        if options.log:
-                            logHandle.write("Too many occurrences for %s\n" % (sequence.sequence))
-                    tooManyOccurrences = True
-                    good = False
-            
-        if good:
-            writer.addSequence(sequence)
-                    
-        progress.inc()
-    progress.done()
-    
-    if options.log:
-        logHandle.close()
-        
-    print "%d out of %d have low complexity (%f%%)"             % (nbLowComplexity, nbSequences, (float(nbLowComplexity) / nbSequences * 100))        
-    print "%d out of %d have too many occurrences (%f%%)" % (nbTooManyOccurrences, nbSequences, (float(nbTooManyOccurrences) / nbSequences * 100))
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/trimSequences.py
--- a/SMART/Java/Python/trimSequences.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,149 +0,0 @@\n-#! /usr/bin/env python\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-from optparse import OptionParser\n-from commons.core.parsing.FastaParser import FastaParser\n-from commons.core.parsing.FastqParser import FastqParser\n-from commons.core.writer.FastaWriter import FastaWriter\n-from commons.core.writer.FastqWriter import FastqWriter\n-from SMART.Java.Python.misc.Progress import Progress\n-from SMART.Java.Python.misc import Utils\n-\n-\n-if __name__ == "__main__":\n-    \n-    # parse command line\n-    description = "Trim Sequences v1.0.3: Remove the 5\' and/or 3\' adaptors of a list of reads. [Category: Data Modification]"\n-\n-    parser = OptionParser(description = description)\n-    parser.add_option("-i", "--input",         dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in sequence format given by -f]")\n-    parser.add_option("-f", "--format",        dest="format",         action="store",                     type="string", help="format of file [compulsory] [format: sequence file format]")\n-    parser.add_option("-o", "--output",        dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in sequence format given by -f]")\n-    parser.add_option("-3", "--threePAdaptor", dest="threePAdaptor",  action="store",      default=None,  type="string", help="3\' adaptor [format: string] [default: None]")\n-    parser.add_option("-5", "--fivePAdaptor",  dest="fivePAdaptor",   action="store",      default=None,  type="string", help="5\' adaptor [format: string] 
[default: None]")\n-    parser.add_option("-e", "--errors",        dest="errors",         action="store",      default=0,     type="int",    help="number of errors in percent [format: int] [default: 0]")\n-    parser.add_option("-d", "--indels",        dest="indels",         action="store_true", default=False,                help="also accept indels [format: bool] [default: False]")\n-    parser.add_option("-n", "--noAdaptor5p",   dest="noAdaptor5p",    action="store",      default=None,  type="string", help="print sequences with no 5\' adaptor [format: output file in sequence format given by -f]")\n-    parser.add_option("-m", "--noAdaptor3p",   dest="noAdaptor3p",    action="store",      default=None,  type="string", help="print sequences with no 3\' adaptor [format: output file in sequence format given by -f]")\n-    parser.add_option("-v", "--verbosity",     dest="verbosity",      action="store",      default'..b'ormat))\n-\n-\n-    if options.noAdaptor5p != None:\n-        if options.format == "fasta":\n-            writer5pNoAdaptor = FastaWriter(options.noAdaptor5p, options.verbosity)\n-        elif options.format == "fastq":\n-            writer5pNoAdaptor = FastqWriter(options.noAdaptor5p, options.verbosity)\n-        else:\n-            raise Exception("Cannot handle files with \'%s\' format." % (options.format))\n-    nbFound5p = 0\n-    \n-    if options.noAdaptor3p != None:\n-        if options.format == "fasta":\n-            writer3pNoAdaptor = FastaWriter(options.noAdaptor3p, options.verbosity)\n-        elif options.format == "fastq":\n-            writer3pNoAdaptor = FastqWriter(options.noAdaptor3p, options.verbosity)\n-        else:\n-            raise Exception("Cannot handle files with \'%s\' format." 
% (options.format))\n-    nbFound3p = 0\n-            \n-    progress = Progress(parser.getNbSequences(), "Reading %s" % (options.inputFileName), options.verbosity)\n-    for sequence in parser.getIterator():\n-        progress.inc()\n-        if options.threePAdaptor != None:\n-            nucleotides = sequence.sequence\n-            found       = False\n-            bestScore   = 10000\n-            bestRegion  = 0\n-            for i in range(len(nucleotides) - minSize):\n-                nucleotidesPart = nucleotides[i:]\n-                adaptorPart     = options.threePAdaptor if len(nucleotidesPart) >= len(options.threePAdaptor) else options.threePAdaptor[:len(nucleotidesPart)]\n-                nucleotidesPart = nucleotidesPart if len(adaptorPart) == len(nucleotidesPart) else nucleotidesPart[:len(adaptorPart)]\n-                if options.indels:\n-                    score = Utils.getLevenshteinDistance(adaptorPart, nucleotidesPart)\n-                else:\n-                    score = Utils.getHammingDistance(adaptorPart, nucleotidesPart)\n-                if score <= int(options.errors / 100.0 * len(adaptorPart)) and score < bestScore:\n-                    bestScore  = score\n-                    bestRegion = i\n-                    found      = True\n-            if found:\n-                nbFound3p += 1\n-                sequence.shrinkToFirstNucleotides(bestRegion)\n-            elif options.noAdaptor3p:\n-                writer3pNoAdaptor.addSequence(sequence)\n-        if options.fivePAdaptor != None:\n-            nucleotides = sequence.sequence\n-            found       = False\n-            bestScore   = 10000\n-            bestRegion  = 0\n-            for i in reversed(range(minSize, len(nucleotides))):\n-                nucleotidesPart = nucleotides[:i]\n-                adaptorPart     = options.fivePAdaptor if len(nucleotidesPart) >= len(options.fivePAdaptor) else options.fivePAdaptor[-len(nucleotidesPart):]\n-                
nucleotidesPart = nucleotidesPart if len(adaptorPart) == len(nucleotidesPart) else nucleotidesPart[-len(adaptorPart):]\n-                if options.indels:\n-                    score = Utils.getLevenshteinDistance(adaptorPart, nucleotidesPart)\n-                else:\n-                    score = Utils.getHammingDistance(adaptorPart, nucleotidesPart)\n-                if score <= int(options.errors / 100.0 * len(adaptorPart)) and score < bestScore:\n-                    bestScore  = score\n-                    bestRegion = i\n-                    found      = True\n-            if found:\n-                nbFound5p += 1\n-                sequence.shrinkToLastNucleotides(len(nucleotides) - bestRegion)\n-            elif options.noAdaptor5p:\n-                writer5pNoAdaptor.addSequence(sequence)\n-        writer.addSequence(sequence)\n-    progress.done()\n-    writer.close()\n-\n-    print "%d sequences" % (parser.getNbSequences())\n-    if options.fivePAdaptor != None:\n-        print "%d sequences with 5\' adaptors (%.2f%%)" % (nbFound5p, float(nbFound5p) / parser.getNbSequences() * 100)\n-    if options.threePAdaptor != None:\n-        print "%d sequences with 3\' adaptors (%.2f%%)" % (nbFound3p, float(nbFound3p) / parser.getNbSequences() * 100)\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/txtToFasta.py
--- a/SMART/Java/Python/txtToFasta.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,63 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Transform a plain text file to a FASTA file"""
-
-import os
-from optparse import OptionParser
-from SMART.Java.Python.structure.Sequence import *
-from SMART.Java.Python.misc.Progress import *
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    parser = OptionParser()
-    description = "Txt to Fasta v1.0.1: Convert a Txt file (one sequence per line) into Fasta file. [Category: Personnal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",     dest="inputFileName",  action="store",            type="string", help="input file [compulsory] [format: file in TXT format]")
-    parser.add_option("-o", "--output",    dest="outputFileName", action="store",            type="string", help="output file [compulsory] [format: output file in FASTA format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1, type="int",    help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    inputFile = open(options.inputFileName)
-    outputFile = open(options.outputFileName, "w")
-    
-    for line in inputFile:
-        line = line.strip()
-        splittedLine = line.split()
-        sequence = splittedLine[0]
-        nb = 1 if len(splittedLine) == 1 else int(splittedLine[1])
-        for i in range(nb):
-            outputFile.write(">%s\n%s\n" % (sequence, sequence))
-    
-    inputFile.close()
-    outputFile.close()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/updateQual.py
--- a/SMART/Java/Python/updateQual.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,86 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Update a .qual file given a .fasta file"""
-
-from optparse import OptionParser
-from commons.core.parsing.FastaParser import *
-from SMART.Java.Python.misc.Progress import *
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Update Qual v1.0.1: Remove the sequence in a Qual file which are not in the corresponding Fasta file. [Category: Personnal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-f", "--fasta",         dest="fastaFile",    action="store",                                         type="string", help="fasta file [compulsory] [format: file in FASTA format]")
-    parser.add_option("-q", "--qual",            dest="qualFile",     action="store",                                         type="string", help="qual file [compulsory] [format: file in QUAL format]")
-    parser.add_option("-o", "--output",        dest="outputFile", action="store",                                         type="string", help="output file [compulsory] [format: output file in QUAL format]")
-    parser.add_option("-v", "--verbosity", dest="verbosity",    action="store",            default=1,         type="int",        help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    parser             = SequenceListParser(options.fastaFile, options.verbosity)
-    nbSequences    = parser.getNbSequences()
-    progress         = Progress(nbSequences, "Parsing file %s" % (options.fastaFile), options.verbosity)
-    qualHandle     = open(options.qualFile)
-    outputHandle = open(options.outputFile, "w")
-    nbRefused        = 0
-    nbTotal            = 0
-    
-    names = []
-    while parser.getNextSequence():
-        sequence = parser.getCurrentSequence()
-        nbTotal += 1
-    
-        found = False
-        name    = None
-        for line in qualHandle:
-            line = line.strip()
-            if line[0] == ">":
-                name = line[1:]
-                if name == sequence.name:
-                    found = True
-                else:
-                    nbRefused += 1
-            else:
-                if found:
-                    outputHandle.write(">%s\n%s\n" % (name, line))
-                    found = False
-                    name    = None
-                    break
-        progress.inc()
-    progress.done()
-
-    
-    outputHandle.close()
-    qualHandle.close()
-    
-    print "%d out of %d are refused (%f%%)"             % (nbRefused, nbTotal, (float(nbRefused) / nbTotal * 100))        
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/wigExploder.py
--- a/SMART/Java/Python/wigExploder.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,99 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Explode wig files into several files, one for each chromosome"""
-
-import os, re, sys
-from optparse import OptionParser
-
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Wig Exploder v1.0.1: Explode a big WIG file into several smaller WIG files (one per chromosome). [Category: Personal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",                        dest="inputFileName", action="store",                                         type="string", help="input file [compulsory] [format: file in WIG format]")
-    parser.add_option("-o", "--output",                     dest="output",                action="store",            default=None,    type="string", help="output directory [compulsory] [format: directory]")
-    parser.add_option("-s", "--strand",                     dest="strand",                action="store",            default=None,    type="string", help="strand of the input WIG file (if any) [format: choice (+, -)]")
-    parser.add_option("-v", "--verbosity",                dest="verbosity",         action="store",            default=1,         type="int",        help="trace level [format: int]")
-    (options, args) = parser.parse_args()
-
-    inputFile = open(options.inputFileName)
-
-    files         = {}
-    file            = None
-    trackLine = None
-    strand        = ""
-    if options.strand != None:
-        strand = options.strand
-
-    for line in inputFile:
-        line = line.strip()
-
-        if line.startswith("track"):
-            trackLine = line
-            continue
-
-        m1 = re.search(r"^\s*fixedStep\s+chrom=(\S+)\s+start=\d+\s+step=\d+\s*$", line)
-        m2 = re.search(r"^\s*fixedStep\s+chrom=(\S+)\s+start=\d+\s+step=\d+\s+span=\d+\s*$", line)
-        m3 = re.search(r"^\s*variableStep\s+chrom=(\S+)\s*$", line)
-        m4 = re.search(r"^\s*variableStep\s+chrom=(\S+)span=\d+\s*$", line)
-
-        m = None
-        if m1 != None:
-            m = m1
-        elif m2 != None:
-            m = m2
-        elif m3 != None:
-            m = m3
-        elif m4 != None:
-            m = m4
-
-        if m != None:
-            chromosome = m.group(1)
-
-            if chromosome in files:
-                file = files[chromosome]
-            else:
-                file = open("%s%s%s%s.wig" % (options.output, os.sep, chromosome, strand), "w")
-                files[chromosome] = file
-                if trackLine != None:
-                    file.write("%s\n" % (trackLine))
-
-        if file == None:
-            sys.exit("Header is missing (current first line is '%s')! Aborting..." % (line))
-
-        file.write("%s\n" % (line))
-
-    inputFile.close()
-
-    for chromosome in files:
-        files[chromosome].close()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Python/wrongFastqToQual.py
--- a/SMART/Java/Python/wrongFastqToQual.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,81 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Convert a pseudo-FASTQ file to QUAL files"""
-
-import os
-from optparse import OptionParser
-from SMART.Java.Python.misc.Progress import *
-from math import *
-
-if __name__ == "__main__":
-    
-    # parse command line
-    description = "Wrong FastQ to Qual v1.0.1: Convert a pseudo-FastQ (i.e. a FastQ file with a wrong format) into a Qual file. [Category: Personnal]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",            dest="inputFileName",         action="store",                                                type="string", help="input file [compulsory] [format: file in FASTQ format]")
-    parser.add_option("-o", "--output",         dest="outputFileName",        action="store",                                                type="string", help="output file [compulsory] [format: output file in QUAL format]")
-    parser.add_option("-v", "--verbosity",    dest="verbosity",                 action="store",            default=1,                type="int",        help="trace level [format: int] [default: 1]")
-    (options, args) = parser.parse_args()
-
-    inputFile             = open(options.inputFileName)
-    outputFastaFile = open("%s.fasta" % (options.outputFileName), "w")
-    outputQualFile    = open("%s.qual" % (options.outputFileName), "w")
-    
-    inSequence     = False
-    inQuality        = True
-    sequenceName = None
-    for line in inputFile:
-        line = line.strip()
-        if line[0] == "@":
-            if inQuality == False:
-                sys.exit("Quality of %s is missing" % (sequenceName))
-            inSequence     = True
-            inQuality        = False
-            sequenceName = line[1:]
-            outputFastaFile.write(">%s\n" % (sequenceName))
-        elif line[0] == "+":
-            if inSequence == False:
-                sys.exit("Sequence of %s is missing" % (line[1:]))
-            inSequence     = False
-            inQuality        = True
-            if sequenceName != line[1:]:
-                sys.exit("Names in sequence and qual are different (%s, %s)" % (sequenceName, line[1:]))
-            outputQualFile.write(">%s\n" % (sequenceName))
-        else:
-            if inSequence:
-                outputFastaFile.write("%s\n" % (line))
-            elif inQuality:
-                outputQualFile.write("%s\n" % (line))
-    
-    inputFile.close()
-    outputFastaFile.close()
-    outputQualFile.close()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/PythonHelperReader.java
--- a/SMART/Java/PythonHelperReader.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,336 +0,0 @@\n-/**\n- *\n- * Copyright INRA-URGI 2009-2010\n- * \n- * This software is governed by the CeCILL license under French law and\n- * abiding by the rules of distribution of free software. You can use,\n- * modify and/ or redistribute the software under the terms of the CeCILL\n- * license as circulated by CEA, CNRS and INRIA at the following URL\n- * "http://www.cecill.info".\n- * \n- * As a counterpart to the access to the source code and rights to copy,\n- * modify and redistribute granted by the license, users are provided only\n- * with a limited warranty and the software\'s author, the holder of the\n- * economic rights, and the successive licensors have only limited\n- * liability.\n- * \n- * In this respect, the user\'s attention is drawn to the risks associated\n- * with loading, using, modifying and/or developing or reproducing the\n- * software by the user in light of its specific status of free software,\n- * that may mean that it is complicated to manipulate, and that also\n- * therefore means that it is reserved for developers and experienced\n- * professionals having in-depth computer knowledge. 
Users are therefore\n- * encouraged to load and test the software\'s suitability as regards their\n- * requirements in conditions enabling the security of their systems and/or\n- * data to be ensured and, more generally, to use and operate it in the\n- * same conditions as regards security.\n- * \n- * The fact that you are presently reading this means that you have had\n- * knowledge of the CeCILL license and that you accept its terms.\n- *\n- */\n-import java.util.*;\n-import java.io.File;\n-import java.io.*;\n-import java.util.regex.*;\n-\n-public class PythonHelperReader {\n-\n-  String         fileName;\n-  Program        program;\n-  BufferedReader reader;\n-  String         message;\n-\n-  public PythonHelperReader(String fileName) {\n-    this.fileName = fileName;  \n-    this.reader   = reader;\n-    this.message  = null;\n-  }\n-\n-  public void setReader(BufferedReader reader) {\n-    this.reader = reader;\n-  }\n-  \n-  public void run() {\n-    this.program                     = new Program();\n-    boolean         inBeginning      = true;\n-    boolean         inUsage          = false;\n-    boolean         afterUsage       = false;\n-    boolean         inDescription    = false;\n-    boolean         afterDescription = false;\n-    boolean         inOptions        = false;\n-    boolean         inOptionBlank    = false;\n-    boolean         inError          = false;\n-    String          usage            = null;\n-    String          description      = null;\n-    String          option           = null;\n-    Vector <String> options          = new Vector < String > ();\n-    String[]        optionSplitted;\n-\n-    // Parse file\n-    try {\n-      String line = null;\n-\n-      while ((line = reader.readLine()) != null) {\n-        line = line.trim();\n-        if (line.startsWith("Traceback")) {\n-          this.message     = "Problem with header of \'" + this.fileName + "\':\\n" + line + "\\n";\n-          inError          = true;\n-          
inBeginning      = false;\n-          inUsage          = false;\n-          afterUsage       = false;\n-          inDescription    = false;\n-          afterDescription = false;\n-          inOptions        = false;\n-          inOptionBlank    = false;\n-        }\n-        else if (inError) {\n-          this.message += line + "\\n";\n-        }\n-        else if (inBeginning) {\n-          if (line.startsWith("Usage:")) {\n-            inUsage     = true;\n-            inBeginning = false;\n-            usage       = line;\n-          }\n-        }\n-        else if (inUsage) {\n-          if ("".equals(line)) {\n-            inUsage    = false;\n-            afterUsage = true;\n-          }\n-          else {\n-            usage += " " + line;\n-          }\n-        }\n-        else if (afterUsage) {\n-          if (! "".equals(line)) {\n-            description   = line;\n-            afterUsage    = false;\n-            inDescription = true;\n-          }\n- '..b'     int pos     = value.indexOf(" ");\n-              currentWord = value.substring(0, pos);\n-              rest        = value.substring(pos+1);\n-            }\n-            else {\n-              currentWord = value;\n-            }\n-            // Output file type\n-            if ("output".compareToIgnoreCase(currentWord) == 0) {\n-              programOption.setInput(false);\n-              int pos     = rest.indexOf(" ");\n-              currentWord = rest.substring(0, pos).trim();\n-              rest        = rest.substring(pos+1).trim();\n-            }\n-            // File (input or output file)\n-            if ("file".compareToIgnoreCase(currentWord) == 0) {\n-              programOption.setType("file");\n-              // Format given by an associated option (to be found later)\n-              if (rest.startsWith("in format given by ")) {\n-                associatedOption.put(programOption, rest.substring(rest.indexOf("format given by ") + "format given by ".length() + 
1).trim());\n-              }\n-              else {\n-                if (! rest.startsWith("in ")) {\n-                  this.message = "Descriptor " + option + " does not have a proper format.\\n";\n-                  return;\n-                }\n-                rest = rest.substring("in ".length());\n-                int pos = rest.indexOf(" format");\n-                if (pos == -1) {\n-                  this.message = "Descriptor " + option + " does not have a proper format.\\n";\n-                  return;\n-                }\n-                programOption.setFormat(rest.substring(0, pos).trim().toLowerCase().split(" or "));\n-              }\n-            }\n-            // Format type\n-            else if (rest.endsWith("file format")) {\n-              programOption.setFormat((currentWord + " " + rest.substring(0, rest.indexOf("file format"))).trim().toLowerCase().split(" or "));\n-              programOption.setType("format");\n-            }\n-            // Choice type\n-            else if ("choice".compareToIgnoreCase(currentWord) == 0) {\n-              programOption.setChoices(rest.replace("(", "").replace(")", "").split(", "));\n-              programOption.setType("choice");\n-            }\n-            // Boolean type\n-            else if ("bool".compareToIgnoreCase(currentWord) == 0) {\n-              programOption.setType("boolean");\n-            }\n-            // Other type\n-            else {\n-              if (currentWord == null) {\n-                this.message = "Program \'" + this.fileName + "\' has a problem concerning the type of option \'" + identifier + "\'.\\n";\n-                return;\n-              }\n-              programOption.setType(currentWord);\n-            }\n-          }\n-          // Default value\n-          else if ("default".compareToIgnoreCase(type) == 0) {\n-            programOption.setDefault(value);\n-          }\n-          else {\n-            this.message = "Do not understand option descriptor 
\'" + inner + "\'.\\n";\n-            return;\n-          }\n-        }\n-        else {\n-          // Compulsory option\n-          if ("compulsory".compareToIgnoreCase(inner) == 0) {\n-            programOption.setCompulsory(true);\n-          }\n-          else {\n-            this.message = "Do not understand option descriptor \'" + inner + "\'.\\n";\n-            return;\n-          }\n-        }\n-      }\n-      if (! programOption.checkSettings()) {\n-        this.message = "Program \'" + this.fileName + "\' has a problem concerning option \'" + identifier + "\'.\\n";\n-        return;\n-      }\n-      program.addOption(programOption);\n-    }\n-\n-    // Set associated option\n-    Iterator it = associatedOption.keySet().iterator();\n-    while (it.hasNext()) {\n-      ProgramOption programOption = (ProgramOption) it.next();\n-      programOption.setAssociatedOption(identifierToOptions.get(associatedOption.get(programOption)));\n-    }\n-  }\n-\n-  public String getMessage () {\n-    return this.message;\n-  }\n-\n-  public Program getProgram () {\n-    return this.program;\n-  }\n-}\n-\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/PythonProgramFinder.java
--- a/SMART/Java/PythonProgramFinder.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,92 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.io.*;
-import java.util.*;
-
-public class PythonProgramFinder {
-
-  String             dirName;
-  Vector < Program > programs;
-
-  public PythonProgramFinder(String dirName) {
-    this.dirName = dirName;  
-  }
-
-  public String findPrograms() {
-    java.io.File directory = new java.io.File(this.dirName);
-    String[] files         = directory.list(new FilenameFilter() {public boolean accept(java.io.File dir, String name) {return ((! name.startsWith(".")) && (! name.startsWith("test")) && ((new java.io.File(dir, name)).isFile()) && (name.endsWith(".py")) && (name.compareToIgnoreCase("__init__.py") != 0));}});
-    this.programs          = new Vector < Program > ();
-
-    for (int i = 0; i < files.length; i++) {
-      String[] commandList  = {Global.pythonCommand, "Python" + java.io.File.separator + files[i], "-h"};
-      String command = "";
-      for (int j = 0; j < commandList.length; j++) {
-        command += commandList[j] + " ";
-      }
-      ProcessBuilder pb = new ProcessBuilder(commandList);
-      pb = pb.redirectErrorStream(true);
-      Map<String, String> env = pb.environment();
-      env.put("PYTHONPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
-      env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
-      env.put("SMARTMYSQLPATH", Global.mysqlCommand);
-      env.put("SMARTRPATH", Global.rCommand);
-
-      PythonHelperReader helperReader = new PythonHelperReader(files[i]);
-      try {
-        final Process process = pb.start();
-        InputStream is        = process.getInputStream();
-        InputStreamReader isr = new InputStreamReader(is);
-        BufferedReader br     = new BufferedReader(isr);
-        helperReader.setReader(br);
-        helperReader.run();
-      }
-      catch (IOException e) {
-        final Writer result = new StringWriter();
-        final PrintWriter printWriter = new PrintWriter(result);
-        e.printStackTrace(printWriter);
-        return "Command '" + command + "' failed (I/O error)...\n" + result.toString();
-      }
-      String comments = helperReader.getMessage();
-      if (comments != null) return comments;
-      Program program = helperReader.getProgram();
-      if (("Personnal".compareToIgnoreCase(program.getSection()) != 0) && ("Personal".compareToIgnoreCase(program.getSection()) != 0)) {
-        this.programs.add(program);
-      }
-    }
-    return null;
-  }
-
-  public Vector <Program> getPrograms () {
-    return this.programs;
-  }
-}
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/README.txt
--- a/SMART/Java/README.txt Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,51 +0,0 @@
-----------
-|  NAME  |
-----------
-S-MART
-
-
-Description
------------
-Several tools are now available for mapping high-throughput sequencing data from a genome, but few can extract biological knowledge from the mapped reads. We have developed a toolbox, S-MART, which handles mapped RNA-Seq and ChIP-Seq data.
-
-S-MART is an intuitive and lightweight tool, performing several tasks that are usually required during the analysis of mapped RNA-Seq and ChIP-Seq reads, including data selection and data visualization.
-
-S-MART does not require a computer science background and thus can be used by all biologists through a graphical interface. S-MART can run on any personal computer, yielding results within an hour for most queries. 
-
-
-Instructions
-------------
-Installation instructions and the user guide are available in the file "doc.pdf".
-
-
-Copyright
----------
-Copyright INRA-URGI 2009-2010
-
-
-Authors
--------
-Matthias Zytnicki
-
-
-Contact
--------
-urgi-contact@versailles.inra.fr
-
-
-License
--------
-This library is distributed under the terms of the CeCILL license 
-(http://www.cecill.info/index.en.html).
-See the LICENSE.txt file.
-
-
-Acknowledgements
-----------------
-This product needs the following softwares :
- * R, under the GNU General Public License
- * MySQL, under the GNU General Public License
- * Python, under the Python License, compatible with the GNU General Public License
- * MySQL for Python, under the GNU General Public License
- * Java, under the GNU General Public License
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/File.java
--- a/SMART/Java/Sav/File.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,55 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-public class File {
-  String name;
-  String formatType;
-  String format;
-
-
-  public File(String name, String type, String format) {
-    this.name       = name;
-    this.formatType = type;
-    this.format     = format;
-  }
-
-  public String getName() {
-    return this.name;
-  }
-
-  public String getFormatType() {
-    return this.formatType;
-  }
-
-  public String getFormat() {
-    return this.format;
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/Files.java
--- a/SMART/Java/Sav/Files.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,71 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-
-public class Files {
-  HashMap <String, File> files;  
-
-  public Files () {
-    files = new HashMap < String, File> ();
-  }
-
-  public void addFile(String fileName, String type, String format) {
-    this.addFile(new File(fileName, type, format));
-  }
-
-  public void addFile(File file) {
-    files.put(file.name, file);
-  }
-
-  public String getType(String fileName) {
-    if (fileName == null) {
-      System.out.println("Error! Looking for format of empty file name!");
-    }
-    if (! files.containsKey(fileName)) {
-      System.out.println("Oops! Format type of file " + fileName + " is not found!");
-      return null;
-    }
-    return files.get(fileName).formatType;
-  }
-
-  public String getFormat(String fileName) {
-    if (fileName == null) {
-      System.out.println("Error! Looking for format of empty file name!");
-    }
-    if (! files.containsKey(fileName)) {
-      System.out.println("Oops! Format of file " + fileName + " is not found!");
-      return null;
-    }
-    return files.get(fileName).format;
-  }
-}
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/FormatType.java
--- a/SMART/Java/Sav/FormatType.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,64 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-
-public class FormatType {
-  String type;
-  Vector < String > formats;
-
-  public FormatType (String type) {
-    this.type    = type;
-    this.formats = new Vector < String > ();
-  }
-
-  public String getType () {
-    return this.type;
-  }
-
-  public void addFormat (String format) {
-    formats.add(format);
-  }
-
-  public boolean containsFormat (String format) {
-    for (int i = 0; i < formats.size(); i++) {
-      if (((String) formats.get(i)).compareToIgnoreCase(format) == 0) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  public Vector < String > getFormats () {
-    return formats;
-  }
-}
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/FormatsContainer.java
--- a/SMART/Java/Sav/FormatsContainer.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,81 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-
-public class FormatsContainer {
-
-  HashMap < String, FormatType > formatTypes;
-
-
-  public FormatsContainer() {
-    this.formatTypes = new HashMap < String, FormatType > ();
-  }
-
-
-  public void addFormat(String type, String format) {
-    FormatType formatType;
-    if (formatTypes.containsKey(type)) {
-      formatType = this.formatTypes.get(type);
-    }
-    else {
-      formatType = new FormatType(type);
-      this.formatTypes.put(type, formatType);
-    }
-    formatType.addFormat(format);
-  }
-
-
-  public Vector < String > getFormatTypes () {
-    Vector < String > v = new Vector < String > ();
-    v.addAll(this.formatTypes.keySet());
-    return v;
-  }
-
-
-  public FormatType getFormats (String type) {
-    return formatTypes.get(type);
-  }
-
-
-  public String getFormatType (String format) {
-    for (Iterator it = formatTypes.keySet().iterator(); it.hasNext(); ) {
-      Object type       =  it.next();
-      Object formatType = formatTypes.get(type);
-      if (((FormatType) formatType).containsFormat(format)) {
-        return (String) type;
-      }
-    }
-    return null;
-  }
-}
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/FormatsReader.java
--- a/SMART/Java/Sav/FormatsReader.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,83 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-import java.io.File;
-import java.io.*;
-
-
-public class FormatsReader {
-
-  String fileName;
-  Vector < FormatType > formatTypes;
-  Vector < String > typeNames;
-
-
-  public FormatsReader(String fileName) {
-    this.fileName    = fileName;  
-    this.formatTypes = new Vector < FormatType > ();
-  }
-
-
-  public boolean read() {
-    File file = new File(this.fileName);
-
-    try {
-      BufferedReader reader = new BufferedReader(new FileReader(file));
-      String     line = null;
-      String[]   lineElements;
-      String[]   formats;
-      String     typeName;
-
-      while ((line = reader.readLine()) != null) {
-        if (line.length() > 0) {
-          lineElements = line.split(":");
-          typeName     = lineElements[0].trim();
-          formats      = lineElements[1].split(",");
-          for (int i = 0; i < formats.length; i++) {
-            Global.formats.addFormat(typeName, formats[i].trim());
-          }
-        }
-      }
-
-      reader.close();
-    }
-    catch (FileNotFoundException e) {
-      return false;
-    }
-    catch (IOException e) {
-      return false;
-    }
-
-    return true;
-  }
-}
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/Global.java
--- a/SMART/Java/Sav/Global.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,70 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.Vector;
-import java.util.HashMap;
-import javax.swing.DefaultListModel;
-import javax.swing.JButton;
-import javax.swing.JTextField;
-
-public class Global {
-
-  public static int logAreaSize = 100;
-
-  public static String smartConfFileName = "smart.conf";
-
-  public static String smartProgramsFileName = "programs.txt";
-
-  public static String smartFormatsFileName = "formats.txt";
-
-  public static String pythonPath = new String();
-
-  public static String pythonCommand = "python";
-
-  public static String mysqlCommand = "mysql";
-
-  public static String rCommand = "R";
-
-  public static Files files = new Files();
-
-  public static DefaultListModel fileNames = new DefaultListModel();
-
-  public static FormatsContainer formats = new FormatsContainer();
-
-  public static boolean programRunning = false;
-
-  public static HashMap < JButton, JTextField > otherFilesChooser = new HashMap < JButton, JTextField >();
-
-  public static HashMap < JButton, JTextField > otherDirectoriesChooser = new HashMap < JButton, JTextField >();
-
-  public static HashMap < JButton, JTextField > otherFileConcatenationChooser = new HashMap < JButton, JTextField >();
-
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/Program.java
--- a/SMART/Java/Sav/Program.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,175 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-import java.awt.*;
-import javax.swing.*;
-
-
-public class Program {
-  String                 shortName;
-  String                 name;
-  String                 section;
-  String                 description;
-  Vector <ProgramOption> options;
-  JPanel                 panel;
-  JButton                button;
-
-
-  public Program() {
-    this.shortName = null;  
-    this.name      = null;  
-    this.options   = new Vector <ProgramOption> ();  
-  }
-
-
-  public void setShortName(String shortName) {
-    this.shortName = shortName;
-  }
-
-
-  public void setName(String name) {
-    this.name = name;
-  }
-
-
-  public void setSection(String section) {
-    this.section = section;
-  }
-
-  public void setDescription(String description) {
-    this.description = description;
-  }
-
-
-  public void addOption(ProgramOption option) {
-    options.add(option);
-  }
-
-
-  public String getShortName() {
-    return this.shortName;
-  }
-
-
-  public String getName() {
-    return this.name;
-  }
-
-
-  public String getSection() {
-    return this.section;
-  }
-
-  public String getDescription() {
-    return this.description;
-  }
-
-
-  public String checkValues() {
-    for (int i = 0; i < options.size(); i++) {
-      String comment = options.get(i).checkValue();
-      if (comment != null) {
-        return comment;
-      }
-    }
-    return null;
-  }
-
-
-  public LinkedList<String> getCommand() {
-    LinkedList<String> parameterList = new LinkedList<String>();
-    parameterList.add(Global.pythonCommand);
-    parameterList.add("Python" + java.io.File.separator + this.shortName);
-    for (int i = 0; i < options.size(); i++) {
-      ProgramOption option = options.get(i);
-      parameterList.addAll(option.getCommand());
-    }
-    return parameterList;
-  }
-
-
-  public JPanel getPanel() {
-    if (this.panel != null) {
-      return this.panel;
-    }
-    
-    this.panel = new JPanel(false);
-    this.panel.setLayout(new FlowLayout());
-    Box box = Box.createVerticalBox();
-
-    JPanel descriptionPanel = new JPanel(false);
-    JLabel descriptionLabel = new JLabel(this.description);
-    descriptionPanel.add(descriptionLabel);
-    box.add(descriptionPanel);
-
-    for (int i = 0; i < options.size(); i++) {
-      ProgramOption option = options.get(i);
-      JPanel        panel  = option.getPanel();
-      if (panel == null) {
-        System.out.println("Problem with Python program '" + this.shortName + "'.");
-        return null;
-      }
-      box.add(option.getPanel());
-    }
-
-    JPanel buttonPanel = new JPanel(false);
-    this.button = new JButton("GO!");
-
-    buttonPanel.add(button);
-
-    box.add(buttonPanel);
-
-    this.panel.add(box);
-
-    return this.panel;
-  }
-
-
-  public JButton getButton() {
-    if (this.button == null) {
-      this.getPanel();
-    }
-    return this.button;
-  }
-
-  
-  public Vector < File > getOutputFiles() {
-    Vector < File > files = new Vector < File > ();
-    for (int i = 0; i < options.size(); i++) {
-      ProgramOption option = options.get(i);
-      if (! option.isInput()) {
-        files.add(option.getOutputFile());
-      }
-    }
-    return files;
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/ProgramFileReader.java
--- a/SMART/Java/Sav/ProgramFileReader.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,174 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-import java.io.File;
-import java.io.*;
-
-
-public class ProgramFileReader {
-  String fileName;
-  Vector <Program> programs;
-
-
-  public ProgramFileReader(String fileName) {
-    this.fileName = fileName;  
-    this.programs = new Vector <Program> ();
-  }
-
-
-  public boolean read() {
-//  File    file    = new File(this.fileName);
-//  Program program = null;
-//  int     step    = 0;
-//  TreeMap <String, ProgramOption> options = new TreeMap <String, ProgramOption> ();
-
-//  try {
-//    BufferedReader reader = new BufferedReader(new FileReader(file));
-//    String line    = null;
-//    String section = null;
-
-//    while ((line = reader.readLine()) != null) {
-
-//      line = line.trim();
-
-//      if (line.length() == 0) {
-//        if (program != null) {
-//          programs.add(program);
-//        }
-//        program = null;
-//        step = 0;
-//        continue;
-//      }
-
-//      if ((line.charAt(0) == '[') && (line.charAt(line.length() - 1) == ']')) {
-//        section = line.substring(1, line.length() - 1).trim();
-//        continue;
-//      }
-//      switch (step) {
-//        case 0:
-//        program = new Program();
-//          program.setName(line);
-//          if (section == null) {
-//            System.out.println("Error! Section of program '" + line + "' is not set!");
-//          }
-//          program.setSection(section);
-//          step = 1;
-//          break;
-//        case 1:
-//          program.setShortName(line);
-//          step = 2;
-//          break;
-//        case 2:
-//          ProgramOption option = new ProgramOption();
-
-//          String[] elements    = line.split(":");
-//          boolean  input       = elements[0].trim().equalsIgnoreCase("input")? true: false;
-//          String[] subElements = elements[1].split(";");
-//          String   identifier = subElements[0].trim();
-
-//          option.setInput(input);
-
-//          if (input) {
-
-//            if (subElements.length < 4) {
-//              System.out.println("Line '" + line + "' is weird...");
-//            }
-
-//            String   type       = subElements[1].trim();
-//            String   comment    = subElements[2].trim();
-//            boolean  compulsory = subElements[3].trim().equalsIgnoreCase("0")? false: true;
-
-//            option.setIdentifier(identifier);
-//            option.setType(type);
-//            option.setComment(comment);
-//            option.setCompulsory(compulsory);
-
-//            if ("file".compareToIgnoreCase(type) == 0) {
-//              if (subElements.length < 5) {
-//                System.out.println("Line '" + line + "' is weird...");
-//              }
-
-//              String formatIdentifier = subElements[4].trim();
-//              option.setFormatIdentifier(formatIdentifier);
-//            }
-//            else if ("choice".compareToIgnoreCase(type) == 0) {
-//              if (subElements.length < 5) {
-//                System.out.println("Line '" + line + "' is weird...");
-//              }
-
-//              String[] choices = subElements[4].trim().split(",");
-//              for (int i = 0; i < choices.length; i++) {
-//                choices[i] = choices[i].trim();
-//              }
-//              option.setChoices(choices);
-//            }
-//            options.put(identifier, option);
-//          }
-//          else {
-//            String format = subElements[1].trim();
-
-//            option.setFormat(format);
-//            option.setAssociatedOption(options.get(identifier));
-//          }
-
-//          program.addOption(option);
-
-//          break;
-//        default:
-//          return false;
-//      }
-//    }
-
-//    reader.close();
-//  }
-//  catch (FileNotFoundException e) {
-//    return false;
-//  }
-//  catch (IOException e) {
-//    return false;
-//  }
-
-//  if (program != null) {
-//    programs.add(program);
-//  }
-
-    return true;
-  }
-
-  public int getNbPrograms() {
-    return programs.size();
-  }
-
-  public Program getProgram(int i) {
-    return programs.get(i);
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/ProgramLauncher.java
--- a/SMART/Java/Sav/ProgramLauncher.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,191 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.util.*;
-import java.io.*;
-import javax.swing.SwingUtilities;
-import javax.swing.*;
-import java.util.concurrent.CountDownLatch;
-
-public class ProgramLauncher extends SwingWorker<Boolean, String>  {
-
-  String[]     command;
-  JTextArea    logArea;
-  JLabel       messageField;
-  JProgressBar progressBar;
-  JLabel       etaField;
-  int          exitValue;
-  
-
-  public ProgramLauncher (LinkedList <String> c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
-    command       = new String[c.size()];
-    logArea       = la;
-    messageField  = mf;
-    progressBar   = pb;
-    etaField      = ef;
-    exitValue     = -1;
-    c.toArray(command);
-  }
-
-
-  public ProgramLauncher (String[] c, JTextArea la, JLabel mf, JProgressBar pb, JLabel ef) {
-    command       = c;
-    logArea       = la;
-    messageField  = mf;
-    progressBar   = pb;
-    etaField      = ef;
-    exitValue     = -1;
-  }
-
-
-  @Override
-  public Boolean doInBackground() {
-    ProcessBuilder pb           = new ProcessBuilder(command);
-    Process process             = null;
-    BufferedReader outputReader = null;
-    pb                          = pb.redirectErrorStream(true);
-    Map<String, String> env     = pb.environment();
-    env.put("PYTHONPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
-    env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
-    env.put("SMARTMYSQLPATH", Global.mysqlCommand);
-    env.put("SMARTRPATH", Global.rCommand);
-    String commandJoined = Arrays.toString(command);
-
-    try {
-      publish("=== Starting command '" + commandJoined.trim() + "' ===\n");
-      process = pb.start();
-
-      BufferedInputStream outputStream = new BufferedInputStream(process.getInputStream());
-      InputStream is                   = process.getInputStream();
-      InputStreamReader isr            = new InputStreamReader(is);
-      outputReader                     = new BufferedReader(isr);
-    }
-    catch (Exception exception) {
-      publish("!Process cannot be started (command is '" + commandJoined + "')!\n");
-      exception.printStackTrace();
-      return Boolean.FALSE;
-    }
-    if (outputReader == null) {
-      publish("!Problem in the output of the command!\n");
-      return Boolean.FALSE;
-    }
-    else {
-      try {
-        String line;
-        while ((line = outputReader.readLine()) != null) {
-          publish(line + "\n");
-        }
-      }
-      catch (IOException e) {
-        e.printStackTrace();
-        publish("!Cannot get the output of the command!\n");
-        return Boolean.FALSE;
-      }
-    }
-    try {
-      process.waitFor();
-    }
-    catch (InterruptedException e) {
-      e.printStackTrace();
-      publish("!Cannot wait for the end of the command!\n");
-      return Boolean.FALSE;
-    }
-    try {
-      exitValue = process.exitValue();
-      System.out.println(exitValue);
-    }
-    catch (IllegalThreadStateException e) {
-      e.printStackTrace();
-      publish("!Cannot get the exit value of the command!\n");
-      return Boolean.FALSE;
-    }
-    if (exitValue != 0) {
-      publish("!Problem during the execution of the command '" + commandJoined + "'!\n");
-      return Boolean.FALSE;
-    }
-    publish("=== Ending command '" + commandJoined.trim() + "' ===\n");
-    return Boolean.TRUE;
-  }
-
-
-  @Override
-  protected void process(List<String> chunks) {
-    String message = "";
-    String text    = logArea.getText();
-    for (String chunk: chunks) {
-      text += chunk;
-    }
-    for (String lineSeparatedByCarriageReturn: text.split("\n")) {
-      for (String line: lineSeparatedByCarriageReturn.split("\r")) {
-        boolean progressLine = false;
-        if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*")) {
-          String[] ratioElements = line.split("\\]")[1].trim().split("/");
-          int      current       = Integer.parseInt(ratioElements[0].trim());
-          int      aim           = Integer.parseInt(ratioElements[1].trim());
-          messageField.setText(line.split("\\[")[0].trim());
-          progressBar.setValue(current * 100 / aim);
-          etaField.setText("");
-          progressLine = true;
-        }
-        else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*/\\d*\\s*ETA:\\s*.*")) {
-          String[] ratioElements = line.split("\\]")[1].split("E")[0].trim().split("/");
-          int      current       = Integer.parseInt(ratioElements[0].trim());
-          int      aim           = Integer.parseInt(ratioElements[1].trim());
-          String   eta           = line.split("ETA:")[1].trim();
-          messageField.setText(line.split("\\[")[0].trim());
-          progressBar.setValue(current * 100 / aim);
-          etaField.setText("ETA: " + eta);
-          progressLine = true;
-        }
-        else if (line.matches(".*\\[=*\\s*\\]\\s*\\d*\\s*completed in.*")) {
-          String nbElements = line.split("\\]")[1].split("completed")[0].trim();
-          String timeSpent  = line.split("completed in")[1].trim();
-          message          += line.split("\\[")[0].trim() + ": " + nbElements + " elements completed in " + timeSpent + "\n";
-          messageField.setText(line.split("\\[")[0].trim());
-          progressLine = true;
-        }
-        if (! progressLine) {
-          message += line + "\n";
-        }
-      }
-    }
-    String lines[]     = message.split("\n");
-    String toBeWritten = "";
-    for (int i = Math.max(0, lines.length - Global.logAreaSize); i < lines.length; i++) {
-      toBeWritten += lines[i] + "\n";
-    }
-    logArea.setText(toBeWritten);
-  }
-
-  public int getExitValue() {
-    return exitValue;
-  }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/ProgramOption.java
--- a/SMART/Java/Sav/ProgramOption.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,329 +0,0 @@\n-/**\n- *\n- * Copyright INRA-URGI 2009-2010\n- * \n- * This software is governed by the CeCILL license under French law and\n- * abiding by the rules of distribution of free software. You can use,\n- * modify and/ or redistribute the software under the terms of the CeCILL\n- * license as circulated by CEA, CNRS and INRIA at the following URL\n- * "http://www.cecill.info".\n- * \n- * As a counterpart to the access to the source code and rights to copy,\n- * modify and redistribute granted by the license, users are provided only\n- * with a limited warranty and the software\'s author, the holder of the\n- * economic rights, and the successive licensors have only limited\n- * liability.\n- * \n- * In this respect, the user\'s attention is drawn to the risks associated\n- * with loading, using, modifying and/or developing or reproducing the\n- * software by the user in light of its specific status of free software,\n- * that may mean that it is complicated to manipulate, and that also\n- * therefore means that it is reserved for developers and experienced\n- * professionals having in-depth computer knowledge. 
Users are therefore\n- * encouraged to load and test the software\'s suitability as regards their\n- * requirements in conditions enabling the security of their systems and/or\n- * data to be ensured and, more generally, to use and operate it in the\n- * same conditions as regards security.\n- * \n- * The fact that you are presently reading this means that you have had\n- * knowledge of the CeCILL license and that you accept its terms.\n- *\n- */\n-import java.util.*;\n-import java.awt.*;\n-import java.awt.event.ActionEvent;\n-import java.awt.event.ActionListener;\n-import java.io.*;\n-import javax.swing.*;\n-import javax.swing.filechooser.*;\n-import javax.swing.border.*;\n-import javax.swing.SwingUtilities;\n-\n-\n-public class ProgramOption {\n-  boolean       input;\n-  String        identifier;\n-  String        type;\n-  String        comment;\n-  boolean       compulsory;\n-  String[]      format;\n-  String        formatIdentifier;\n-  ProgramOption associatedOption;\n-  String        defaultValue;\n-  String[]      choices;\n-  JComponent    component;\n-  JPanel        panel;\n-\n-\n-  public ProgramOption() {\n-    this.input            = true;\n-    this.identifier       = null;\n-    this.type             = null;\n-    this.comment          = null;\n-    this.compulsory       = false;\n-    this.format           = null;\n-    this.formatIdentifier = null;\n-    this.associatedOption = null;\n-    this.defaultValue     = "";\n-    this.choices          = null;\n-    this.component        = null;\n-    this.panel            = null;\n-  }\n-\n-\n-  public void setInput(boolean input) {\n-    this.input = input;\n-  }\n-\n-\n-  public void setIdentifier(String identifier) {\n-    this.identifier = identifier;\n-  }\n-\n-\n-  public void setType(String type) {\n-    this.type = type;\n-  }\n-\n-\n-  public void setComment(String comment) {\n-    this.comment = comment;\n-  }\n-\n-\n-  public void setCompulsory(boolean compulsory) {\n-    this.compulsory = 
compulsory;\n-  }\n-\n-\n-  public void setFormat(String[] format) {\n-    this.format = format;\n-  }\n-\n-\n-  public void setFormat(String format) {\n-    this.format    = new String[1];\n-    this.format[0] = format;\n-  }\n-\n-\n-  public void setFormatIdentifier(String formatIdentifier) {\n-    this.formatIdentifier = formatIdentifier;\n-  }\n-\n-\n-  public void setAssociatedOption(ProgramOption option) {\n-    this.associatedOption = option;\n-  }\n-\n-\n-  public void setChoices(String[] choices) {\n-    this.choices = choices;\n-  }\n-\n-\n-  public void setDefault(String defaultValue) {\n-    this.defaultValue = defaultValue;\n-  }\n-\n-\n-  public boolean isInput() {\n-    return this.input;\n-  }\n-\n-\n-  public JPanel getPanel() {\n-    if (this.panel != null) {\n-      return this.panel;\n-    }\n-    String comment = this.comment;\n-    if (this.compulsory) {\n-      comment += " [*]";\n-    }\n-\n-    GridLayout horizontalLayout = new GridLayout(1, 0);\n-    this.panel = ne'..b'component = new JTextField();\n-      label.setLabelFor(this.component);\n-      this.panel.add(label);\n-      JPanel rightPanel = new JPanel(false);\n-      rightPanel.setLayout(new BoxLayout(rightPanel, BoxLayout.LINE_AXIS));\n-      rightPanel.add(this.component);\n-      rightPanel.add(button);\n-      this.panel.add(rightPanel);\n-      Global.otherDirectoriesChooser.put(button, (JTextField) this.component);\n-    }\n-    else if ("choice".compareToIgnoreCase(this.type) == 0) {\n-      this.component = new JComboBox(this.choices);\n-      label.setLabelFor(this.component);\n-      this.panel.add(label);\n-      this.panel.add(this.component);\n-    }\n-    else {\n-      System.out.println("Do not know how to read type " + this.type);\n-    }\n-\n-    return this.panel;\n-  }\n-\n-\n-  public JComponent getComponent() {\n-    if (component == null) {\n-      this.getPanel();\n-    }\n-    return this.component;\n-  }\n-\n-\n-  private String getValue() {\n-    if 
(("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! this.input)) || ("directory".equals(this.type)) || ("files".equals(this.type)))  {\n-      String s = ((JTextField) this.component).getText();\n-      if ("None".equals(s)) {\n-        return "";\n-      }\n-      return s;\n-    }\n-    if ("file".equals(this.type)) {\n-      return (String) ((JList) this.component).getSelectedValue();\n-    }\n-    if ("boolean".equals(this.type)) {\n-      return ((JCheckBox) this.component).isSelected()? "true": "false";\n-    }\n-    if ("format".equals(this.type)) {\n-      return (String) ((JComboBox) this.component).getSelectedItem();\n-    }\n-    return null;\n-  }\n-\n-\n-  public String checkValue() {\n-    String value = this.getValue();\n-    if ((this.compulsory) && ((value == null) || ("".equals(value)))) {\n-      return "Option \'" + this.comment + "\' has no value... Please specify it.\\n";\n-    }\n-    if ("int".equals(this.type)) {\n-      if ((value != null) && (! "".equals(value)) && (! "None".equals(value))) {\n-        try {\n-          int i = Integer.parseInt(value);\n-        }\n-        catch (NumberFormatException e) {\n-          return "Option \'" + this.comment + "\' should be an integer... Please correct it.\\n";\n-        }\n-      }\n-    }\n-    else if ("float".equals(this.type)) {\n-      if ((value != null) && (! "".equals(value))) {\n-        try {\n-          float i = Float.parseFloat(value);\n-        }\n-        catch (NumberFormatException e) {\n-          return "Option \'" + this.comment + "\' should be a float... Please correct it.\\n";\n-        }\n-      }\n-    }\n-    return null;\n-  }\n-\n-\n-  public LinkedList <String> getCommand() {\n-    LinkedList <String> list = new LinkedList <String> ();\n-\n-    if (("int".equals(this.type)) || ("float".equals(this.type)) || ("string".equals(this.type)) || (("file".equals(this.type)) && (! 
this.input)) || ("format".equals(this.type)) || ("directory".equals(this.type)) || ("files".equals(this.type))) {\n-      String value = this.getValue();\n-      if (value.length() == 0) {\n-        return list;\n-      }\n-      list.add(this.identifier);\n-      list.add(value);\n-      return list;\n-    }\n-    if ("file".equals(this.type)) {\n-      String fileName = (String) ((JList) this.component).getSelectedValue();\n-      if (fileName == null) {\n-        return list;\n-      }\n-      list.add(this.identifier);\n-      list.add(this.getValue());\n-      return list;\n-    }\n-    if ("boolean".equals(this.type)) {\n-      if ("true".equals(this.getValue())) {\n-        list.add(this.identifier);\n-      }\n-      return list;\n-    }\n-    return null;\n-  }\n-\n-\n-  public File getOutputFile() {\n-    if (this.input) return null;\n-    String format = "";\n-    if (this.format != null) {\n-      format = this.format[0];\n-    }\n-    if (this.associatedOption != null) {\n-      format = this.associatedOption.getValue();\n-    }\n-    return new File(this.getValue() + "." + format, Global.formats.getFormatType(format), format);\n-  }\n-}\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/PythonHelperReader.java
--- a/SMART/Java/Sav/PythonHelperReader.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,323 +0,0 @@\n-/**\n- *\n- * Copyright INRA-URGI 2009-2010\n- * \n- * This software is governed by the CeCILL license under French law and\n- * abiding by the rules of distribution of free software. You can use,\n- * modify and/ or redistribute the software under the terms of the CeCILL\n- * license as circulated by CEA, CNRS and INRIA at the following URL\n- * "http://www.cecill.info".\n- * \n- * As a counterpart to the access to the source code and rights to copy,\n- * modify and redistribute granted by the license, users are provided only\n- * with a limited warranty and the software\'s author, the holder of the\n- * economic rights, and the successive licensors have only limited\n- * liability.\n- * \n- * In this respect, the user\'s attention is drawn to the risks associated\n- * with loading, using, modifying and/or developing or reproducing the\n- * software by the user in light of its specific status of free software,\n- * that may mean that it is complicated to manipulate, and that also\n- * therefore means that it is reserved for developers and experienced\n- * professionals having in-depth computer knowledge. 
Users are therefore\n- * encouraged to load and test the software\'s suitability as regards their\n- * requirements in conditions enabling the security of their systems and/or\n- * data to be ensured and, more generally, to use and operate it in the\n- * same conditions as regards security.\n- * \n- * The fact that you are presently reading this means that you have had\n- * knowledge of the CeCILL license and that you accept its terms.\n- *\n- */\n-import java.util.*;\n-import java.io.File;\n-import java.io.*;\n-import java.util.regex.*;\n-\n-public class PythonHelperReader {\n-\n-  String         fileName;\n-  Program        program;\n-  BufferedReader reader;\n-  String         message;\n-\n-  public PythonHelperReader(String fileName) {\n-    this.fileName = fileName;  \n-    this.reader   = reader;\n-    this.message  = null;\n-  }\n-\n-  public void setReader(BufferedReader reader) {\n-    this.reader = reader;\n-  }\n-  \n-  public void run() {\n-    this.program                     = new Program();\n-    boolean         inBeginning      = true;\n-    boolean         inUsage          = false;\n-    boolean         afterUsage       = false;\n-    boolean         inDescription    = false;\n-    boolean         afterDescription = false;\n-    boolean         inOptions        = false;\n-    boolean         inOptionBlank    = false;\n-    boolean         inError          = false;\n-    String          usage            = null;\n-    String          description      = null;\n-    String          option           = null;\n-    Vector <String> options          = new Vector < String > ();\n-    String[]        optionSplitted;\n-\n-    // Parse file\n-    try {\n-      String line = null;\n-\n-      while ((line = reader.readLine()) != null) {\n-        line = line.trim();\n-        if (line.startsWith("Traceback")) {\n-          this.message     = "Problem with header of \'" + this.fileName + "\':\\n" + line + "\\n";\n-          inError          = true;\n-          
inBeginning      = false;\n-          inUsage          = false;\n-          afterUsage       = false;\n-          inDescription    = false;\n-          afterDescription = false;\n-          inOptions        = false;\n-          inOptionBlank    = false;\n-        }\n-        else if (inError) {\n-          this.message += line + "\\n";\n-        }\n-        else if (inBeginning) {\n-          if (line.startsWith("Usage:")) {\n-            inUsage     = true;\n-            inBeginning = false;\n-            usage       = line;\n-          }\n-        }\n-        else if (inUsage) {\n-          if ("".equals(line)) {\n-            inUsage    = false;\n-            afterUsage = true;\n-          }\n-          else {\n-            usage += " " + line;\n-          }\n-        }\n-        else if (afterUsage) {\n-          if (! "".equals(line)) {\n-            description   = line;\n-            afterUsage    = false;\n-            inDescription = true;\n-          }\n- '..b'her.end()-1);\n-        if (inner.contains(":")) {\n-          String type  = inner.substring(0, inner.indexOf(":")).trim();\n-          String value = inner.substring(inner.indexOf(":")+1).trim();\n-          // Types of the options\n-          if ("format".compareToIgnoreCase(type) == 0) {\n-            String currentWord = "";\n-            String rest        = "";\n-            if (value.contains(" ")) {\n-              int pos     = value.indexOf(" ");\n-              currentWord = value.substring(0, pos);\n-              rest        = value.substring(pos+1);\n-            }\n-            else {\n-              currentWord = value;\n-            }\n-            // Output file type\n-            if ("output".compareToIgnoreCase(currentWord) == 0) {\n-              programOption.setInput(false);\n-              int pos     = rest.indexOf(" ");\n-              currentWord = rest.substring(0, pos).trim();\n-              rest        = rest.substring(pos+1).trim();\n-            }\n-            // 
File (input or output file)\n-            if ("file".compareToIgnoreCase(currentWord) == 0) {\n-              programOption.setType("file");\n-              // Format given by an associated option (to be found later)\n-              if (rest.startsWith("in format given by ")) {\n-                associatedOption.put(programOption, rest.substring(rest.indexOf("format given by ") + "format given by ".length() + 1).trim());\n-              }\n-              else {\n-                if (! rest.startsWith("in ")) {\n-                  this.message = "Descriptor " + option + " does not have a proper format.";\n-                  return;\n-                }\n-                rest = rest.substring("in ".length());\n-                int pos = rest.indexOf(" format");\n-                if (pos == -1) {\n-                  this.message = "Descriptor " + option + " does not have a proper format.";\n-                  return;\n-                }\n-                programOption.setFormat(rest.substring(0, pos).trim().toLowerCase().split(" or "));\n-              }\n-            }\n-            // Format type\n-            else if (rest.endsWith("file format")) {\n-              programOption.setFormat((currentWord + " " + rest.substring(0, rest.indexOf("file format"))).trim().toLowerCase().split(" or "));\n-              programOption.setType("format");\n-            }\n-            // Choice type\n-            else if ("choice".compareToIgnoreCase(currentWord) == 0) {\n-              programOption.setChoices(rest.replace("(", "").replace(")", "").split(", "));\n-              programOption.setType("choice");\n-            }\n-            // Boolean type\n-            else if ("bool".compareToIgnoreCase(currentWord) == 0) {\n-              programOption.setType("boolean");\n-            }\n-            // Other type\n-            else {\n-              programOption.setType(currentWord);\n-            }\n-          }\n-          // Default value\n-          else if 
("default".compareToIgnoreCase(type) == 0) {\n-            programOption.setDefault(value);\n-          }\n-          else {\n-            this.message = "Do not understand option descriptor \'" + inner + "\'.";\n-            return;\n-          }\n-        }\n-        else {\n-          // Compulsory option\n-          if ("compulsory".compareToIgnoreCase(inner) == 0) {\n-            programOption.setCompulsory(true);\n-          }\n-          else {\n-            this.message = "Do not understand option descriptor \'" + inner + "\'.";\n-            return;\n-          }\n-        }\n-      }\n-      program.addOption(programOption);\n-    }\n-\n-    // Set associated option\n-    Iterator it = associatedOption.keySet().iterator();\n-    while (it.hasNext()) {\n-      ProgramOption programOption = (ProgramOption) it.next();\n-      programOption.setAssociatedOption(identifierToOptions.get(associatedOption.get(programOption)));\n-    }\n-  }\n-\n-  public String getMessage () {\n-    return this.message;\n-  }\n-\n-  public Program getProgram () {\n-    return this.program;\n-  }\n-}\n-\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/PythonProgramFinder.java
--- a/SMART/Java/Sav/PythonProgramFinder.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,90 +0,0 @@
-/**
- *
- * Copyright INRA-URGI 2009-2010
- * 
- * This software is governed by the CeCILL license under French law and
- * abiding by the rules of distribution of free software. You can use,
- * modify and/ or redistribute the software under the terms of the CeCILL
- * license as circulated by CEA, CNRS and INRIA at the following URL
- * "http://www.cecill.info".
- * 
- * As a counterpart to the access to the source code and rights to copy,
- * modify and redistribute granted by the license, users are provided only
- * with a limited warranty and the software's author, the holder of the
- * economic rights, and the successive licensors have only limited
- * liability.
- * 
- * In this respect, the user's attention is drawn to the risks associated
- * with loading, using, modifying and/or developing or reproducing the
- * software by the user in light of its specific status of free software,
- * that may mean that it is complicated to manipulate, and that also
- * therefore means that it is reserved for developers and experienced
- * professionals having in-depth computer knowledge. Users are therefore
- * encouraged to load and test the software's suitability as regards their
- * requirements in conditions enabling the security of their systems and/or
- * data to be ensured and, more generally, to use and operate it in the
- * same conditions as regards security.
- * 
- * The fact that you are presently reading this means that you have had
- * knowledge of the CeCILL license and that you accept its terms.
- *
- */
-import java.io.*;
-import java.util.*;
-
-public class PythonProgramFinder {
-
-  String             dirName;
-  Vector < Program > programs;
-
-  public PythonProgramFinder(String dirName) {
-    this.dirName = dirName;  
-  }
-
-  public String findPrograms() {
-    java.io.File directory = new java.io.File(this.dirName);
-    String[] files         = directory.list(new FilenameFilter() {public boolean accept(java.io.File dir, String name) {return ((! name.startsWith(".")) && (! name.startsWith("test")) && ((new java.io.File(dir, name)).isFile()) && (name.endsWith(".py")));}});
-    this.programs          = new Vector < Program > ();
-
-    for (int i = 0; i < files.length; i++) {
-      String[] commandList  = {Global.pythonCommand, "Python" + java.io.File.separator + files[i], "-h"};
-      String command = "";
-      for (int j = 0; j < commandList.length; j++) {
-        command += commandList[j] + " ";
-      }
-      ProcessBuilder pb = new ProcessBuilder(commandList);
-      pb = pb.redirectErrorStream(true);
-      Map<String, String> env = pb.environment();
-      env.put("PYTHONPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
-      env.put("SMARTPATH", System.getProperty("user.dir") + java.io.File.separator + "Python");
-      env.put("SMARTMYSQLPATH", Global.mysqlCommand);
-      env.put("SMARTRPATH", Global.rCommand);
-
-      PythonHelperReader helperReader = new PythonHelperReader(files[i]);
-      try {
-        final Process process = pb.start();
-        InputStream is        = process.getInputStream();
-        InputStreamReader isr = new InputStreamReader(is);
-        BufferedReader br     = new BufferedReader(isr);
-        helperReader.setReader(br);
-        helperReader.run();
-      }
-      catch (IOException e) {
-        e.printStackTrace();
-        return "Command '" + command + "' failed (I/O error)...\n";
-      }
-      String comments = helperReader.getMessage();
-      if (comments != null) return comments;
-      Program program = helperReader.getProgram();
-      if ("Personnal".compareToIgnoreCase(program.getSection()) != 0) {
-        this.programs.add(program);
-      }
-    }
-    return null;
-  }
-
-  public Vector <Program> getPrograms () {
-    return this.programs;
-  }
-}
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Sav/Smart.java
--- a/SMART/Java/Sav/Smart.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,489 +0,0 @@\n-/**\n- *\n- * Copyright INRA-URGI 2009-2010\n- * \n- * This software is governed by the CeCILL license under French law and\n- * abiding by the rules of distribution of free software. You can use,\n- * modify and/ or redistribute the software under the terms of the CeCILL\n- * license as circulated by CEA, CNRS and INRIA at the following URL\n- * "http://www.cecill.info".\n- * \n- * As a counterpart to the access to the source code and rights to copy,\n- * modify and redistribute granted by the license, users are provided only\n- * with a limited warranty and the software\'s author, the holder of the\n- * economic rights, and the successive licensors have only limited\n- * liability.\n- * \n- * In this respect, the user\'s attention is drawn to the risks associated\n- * with loading, using, modifying and/or developing or reproducing the\n- * software by the user in light of its specific status of free software,\n- * that may mean that it is complicated to manipulate, and that also\n- * therefore means that it is reserved for developers and experienced\n- * professionals having in-depth computer knowledge. 
Users are therefore\n- * encouraged to load and test the software\'s suitability as regards their\n- * requirements in conditions enabling the security of their systems and/or\n- * data to be ensured and, more generally, to use and operate it in the\n- * same conditions as regards security.\n- * \n- * The fact that you are presently reading this means that you have had\n- * knowledge of the CeCILL license and that you accept its terms.\n- *\n- */\n-import java.util.*;\n-import java.awt.*;\n-import java.awt.event.ActionEvent;\n-import java.awt.event.ActionListener;\n-import java.io.*;\n-import javax.swing.*;\n-import javax.swing.filechooser.*;\n-import javax.swing.border.*;\n-import javax.swing.SwingUtilities;\n-\n-\n-public class Smart extends JPanel implements ActionListener {\n-\n-  String version = "1.0.2";\n-\n-  JFrame mainFrame;\n-  JButton openButton;\n-  JButton comparisonGoButton;\n-\n-  JComboBox formatTypes;\n-  JComboBox fileFormats;\n-  String[]  emptyFormats = {"Choose a type first..."};\n-\n-  JFrame  askFrame;\n-  JButton pythonButton;\n-  JButton mySqlButton;\n-  JButton rButton;\n-\n-  HashMap <JButton, Program> callingProgram;\n-\n-  // comparison\n-  JList comparisonFile1List;\n-  JList comparisonFile2List;\n-  JTextField comparisonOutputTextField;\n-  JTextField comparisonFiveQueryExtensionTextField;\n-  JCheckBox comparisonColinearCheckBox;\n-  JCheckBox comparisonAntisenseCheckBox;\n-  JCheckBox comparisonInvertCheckBox;\n-\n-  JList        fileList;\n-  JTextArea    logArea;\n-\n-  // progress bar\n-  JLabel       messageField;\n-  JProgressBar progressBar;\n-  JLabel       etaField;\n-\n-  // process\n-  Program           currentProgram;\n-  Process           process;\n-  javax.swing.Timer processTimer;\n-\n-\n-  int previousStatus;\n-\n-  public Smart() {\n-    super(new BorderLayout());\n-\n-    callingProgram = new HashMap <JButton, Program> ();\n-\n-    previousStatus = -1;\n-\n-    processTimer = new javax.swing.Timer(1000, this);\n-   
 processTimer.stop();\n-\n-    // Ask frame buttons\n-    pythonButton = new JButton("find...");\n-    mySqlButton  = new JButton("find...");\n-    rButton      = new JButton("find...");\n-\n-    // Get available formats\n-    FormatsReader formatReader = new FormatsReader(Global.smartFormatsFileName);\n-    if (! formatReader.read()) {\n-      System.out.println("Something was wrong while reading file format...");\n-    }\n-\n-    // Get screen size\n-    Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();\n-\n-    // Log\n-    logArea = new JTextArea(512, Global.logAreaSize);\n-    logArea.setPreferredSize(new Dimension(screenSize.width, (int) (screenSize.height * 0.22)));\n-    logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize()));\n-    JScrollPane logScroll  = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);\n-    TitledBorder logBorder '..b'\\n");\n-        }\n-      }\n-      this.checkConfiguration();\n-    }\n-    // Format type\n-    else if (e.getSource() == formatTypes) {\n-      fileFormats.removeAllItems();\n-      Vector < String > selectedFormats = Global.formats.getFormats((String) formatTypes.getSelectedItem()).getFormats();\n-      for (int i = 0; i < selectedFormats.size(); i++) {\n-        fileFormats.addItem(selectedFormats.get(i));\n-      }\n-    }\n-    // Main file chooser\n-    else if (e.getSource() == openButton) {\n-      JFileChooser chooser = new JFileChooser();\n-      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n-        String fileName = chooser.getSelectedFile().getPath();\n-        Global.fileNames.addElement(fileName);\n-        Global.files.addFile(fileName, (String) formatTypes.getSelectedItem(), (String) fileFormats.getSelectedItem());\n-      }\n-    }\n-    // Other file choosers\n-    else if (Global.otherFilesChooser.containsKey(e.getSource())) {\n-      JTextField textField 
= Global.otherFilesChooser.get(e.getSource());\n-      JFileChooser chooser = new JFileChooser();\n-      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n-        textField.setText(chooser.getSelectedFile().getPath());\n-      }\n-    }\n-    // Other directories choosers\n-    else if (Global.otherDirectoriesChooser.containsKey(e.getSource())) {\n-      JTextField textField = Global.otherDirectoriesChooser.get(e.getSource());\n-      JFileChooser chooser = new JFileChooser();\n-      chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);\n-      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n-        textField.setText(chooser.getSelectedFile().getPath());\n-      }\n-    }\n-    else if (Global.otherFileConcatenationChooser.containsKey(e.getSource())) {\n-      JTextField textField = Global.otherDirectoriesChooser.get(e.getSource());\n-      JFileChooser chooser = new JFileChooser();\n-      chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);\n-      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n-        String text = textField.getText();\n-        if ((text == null) || ("".equals(text))) {\n-          textField.setText(chooser.getSelectedFile().getPath());\n-        }\n-        else {\n-          textField.setText(text + "," + chooser.getSelectedFile().getPath());\n-        }\n-      }\n-    }\n-    // Programs\n-    else {\n-      currentProgram = callingProgram.get(e.getSource());\n-      String comment = currentProgram.checkValues();\n-      if (comment != null) {\n-        logArea.append(comment);\n-        return;\n-      }\n-      LinkedList <String> command = currentProgram.getCommand();\n-      ProgramLauncher launcher = new ProgramLauncher(command, logArea, messageField, progressBar, etaField);\n-      launcher.execute();\n-      Vector < File > outputFiles = currentProgram.getOutputFiles();\n-      for (int i = 0; i < outputFiles.size(); i++) {\n-        File 
file = outputFiles.get(i);\n-        if (file.getFormatType().compareToIgnoreCase("other") != 0) {\n-          Global.fileNames.addElement(file.getName());\n-          Global.files.addFile(file);\n-        }\n-      }\n-      currentProgram = null;\n-    }\n-  }\n-\n-\n-  private static void createAndShowGUI() {\n-    // Create and set up the window.\n-    JFrame mainFrame = new JFrame("S-Mart");\n-    mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);\n-\n-    //Create and set up the content pane.\n-    JComponent newContentPane = new Smart();\n-    newContentPane.setOpaque(true);\n-    mainFrame.setContentPane(newContentPane);\n-\n-    // Display the window.\n-    mainFrame.pack();\n-    mainFrame.setVisible(true);\n-    Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();\n-    mainFrame.setBounds(0, 0, screenSize.width, screenSize.height);\n-  }\n-\n-\n-  public static void main(String[] args) {\n-    javax.swing.SwingUtilities.invokeLater(new Runnable() {\n-      public void run() {\n-        createAndShowGUI();\n-      }\n-    });\n-  }\n-}\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Smart.jar
b
Binary file SMART/Java/Smart.jar has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/Smart.java
--- a/SMART/Java/Smart.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,668 +0,0 @@\n-/**\n- *\n- * Copyright INRA-URGI 2009-2010\n- * \n- * This software is governed by the CeCILL license under French law and\n- * abiding by the rules of distribution of free software. You can use,\n- * modify and/ or redistribute the software under the terms of the CeCILL\n- * license as circulated by CEA, CNRS and INRIA at the following URL\n- * "http://www.cecill.info".\n- * \n- * As a counterpart to the access to the source code and rights to copy,\n- * modify and redistribute granted by the license, users are provided only\n- * with a limited warranty and the software\'s author, the holder of the\n- * economic rights, and the successive licensors have only limited\n- * liability.\n- * \n- * In this respect, the user\'s attention is drawn to the risks associated\n- * with loading, using, modifying and/or developing or reproducing the\n- * software by the user in light of its specific status of free software,\n- * that may mean that it is complicated to manipulate, and that also\n- * therefore means that it is reserved for developers and experienced\n- * professionals having in-depth computer knowledge. 
Users are therefore\n- * encouraged to load and test the software\'s suitability as regards their\n- * requirements in conditions enabling the security of their systems and/or\n- * data to be ensured and, more generally, to use and operate it in the\n- * same conditions as regards security.\n- * \n- * The fact that you are presently reading this means that you have had\n- * knowledge of the CeCILL license and that you accept its terms.\n- *\n- */\n-import java.util.*;\n-import java.awt.*;\n-import java.awt.event.ActionEvent;\n-import java.awt.event.ActionListener;\n-import java.awt.event.WindowEvent;\n-import java.awt.event.WindowAdapter;\n-import java.io.*;\n-import javax.swing.*;\n-import javax.swing.filechooser.*;\n-import javax.swing.border.*;\n-import javax.swing.SwingUtilities;\n-import java.util.prefs.BackingStoreException;\n-\n-\n-public class Smart extends JPanel implements ActionListener {\n-\n-  String version = "1.1.0";\n-\n-  JFrame mainFrame;\n-  JButton openButton;\n-  JButton resetFileButton;\n-\n-  JComboBox formatTypes;\n-  JComboBox fileFormats;\n-  String[]  emptyFormats = {"Choose a type first..."};\n-\n-  JFrame  askFrame;\n-  JButton pythonButton;\n-  JButton mySqlButton;\n-  JButton rButton;\n-\n-  HashMap <JButton, Program> callingProgram;\n-\n-  static JList        fileList;\n-  static JTextArea    logArea;\n-\n-  // progress bar\n-  static JLabel       messageField;\n-  static JProgressBar progressBar;\n-  static JLabel       etaField;\n-\n-  // process\n-  Program           currentProgram;\n-  Process           process;\n-  javax.swing.Timer processTimer;\n-\n-\n-  int previousStatus;\n-\n-  public Smart() {\n-    super(new BorderLayout());\n-\n-    callingProgram = new HashMap <JButton, Program> ();\n-\n-    previousStatus = -1;\n-\n-    processTimer = new javax.swing.Timer(1000, this);\n-    processTimer.stop();\n-\n-    // Ask frame buttons\n-    pythonButton = new JButton("find...");\n-    mySqlButton  = new JButton("find...");\n-    
rButton      = new JButton("find...");\n-\n-    // Get available formats\n-    FormatsReader formatReader = new FormatsReader(Global.smartFormatsFileName);\n-    if (! formatReader.read()) {\n-      System.out.println("Something was wrong while reading file format...");\n-    }\n-\n-    // Get screen size\n-    Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();\n-\n-    // Log\n-    logArea = new JTextArea(512, Global.logAreaSize);\n-    logArea.setPreferredSize(new Dimension(screenSize.width, (int) (screenSize.height * 0.22)));\n-    logArea.setFont(new Font("Monospaced", logArea.getFont().getStyle(), logArea.getFont().getSize()));\n-    JScrollPane logScroll  = new JScrollPane(logArea, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED);\n-    TitledBorder logBorder = BorderFactory.createTitledBorder("Log");\n-    logScroll.setBorder(logBorder);\n-    logArea.append("Using S-MART " + version + "\\n");\n-\n-    '..b'extField = Global.otherFilesChooser.get(e.getSource());\n-      JFileChooser chooser = new JFileChooser();\n-      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n-        textField.setText(chooser.getSelectedFile().getPath());\n-      }\n-    }\n-    // Other directories choosers\n-    else if (Global.otherDirectoriesChooser.containsKey(e.getSource())) {\n-      JTextField textField = Global.otherDirectoriesChooser.get(e.getSource());\n-      JFileChooser chooser = new JFileChooser();\n-      chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);\n-      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n-        textField.setText(chooser.getSelectedFile().getPath());\n-      }\n-    }\n-    else if (Global.otherFileConcatenationChooser.containsKey(e.getSource())) {\n-      JTextField textField = Global.otherDirectoriesChooser.get(e.getSource());\n-      JFileChooser chooser = new JFileChooser();\n-      
chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);\n-      if (chooser.showOpenDialog(mainFrame) == JFileChooser.APPROVE_OPTION) {\n-        String text = textField.getText();\n-        if ((text == null) || ("".equals(text))) {\n-          textField.setText(chooser.getSelectedFile().getPath());\n-        }\n-        else {\n-          textField.setText(text + "," + chooser.getSelectedFile().getPath());\n-        }\n-      }\n-    }\n-    // Programs\n-    else {\n-      currentProgram = callingProgram.get(e.getSource());\n-      String comment = currentProgram.checkValues();\n-      if (comment != null) {\n-        logArea.append(comment);\n-        return;\n-      }\n-      LinkedList <String> command = currentProgram.getCommand();\n-      ProgramLauncher launcher = new ProgramLauncher(command, logArea, messageField, progressBar, etaField);\n-      launcher.execute();\n-      Vector < File > outputFiles = currentProgram.getOutputFiles();\n-      for (int i = 0; i < outputFiles.size(); i++) {\n-        File file = outputFiles.get(i);\n-        if (file.getFormatType().compareToIgnoreCase("other") != 0) {\n-          Global.fileNames.addElement(file.getName());\n-          Global.files.addFile(file);\n-        }\n-      }\n-      currentProgram = null;\n-    }\n-  }\n-\n-\n-  private static void removeTmpFiles() {\n-    logArea.append("You want to quit already?\\nRemoving temporary files...");\n-    String[]        command  = {Global.pythonCommand, "Python" + java.io.File.separator + "removeAllTmpTables.py"};\n-    ProgramLauncher launcher = new ProgramLauncher(command, logArea, messageField, progressBar, etaField);\n-    launcher.execute();\n-    logArea.append(" done.\\nNow quitting.\\nBye!");\n-  }\n-\n-\n-  private static void printJavaVersions() {\n-    String[] pro = {"java.version", "java.vm.version", "java.runtime.version"};\n-\n-    Properties properties = System.getProperties();\n-    for (int i = 0; i < pro.length; i++) {\n-      
logArea.append(pro[i] + ": " + properties.getProperty(pro[i]) + "\\n");\n-    }\n-  }\n-  \n-  private static void createAndShowGUI() {\n-    // Create and set up the window.\n-    JFrame mainFrame = new JFrame("S-Mart");\n-    mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);\n-\n-    //Create and set up the content pane.\n-    JComponent newContentPane = new Smart();\n-    newContentPane.setOpaque(true);\n-    mainFrame.setContentPane(newContentPane);\n-\n-    // Display the window.\n-    mainFrame.pack();\n-    mainFrame.setVisible(true);\n-    Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();\n-    mainFrame.setBounds(0, 0, screenSize.width, screenSize.height);\n-    printJavaVersions();\n-\n-    // Remove tmp file while quitting.\n-    mainFrame.addWindowListener(new WindowAdapter() {\n-      @Override\n-      public void windowClosing(WindowEvent e) {\n-        removeTmpFiles();\n-      }\n-    });\n-  }\n-\n-\n-  public static void main(String[] args) {\n-    javax.swing.SwingUtilities.invokeLater(new Runnable() {\n-      public void run() {\n-        createAndShowGUI();\n-      }\n-    });\n-  }\n-}\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/SmartInstaller.jar
b
Binary file SMART/Java/SmartInstaller.jar has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/WindowsRegistry.java
--- a/SMART/Java/WindowsRegistry.java Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,387 +0,0 @@\n-import java.lang.reflect.InvocationTargetException;\n-import java.lang.reflect.Method;\n-import java.util.HashMap;\n-import java.util.Map;\n-import java.util.ArrayList;\n-import java.util.List;\n-import java.util.prefs.Preferences;\n-\n-public class WindowsRegistry {\n-  public static final int HKEY_CURRENT_USER = 0x80000001;\n-  public static final int HKEY_LOCAL_MACHINE = 0x80000002;\n-  public static final int REG_SUCCESS = 0;\n-  public static final int REG_NOTFOUND = 2;\n-  public static final int REG_ACCESSDENIED = 5;\n-\n-  private static final int KEY_ALL_ACCESS = 0xf003f;\n-  private static final int KEY_READ = 0x20019;\n-  private static Preferences userRoot = Preferences.userRoot();\n-  private static Preferences systemRoot = Preferences.systemRoot();\n-  private static Class<? extends Preferences> userClass = userRoot.getClass();\n-  private static Method regOpenKey = null;\n-  private static Method regCloseKey = null;\n-  private static Method regQueryValueEx = null;\n-  private static Method regEnumValue = null;\n-  private static Method regQueryInfoKey = null;\n-  private static Method regEnumKeyEx = null;\n-  private static Method regCreateKeyEx = null;\n-  private static Method regSetValueEx = null;\n-  private static Method regDeleteKey = null;\n-  private static Method regDeleteValue = null;\n-\n-  static {\n-    try {\n-      regOpenKey = userClass.getDeclaredMethod("WindowsRegOpenKey",\n-          new Class[] { int.class, byte[].class, int.class });\n-      regOpenKey.setAccessible(true);\n-      regCloseKey = userClass.getDeclaredMethod("WindowsRegCloseKey",\n-          new Class[] { int.class });\n-      regCloseKey.setAccessible(true);\n-      regQueryValueEx = userClass.getDeclaredMethod("WindowsRegQueryValueEx",\n-          new Class[] { int.class, byte[].class });\n-      regQueryValueEx.setAccessible(true);\n-      regEnumValue = userClass.getDeclaredMethod("WindowsRegEnumValue",\n-          new Class[] { int.class, 
int.class, int.class });\n-      regEnumValue.setAccessible(true);\n-      regQueryInfoKey = userClass.getDeclaredMethod("WindowsRegQueryInfoKey1",\n-          new Class[] { int.class });\n-      regQueryInfoKey.setAccessible(true);\n-      regEnumKeyEx = userClass.getDeclaredMethod(  \n-          "WindowsRegEnumKeyEx", new Class[] { int.class, int.class,  \n-              int.class });  \n-      regEnumKeyEx.setAccessible(true);\n-      regCreateKeyEx = userClass.getDeclaredMethod(  \n-          "WindowsRegCreateKeyEx", new Class[] { int.class,  \n-              byte[].class });  \n-      regCreateKeyEx.setAccessible(true);  \n-      regSetValueEx = userClass.getDeclaredMethod(  \n-          "WindowsRegSetValueEx", new Class[] { int.class,  \n-              byte[].class, byte[].class });  \n-      regSetValueEx.setAccessible(true); \n-      regDeleteValue = userClass.getDeclaredMethod(  \n-          "WindowsRegDeleteValue", new Class[] { int.class,  \n-              byte[].class });  \n-      regDeleteValue.setAccessible(true); \n-      regDeleteKey = userClass.getDeclaredMethod(  \n-          "WindowsRegDeleteKey", new Class[] { int.class,  \n-              byte[].class });  \n-      regDeleteKey.setAccessible(true); \n-    }\n-    catch (Exception e) {\n-      e.printStackTrace();\n-    }\n-  }\n-\n-  private WindowsRegistry() {  }\n-\n-  /**\n-   * Read a value from key and value name\n-   * @param hkey   HKEY_CURRENT_USER/HKEY_LOCAL_MACHINE\n-   * @param key\n-   * @param valueName\n-   * @return the value\n-   * @throws IllegalArgumentException\n-   * @throws IllegalAccessException\n-   * @throws InvocationTargetException\n-   */\n-  public static String readString(int hkey, String key, String valueName) \n-    throws IllegalArgumentException, IllegalAccessException,\n-    InvocationTargetException \n-  {\n-    if (hkey == HKEY_LOCAL_MACHINE) {\n-      return readString(systemRoot, hkey, key, valueName);\n-    }\n-    else if (hkey == HKEY_CURRENT_USER) {\n-  
    return readString(userRoot, hkey, key, valueName);\n-    }\n-    else {\n-      throw new Illeg'..b'   return rc;  // can REG_NOTFOUND, REG_ACCESSDENIED, REG_SUCCESS\n-  }\n-\n-  private static String readString(Preferences root, int hkey, String key, String value)\n-    throws IllegalArgumentException, IllegalAccessException,\n-    InvocationTargetException \n-  {\n-    int[] handles = (int[]) regOpenKey.invoke(root, new Object[] {\n-        new Integer(hkey), toCstr(key), new Integer(KEY_READ) });\n-    if (handles[1] != REG_SUCCESS) {\n-      return null; \n-    }\n-    byte[] valb = (byte[]) regQueryValueEx.invoke(root, new Object[] {\n-        new Integer(handles[0]), toCstr(value) });\n-    regCloseKey.invoke(root, new Object[] { new Integer(handles[0]) });\n-    return (valb != null ? new String(valb).trim() : null);\n-  }\n-\n-  private static Map<String,String> readStringValues\n-    (Preferences root, int hkey, String key)\n-    throws IllegalArgumentException, IllegalAccessException,\n-    InvocationTargetException \n-  {\n-    HashMap<String, String> results = new HashMap<String,String>();\n-    int[] handles = (int[]) regOpenKey.invoke(root, new Object[] {\n-        new Integer(hkey), toCstr(key), new Integer(KEY_READ) });\n-    if (handles[1] != REG_SUCCESS) {\n-      return null;\n-    }\n-    int[] info = (int[]) regQueryInfoKey.invoke(root,\n-        new Object[] { new Integer(handles[0]) });\n-\n-    int count = info[2]; // count  \n-    int maxlen = info[3]; // value length max\n-    for(int index=0; index<count; index++)  {\n-      byte[] name = (byte[]) regEnumValue.invoke(root, new Object[] {\n-          new Integer\n-            (handles[0]), new Integer(index), new Integer(maxlen + 1)});\n-      String value = readString(hkey, key, new String(name));\n-      results.put(new String(name).trim(), value);\n-    }\n-    regCloseKey.invoke(root, new Object[] { new Integer(handles[0]) });\n-    return results;\n-  }\n-\n-  private static 
List<String> readStringSubKeys\n-    (Preferences root, int hkey, String key)\n-    throws IllegalArgumentException, IllegalAccessException,\n-    InvocationTargetException \n-  {\n-    List<String> results = new ArrayList<String>();\n-    int[] handles = (int[]) regOpenKey.invoke(root, new Object[] {\n-        new Integer(hkey), toCstr(key), new Integer(KEY_READ) \n-        });\n-    if (handles[1] != REG_SUCCESS) {\n-      return null;\n-    }\n-    int[] info = (int[]) regQueryInfoKey.invoke(root,\n-        new Object[] { new Integer(handles[0]) });\n-\n-    int count = info[2]; // count  \n-    int maxlen = info[3]; // value length max\n-    for(int index=0; index<count; index++)  {\n-      byte[] name = (byte[]) regEnumKeyEx.invoke(root, new Object[] {\n-          new Integer\n-            (handles[0]), new Integer(index), new Integer(maxlen + 1)\n-          });\n-      results.add(new String(name).trim());\n-    }\n-    regCloseKey.invoke(root, new Object[] { new Integer(handles[0]) });\n-    return results;\n-  }\n-\n-  private static int [] createKey(Preferences root, int hkey, String key)\n-    throws IllegalArgumentException, IllegalAccessException,\n-    InvocationTargetException \n-  {\n-    return  (int[]) regCreateKeyEx.invoke(root,\n-        new Object[] { new Integer(hkey), toCstr(key) });\n-  }\n-\n-  private static void writeStringValue \n-    (Preferences root, int hkey, String key, String valueName, String value) \n-    throws IllegalArgumentException, IllegalAccessException,\n-    InvocationTargetException \n-  {\n-    int[] handles = (int[]) regOpenKey.invoke(root, new Object[] {\n-        new Integer(hkey), toCstr(key), new Integer(KEY_ALL_ACCESS) });\n-\n-    regSetValueEx.invoke(root,  \n-        new Object[] { \n-          new Integer(handles[0]), toCstr(valueName), toCstr(value) \n-          }); \n-    regCloseKey.invoke(root, new Object[] { new Integer(handles[0]) });\n-  }\n-\n-  // utility\n-  private static byte[] toCstr(String str) 
{\n-    byte[] result = new byte[str.length() + 1];\n-\n-    for (int i = 0; i < str.length(); i++) {\n-      result[i] = (byte) str.charAt(i);\n-    }\n-    result[str.length()] = 0;\n-    return result;\n-  }\n-}\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/__init__.pyc
b
Binary file SMART/Java/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/doc.pdf
b
Binary file SMART/Java/doc.pdf has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/formats.txt
--- a/SMART/Java/formats.txt Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,4 +0,0 @@
-sequence: fasta, fastq
-transcript: bed, gff, gff2, gff3, csv
-mapping: axt, blast, bowtie, exo, maq, nucmer, psl, sam, seqmap, shrimp, soap, soap2
-other: txt, wig, png, nclist
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/Java/manifest.txt
--- a/SMART/Java/manifest.txt Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-Manifest-Version: 1.0
-Created-By: Matthias Zytnicki
-Main-Class: Smart
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/__init__.pyc
b
Binary file SMART/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/changeName.py
--- a/SMART/bacteriaRegulatoryRegion_Detection/changeName.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,99 +0,0 @@
-#! /usr/bin/env python
-
-import optparse, os, sys, subprocess, tempfile, shutil
-from optparse import OptionParser
-
-def stop_err(msg):
-    sys.stderr.write('%s\n' % msg)
-    sys.exit()
-    
-def changeName(fileName, format, name, outputName):
-    file = open(fileName, 'r')
-    line = file.readline()
-    if format == "fasta":
-        while not line.startswith('>'):
-            line = file.readline()
-        old_name = line[1:]
-    elif format == "gff":
-        while line.startswith('#'):
-            line = file.readline()
-        old_name = (line.split('\t'))[0]
-    elif format == "sam":
-        while line.startswith('@'):
-            line = file.readline()
-        old_name = (line.split('\t'))[2]
-    file.close()    
-    cmd = "sed \"s/%s/%s/g\" %s >%s " % (old_name.strip(), name.strip(), fileName, outputName)
-    proc = subprocess.Popen(cmd, shell=True)
-    proc.communicate()
-    if proc.returncode != 0:
-        raise Exception("ERROR when launching '%s'" % cmd)
-    
-def getName(fileName, format):
-    file = open(fileName, 'r')
-    line = file.readline()
-    if format == "gff":
-        while line.startswith('#'):
-            line = file.readline()
-        old_name = (line.split('\t'))[0]
-    elif format == "sam":
-        while line.startswith('@') or line.startswith('#'):
-            line = file.readline()
-        old_name = (line.split('\t'))[2]
-    file.close()    
-    return old_name
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option("", "--input1", dest="inputFile1", default=None, help="Choose a fasta file.")
-    parser.add_option("", "--input2", dest="inputFile2", default=None, help="Choose a gff file.")
-    parser.add_option("", "--input3", dest="inputFile3", default=None, help="Choose a sam file.")
-    parser.add_option("", "--name", dest="name", default=None, help="Change to a new name.[compulsory] if there is only one input.")
-    parser.add_option("", "--output1", dest="outputFile1", default=None, help="OutputFile1")
-    parser.add_option("", "--output2", dest="outputFile2", default=None, help="OutputFile2")
-    parser.add_option("", "--output3", dest="outputFile3", default=None, help="OutputFile3")
-    (options, args) = parser.parse_args() 
-    
-#TODO:write raise Exception!!
-    
-    #In case only one input
-    if options.name == None:
-        #find a default_name to unify the name for all input files
-        if options.inputFile1 != None:
-                if options.inputFile2 == None and options.inputFile3 == None:
-                    raise Exception("ERROR, only one input, you should identify a new name to modify.")
-                elif options.inputFile2 != None and options.outputFile2 != None:
-                    default_name = getName(options.inputFile2, 'gff')
-                    changeName(options.inputFile1, 'fasta', default_name, options.outputFile1)
-                    changeName(options.inputFile2, 'gff', default_name, options.outputFile2)
-                    if options.inputFile3 != None and options.outputFile3 != None:
-                        changeName(options.inputFile3, 'sam', default_name, options.outputFile3)
-                elif options.inputFile3 != None and options.outputFile3 != None:
-                        default_name = getName(options.inputFile3, 'sam')
-                        changeName(options.inputFile3, 'sam', default_name, options.outputFile3)
-                        changeName(options.inputFile1, 'fasta', default_name, options.outputFile1)
-                        if options.inputFile2 != None and options.outputFile2 != None:
-                            changeName(options.inputFile2, 'gff', default_name, options.outputFile2)
-    else:
-        if options.inputFile1 != None and options.outputFile1 != None:      
-            changeName(options.inputFile1, 'fasta', options.name, options.outputFile1)
-        if options.inputFile2 != None and options.outputFile2 != None:
-            changeName(options.inputFile2, 'gff', options.name, options.outputFile2)
-        if options.inputFile3 != None and options.outputFile3 != None:
-            changeName(options.inputFile3, 'sam', options.name, options.outputFile3)    
-       
-if __name__ == '__main__':__main__()
-
-
-#test commands: 
-#only one input:
-#python changeName.py --input1 NC_011744.fna --name NC_test --output1 out.fna
-#several inputs:
-#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff --output1 out.fna --output2 out.gff
-#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff --name NC_test --output1 out.fna --output2 out.gff
-#python changeName.py --input1 NC_011744.fna --input2 NC_011744.gff  --input3 NC_011744.sam --name NC_test2 --output1 out.fna --output2 out.gff --output3 out.sam
-#python changeName.py --input1 NC_011744.fna --input3 out.sam --output1 out.fna --output3 out.sam
-
-
-    
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/changeName.xml
--- a/SMART/bacteriaRegulatoryRegion_Detection/changeName.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,80 +0,0 @@
-<tool id="changeName" name="changeName">
- <description>Change the chromosome name or gene name of a singla fasta, gff or sam file. For this tool, it can not treat mutiple-chromosome, gene files.</description>
- <command interpreter="python">
- changeName.py 
- #if $optionFasta.fastaFile == 'Yes':
- --input1 $optionFasta.fasta --output1 $outputFasta
- #end if
- #if $optionGff.gffFile == 'Yes':
- --input2 $optionGff.gff --output2 $outputGff 
- #end if
- #if $optionSam.samFile == 'Yes':
- --input3 $optionSam.sam --output3 $outputSam 
- #end if
- #if $optionName.name == 'Yes':
- --name $optionName.nameValue 
- #end if
- </command>
-
- <inputs>
- <conditional name="optionFasta">
- <param name="fastaFile" type="select" label="You can choose a fasta input file to change the name." >
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="fasta" format="fasta" type="data" label="Identify you fasta input file."/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionGff">
- <param name="gffFile" type="select" label="You can choose a supplementary gff input file to change the name." >
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="gff" format="gff" type="data" label="Identify you gff input file."/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionSam">
- <param name="samFile" type="select" label="You can choose a supplementary sam input file to change the name." >
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="sam" format="sam" type="data" label="Identify you sam input file."/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionName">
- <param name="name" type="select" label="Identify a new name to change." >
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="nameValue" type="text" value="None" label="Identify the new name."/>
- </when>
- <when value="No">
- </when>
- </conditional>
- </inputs>
-
- <outputs>
- <data name="outputFasta" format="fasta">
- <filter>optionFasta['fastaFile'] == 'Yes'</filter>
- </data>
- <data format="gff" name="outputGff" >
-         <filter>optionGff['gffFile'] == 'Yes'</filter>
-     </data>
- <data format="sam" name="outputSam" >
-         <filter>optionSam['samFile'] == 'Yes'</filter>
-     </data>     
- </outputs> 
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/colorGff.pl
--- a/SMART/bacteriaRegulatoryRegion_Detection/colorGff.pl Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,71 +0,0 @@
-#!/usr/bin/perl -w
-###
-# But : ajout ou modif de couleur d'un gff
-# 
-# Entrees : fichier gff
-#
-# Sortie : gff affiche a l'ecran
-#
-###------------------------------------------------------
-use vars qw($USAGE);                      
-use strict;                               
-
-=head1 NAME
-
-colorGff.pl - add or change color of a gff file
-
-=head1 SYNOPSIS
-
-% colorGff.pl -i file.gff -c color [-h] 
-
-=head1 DESCRIPTION
-This script will parse DOOR repport file and write information in gff3 format.
-
-    -i|--input fileName  gff input file name
-    -c|--color RGBcode   RGB code for color
-    -o|--output fileName gff3 output file name
-   [-h|--help]           help mode then die                              
-
-=head1 AUTHOR - Claire Toffano-Nioche - jan.11
-
-=cut
-#-----------------------
-my ($fileName, $colourGff, $outFileName) = ("", "", "colorOut.gff3") ;
-   # command line check
-    foreach my $num (0 .. $#ARGV) {
-        SWITCH: for ($ARGV[$num]) {
-        /--input|-i/ && do { 
- $fileName=$ARGV[$num+1]; 
- open ( fichierGff, "< $fileName" ) or die "Can't open gff file: \"$fileName\"\n" ; 
- last };
-        /--color|-c/ && do {
- $colourGff =$ARGV[$num+1]." ".$ARGV[$num+2]." ".$ARGV[$num+3];
- last };
-#     /--output|-o/ && do { 
-# $outFileName=$ARGV[$num+1]; 
-# last };
-        /--help|-h/ && do { exec("pod2text $0\n") ; die };
-        }
-    }
-#    open(OUT,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
-    # informations retrieval
-    my @lines = <fichierGff> ; 
-    close fichierGff ;
-    # treatment
- #print "gff file read ; number of lines : $#lines\n";
-    for (my $i=0 ; $i <= $#lines ; $i++) {
- if ($lines[$i] =~ /;/) {
-    if ($lines[$i] =~ /color=/) {
-      $lines[$i] =~ s/color=.*;/color=$colourGff;/ ;
- } else { # add colour
-      $lines[$i] =~ s/;/;color=$colourGff;/ ;
-      } 
-     } else { # (no = gff bug if col9 begin with semi-coma ?) or only one tag : add color tag
-      chomp($lines[$i]) ;
-      $lines[$i] .= "; color=".$colourGff.";\n";
- }
-# print OUT $lines[$i] ;
- print $lines[$i];
-    }
-# close OUT ;
-exit(0);
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/colorGff.xml
--- a/SMART/bacteriaRegulatoryRegion_Detection/colorGff.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,19 +0,0 @@
-<tool id="colorGff" name="colorGff">
- <description> Parses a DOOR report file and writes the information in a gff3 out file. </description>
- <command interpreter="perl"> 
- colorGff.pl -i $inputFile -c $RGBcode > $outputFile  
- </command>
-
- <inputs>
- <param name="inputFile" type="data" label="Input File" format="gff"/>
- <param name="RGBcode" type="text" value="250 128 114" help="RGB code is necessary for choosing the color."/>
- </inputs>
-
- <outputs>
- <data format="gff3" name="outputFile" label="[colorGff] Output File"/>
- </outputs>
-
- <help>
- Command example: perl colorGff.pl -i trans_covUp5_nbEUp10_lgUp50.gff3 -c "250 128 114" > trans_covUp5_nbEUp10_lgUp50_c.gff3
- </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/coverageGff.pl
--- a/SMART/bacteriaRegulatoryRegion_Detection/coverageGff.pl Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,74 +0,0 @@
-#!/usr/bin/perl -w
-###
-# But : ajout ou modif de la couverture d'un gff
-# 
-# Entrees : fichier gff
-#
-# Sortie : gff affiche a l'ecran
-#
-###------------------------------------------------------
-
-#!/usr/bin/perl -w                                                                                                                                                     
-              
-use vars qw($USAGE);                      
-use strict;                               
-
-=head1 NAME
-
-coverageGff.pl - add or compute the coverage of a gff file
-
-=head1 SYNOPSIS
-
-% coverageGff.pl -i file.gff -l readLength [-h] 
-
-=head1 DESCRIPTION
-This script will parse gff file, compute read coverage form the "nbElements" tag and write coverage in gff3 format.
-
-    -i|--input fileName     gff input file name
-    -l|--length ReadLength  lenght of the reads in bp [38 default]
-    -o|--output fileName    gff3 output file name
-   [-h|--help]              help mode then die                              
-
-=head1 AUTHOR - Claire Toffano-Nioche - fev.11
-
-=cut
-#-----------------------
-my ($fileName, $length, $outFileName) = ("", 38, "coverageOut.gff3") ;
-   # command line check
-    foreach my $num (0 .. $#ARGV) {
-        SWITCH: for ($ARGV[$num]) {
-        /--input|-i/ && do { 
- $fileName=$ARGV[$num+1]; 
- open ( fichierGff, "< $fileName" ) or die "Can't open gff file: \"$fileName\"\n" ; 
- last };
-        /--length|-l/ && do { 
- $length=$ARGV[$num+1]; 
- last };
-        /--help|-h/ && do { exec("pod2text $0\n") ; die };
-        }
-    }
-    # informations retrieval
-#    open(OUT,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
-    my @lines = <fichierGff> ; 
-    close fichierGff ;
-    # treatment
- #print "gff file read ; number of lines : $#lines\n";
-    for (my $i=0 ; $i <= $#lines ; $i++) {
- # compute coverage :
- if ($lines[$i] =~ /nbElements=/) {
- my ($nbE)=($lines[$i] =~ /nbElements=(\d+)/) ;
- my @gffCol=split("\t", $lines[$i]) ;
- # print "ligne : $i, nbE : $nbE, length : $length, debut : $gffCol[3], fin : $gffCol[4].\n";
- my $cover=$length*$nbE/($gffCol[4]-$gffCol[3]+1) ;
- $cover=int(100*$cover+0.5)/100 ; # arronri sup. precision 2 chiffres 
- if ($lines[$i] =~ /coverage=/) { # replace coverage
-     $lines[$i] =~ s/coverage=.*;/coverage=$cover;/ ;
- } else { # add coverage
-     $lines[$i] =~ s/;/;coverage=$cover;/ ;
- }
- }
-# print OUT $lines[$i] ;
- print $lines[$i] ;
-    }
-#close OUT ;
-exit(0);
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/coverageGff.xml
--- a/SMART/bacteriaRegulatoryRegion_Detection/coverageGff.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,17 +0,0 @@
-<tool id="coverageGff" name="coverage Gff">
-  <description>Computes reads coverage form a "nbElements" tag and writes the calculated coverage in a gff3 out file.</description>
-  <command interpreter="perl"> coverageGff.pl -i $inputFile -l $readSize > $outputFile  </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff"/>
-    <param name="readSize" type="integer" value="38" help="The size of read, default: 38nt"/>
-
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[coveragePl] Output File"/>
-  </outputs>
-
-  <help>
- command example: perl coverageGff.pl -i *_trans_inIG.gff > *_trans_inIG_cov.gff
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/interElementGff.pl
--- a/SMART/bacteriaRegulatoryRegion_Detection/interElementGff.pl Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,187 +0,0 @@
-#!/usr/bin/perl -w 
-###
-# But : protocol permettant la detection d'RNA non codant potentiel
-# 
-# Entrees : fichier de mapping Smart gff3
-# fichier gff des gènes
-# fichier gff des clusters Cis regulateur potentiel
-#
-# Sortie : fichier gff des clusters ARN nc
-#
-###------------------------------------------------------
-use vars qw($USAGE);
-use strict; 
-                            
-=head1 NAME
-
-interElementGff.pl - creation of a new Gff corresponding to the region of two successive Elements
-
-=head1 SYNOPSIS
-
-% interElementGff.pl -i inputFile.gff3 -o outputFile.gff3 [-s 50] [-a 20] [-n seqName] [-h] 
-
-=head1 DESCRIPTION
-This script will determine cluster ok ncRNA.
-
-    -i|--input  fileName   gff input file name
-    -o|--output fileName   gff output file name
-    -n|--name seqName      sequence name
-    -p|--print    print parameters used
-
-    -f5ff n   number of nt to exclude from 5' seed when gene before is Forward, seed is Forward and next gene is Forward [default 0]
-    -ff3f n   number... " ...[default 0]
-
-    -f5fr n   number... " ...[default 0] 
-    -ff3r n   number... " ...[default 0]
-     
-    -fr3f n   number... " ...[default 0]        
-    -fr5f n   number... " ...[default 0]
-    
-    -f3rr n   number... " ...[default 0]
-    -fr5r n   number... " ...[default 0]
-
-    -r5ff n   number... " ...[default 0]
-    -rf3f n   number... " ...[default 0]
-
-    -r5fr n   number... " ...[default 0]        
-    -rf3r n   number... " ...[default 0]
-
-    -r3rf n   number... " ...[default 0]
-    -rr5f n   number... " ...[default 0]
-
-    -r3rr n   number... " ...[default 0]
-    -rr5r n   number... " ...[default 0]
-
-   [-h|--help]           help mode then die                              
-
-
-USAGE_CASE
-
-% interElementGff.pl -i inputFile.gff3 -o outputFile.gff3 -ff 53 -rr 23 -n NC_011744
-
-BUG
-
-Caution : input file needs to be sorted on positions
-
-Caution : for -f/r options add +3 bp to include stop codon if not in input file
-
-=head1 AUTHOR - CTN - apr.11
-(from RNA-Vibrio/protocol_NC_V2.pl - Claire KUCHLY)
-
-=cut
-#----------------------------------------------------------------------------
-# check command line :
-my ($IDfile, $OutputFileName, $f5ff, $ff3f, $f5fr, $ff3r, $f3rf, $fr5f, $f3rr,$fr5r, $r5ff, $rf3f, $r5fr, $rf3r, $r3rf, $rr5f, $r3rr, $rr5r, $seqName, $printParameters) = 
-   (undef, undef , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", 0) ;
-if ($#ARGV==0) {
- die (exec("pod2text $0\n"));
-} else {
-    foreach my $num (0 .. $#ARGV) {
- SWITCH: for ($ARGV[$num]) {
- /--input|-i/ && do { $IDfile=$ARGV[$num+1]; 
- open(F,"<$IDfile") or die "Error: Can't open \"$IDfile\", $!"; 
- last; };
- /-f5ff/ && do { $f5ff=$ARGV[$num+1]+1; last; }; # need +1 for intervall computations
- /-ff3f/ && do { $ff3f=$ARGV[$num+1]+1; last; }; 
-
- /-f5fr/ && do { $f5fr=$ARGV[$num+1]+1; last; }; 
- /-ff3r/ && do { $ff3r=$ARGV[$num+1]+1; last; }; 
-
- /-f3rf/ && do { $f3rf=$ARGV[$num+1]+1; last; };
- /-fr5f/ && do { $fr5f=$ARGV[$num+1]+1; last; };
-
- /-f3rr/ && do { $f3rr=$ARGV[$num+1]+1; last; };
- /-fr5r/ && do { $fr5r=$ARGV[$num+1]+1; last; };
-
- /-r5ff/ && do { $r5ff=$ARGV[$num+1]+1; last; }; 
- /-rf3f/ && do { $rf3f=$ARGV[$num+1]+1; last; }; 
-
- /-r5fr/ && do { $r5fr=$ARGV[$num+1]+1; last; }; 
- /-rf3r/ && do { $rf3r=$ARGV[$num+1]+1; last; }; 
-
- /-r3rf/ && do { $r3rf=$ARGV[$num+1]+1; last; };
- /-rr5f/ && do { $rr5f=$ARGV[$num+1]+1; last; };
-
- /-r3rr/ && do { $r3rr=$ARGV[$num+1]+1; last; };
- /-rr5r/ && do { $rr5r=$ARGV[$num+1]+1; last; };
-
-# /--name|-n/ && do { $seqName=$ARGV[$num+1]; last; };
- /--print|-p/ && do { $printParameters=1; last; };
- /--output|-o/ && do { $OutputFileName=$ARGV[$num+1]; 
- open(S,">$OutputFileName") or die "Error : Can't open result file \"$OutputFileName\", $!";
- last; };
- /--help|-h/ && do { exec("pod2text $0\n") ; die };
- }
-    }
- if ($printParameters) {
- print "
-        --> f5ff ",$f5ff-1," --> ff3f ",$ff3f-1,"  --> ; 
-        --> f5fr ",$f5fr-1," --> ff3r ",$ff3r-1,"  <-- ; 
-        --> f3rf ",$f3rf-1," <-- fr5f ",$fr5f-1,"  --> ;  
-        --> f3rr ",$f3rr-1," <-- fr5r ",$fr5r-1,"  <-- ; 
-        <-- r5ff ",$r5ff-1," --> rf3f ",$rf3f-1,"  --> ;
-        <-- r5fr ",$r5fr-1," --> rf3r ",$rf3r-1,"  <-- ;
-        <-- r3rf ",$r3rf-1," <-- rr5f ",$rr5f-1,"  --> ; 
-        <-- r3rr ",$r3rr-1," <-- rr5r ",$rr5r-1,"  <-- ;\n";
-   }
-   ##NC_011753.2 RefSeq gene 367 834 . - . locus_tag=VS_0001;db_xref=GeneID:7162789
-   my $finSeedSens;
-   my $finSeedAntisens;
-   my $debSeedSens;
-   my $debSeedAntisens;
-   my $info_gene="";
-   my $sensGeneAvant = "+" ; # 1rst seed definition : geneAvant (gene[i-1]) doesn't exist 
-   my @chromList;
-   while(my $ligne = <F>){
- chomp($ligne);
- my @list = split(/\t/,$ligne);
- if ((scalar(@chromList) == 0) or ($chromList[$#chromList] ne $list[0])){
- push(@chromList, $list[0]);
- my $finSeedSens;
-    my $finSeedAntisens;
-    my $debSeedSens;
-    my $debSeedAntisens;
-    my $info_gene="";
-    my $sensGeneAvant = "+" ; # 1rst seed definition : geneAvant (gene[i-1]) doesn't exist 
- }
- if (($sensGeneAvant eq "+") and ($list[6] eq "+")) { #CTN ie geneavant == f, geneapres == f
- $debSeedSens += $f5ff;
- $finSeedSens = $list[3]- $ff3f;
- $debSeedAntisens += $f3rf;
- $finSeedAntisens = $list[3]- $fr5f;
- } elsif (($sensGeneAvant eq "+") and ($list[6] eq "-")) { #CTN ie geneaavant == f, geneapres == r
- $debSeedSens += $f5fr;
- $finSeedSens = $list[3]- $ff3r;
- $debSeedAntisens += $f3rr;
- $finSeedAntisens = $list[3]- $fr5r;
- } elsif (($sensGeneAvant eq "-") and ($list[6] eq "+")) { #CTN ie geneaavant == r, geneapres == f
- $debSeedSens += $r5ff;
- $finSeedSens = $list[3]- $rf3f;
- $debSeedAntisens += $r3rf;
- $finSeedAntisens = $list[3]- $rr5f;
- } else {                    #CTN ie geneaavant == r, geneapres == r
- $debSeedSens += $r5fr;
- $finSeedSens = $list[3]- $rf3r;
- $debSeedAntisens += $r3rr;
- $finSeedAntisens = $list[3]- $rr5r;
- }
- if ($debSeedSens <= 0) { $debSeedSens=1 ; } # 1srt 
- if ($debSeedAntisens <= 0) { $debSeedAntisens=1 ; }
- if($debSeedSens < $finSeedSens){ # only "real" seed 
- #print "$gene_avant\nNC_011753\tperso\tseed\t$deb_seed\t$fin_seed\t.\t+\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n$ligne\n\n";
-    # 
-
- print S "$list[0]\tperso\tseedIR\t$debSeedSens\t$finSeedSens\t.\t+\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n";
- }
- if ($debSeedAntisens < $finSeedAntisens){
- print S "$list[0]\tperso\tseedIR\t$debSeedAntisens\t$finSeedAntisens\t.\t-\t.\tgeneavant=$info_gene;geneapres=$list[@list-1]\n";
- }
- $sensGeneAvant = $list[6] ; # GFF : column 6 gives strand
- $debSeedSens = $list[4];
- $debSeedAntisens = $list[4];
- $info_gene = $list[@list-1];
-   }
-   close F;
-   close S;
-   exit(0);
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/interElementGff.xml
--- a/SMART/bacteriaRegulatoryRegion_Detection/interElementGff.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,284 +0,0 @@
-<tool id="interElementGff" name="interElementGff">
- <description>Creates a new Gff output, which corresponds to the region of two successive Elements. </description>
- <command interpreter="perl">
- interElementGff.pl -i $inputFile 
-
- #if $Optionf5ff.option == "Yes":
- -f5ff $Optionf5ff.f5ffValue
- #end if
-
- #if $Optionff3f.option == "Yes":
- -ff3f $Optionff3f.ff3fValue
- #end if
-
- #if $Optionf5fr.option == "Yes":
- -f5fr $Optionf5fr.f5frValue
- #end if
-
- #if $Optionff3r.option == "Yes":
- -ff3r $Optionff3r.ff3rValue
- #end if
-
- #if $Optionf3rf.option == "Yes":
- -f3rf $Optionf3rf.f3rfValue
- #end if
-
- #if $Optionfr5f.option == "Yes":
- -fr5f $Optionfr5f.fr5fValue
- #end if
-
- #if $Optionf3rr.option == "Yes":
- -f3rr $Optionf3rr.f3rrValue
- #end if
-
- #if $Optionfr5r.option == "Yes":
- -fr5r $Optionfr5r.fr5rValue
- #end if
-
- #if $Optionr5ff.option == "Yes":
- -r5ff $Optionr5ff.r5ffValue
- #end if
-
- #if $Optionrf3f.option == "Yes":
- -rf3f $Optionrf3f.rf3fValue
- #end if
-
- #if $Optionr5fr.option == "Yes":
- -r5fr $Optionr5fr.r5frValue
- #end if
-
- #if $Optionrf3r.option == "Yes":
- -rf3r $Optionrf3r.rf3rValue
- #end if
-
- #if $Optionr3rf.option == "Yes":
- -r3rf $Optionr3rf.r3rfValue
- #end if
-
- #if $Optionrr5f.option == "Yes":
- -rr5f $Optionrr5f.rr5fValue
- #end if
-
- #if $Optionr3rr.option == "Yes":
- -r3rr $Optionr3rr.r3rrValue
- #end if
-
- #if $Optionrr5r.option == "Yes":
- -rr5r $Optionrr5r.rr5rValue
- #end if
-
- -o $outputFile
- </command>
-
- <inputs>
- <param name="inputFile" type="data" label="Input File" format="gff"/>
-
- <conditional name="Optionf5ff">
- <param name="option" type="select" label="Option[f5ff]" help="number of nt to exclude from 5' seed when gene before is Forward, seed is Forward and next gene is Forward [default 0]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="f5ffValue" type="integer" value="10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionff3f">
- <param name="option" type="select" label="Option[ff3f]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="ff3fValue" type="integer" value="30"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionf5fr">
- <param name="option" type="select" label="Option[f5fr]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="f5frValue" type="integer" value="10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionff3r">
- <param name="option" type="select" label="Option[ff3r]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="ff3rValue" type="integer" value="-10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionf3rf">
- <param name="option" type="select" label="Option[f3rf]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="f3rfValue" type="integer" value="-10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionfr5f">
- <param name="option" type="select" label="Option[fr5f]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="fr5fValue" type="integer" value="10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionf3rr">
- <param name="option" type="select" label="Option[f3rr]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="f3rrValue" type="integer" value="-10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionfr5r">
- <param name="option" type="select" label="Option[fr5r]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="fr5rValue" type="integer" value="10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionr5ff">
- <param name="option" type="select" label="Option[r5ff]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="r5ffValue" type="integer" value="10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionrf3f">
- <param name="option" type="select" label="Option[rf3f]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="rf3fValue" type="integer" value="30"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionr5fr">
- <param name="option" type="select" label="Option[r5fr]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="r5frValue" type="integer" value="10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionrf3r">
- <param name="option" type="select" label="Option[rf3r]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="rf3rValue" type="integer" value="-10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionr3rf">
- <param name="option" type="select" label="Option[r3rf]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="r3rfValue" type="integer" value="30"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionrr5f">
- <param name="option" type="select" label="Option[rr5f]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="rr5fValue" type="integer" value="10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionr3rr">
- <param name="option" type="select" label="Option[r3rr]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="r3rrValue" type="integer" value="30"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="Optionrr5r">
- <param name="option" type="select" label="Option[rr5r]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="rr5rValue" type="integer" value="10"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- </inputs>
-
- <outputs>
- <data name="outputFile" format="gff" label="[interElementGff] Output File"/>
- </outputs> 
-
- <help>
- command example: interElementGff.pl -i ${i}_annot.gff -o ${i}_trans_IG.gff -f5ff 10 -ff3f 30 -f5fr 10 -ff3r -10 -f3rf -10 -fr5f 10 -f3rr -10 -fr5r 10 -r5ff 10 -rf3f 30 -r5fr 10 -rf3r -10 -r3rf 30 -rr5f 10 -r3rr 30 -rr5r 10
- </help>
-
-</tool>
-
-
-
-
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/listGff.sh
--- a/SMART/bacteriaRegulatoryRegion_Detection/listGff.sh Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,3 +0,0 @@
-#!/bin/bash
-awk '{print $3}' $1 | grep "[[:alpha:]]" | sort -n | uniq -c
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.sh
--- a/SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.sh Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,5 +0,0 @@
-#!/bin/bash
-python $GALAXY_ROOT/tools/repet_pipe/SMART/Java/Python/clusterize.py -f gff -i $1 -o intermedia.gff3 -c -d 150
-awk '{if ($3!="exon") {print $0}}' intermedia.gff3 > intermedia.gff
-#perl sortGff.pl -i intermedia.gff > $2
-python $GALAXY_ROOT/tools/repet_pipe/SMART/Java/Python/CollapseReads.py -i intermedia.gff -f gff -o $2
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.xml
--- a/SMART/bacteriaRegulatoryRegion_Detection/prepareAnnot.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="prepareAnnotation" name="prepare annotation file">
-  <description>Prepares Annotation file -> clusterizes, filters exon and sorts annotations.</description>
-  <command interpreter="bash"> prepareAnnot.sh $inputFile $outputFile  </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[prepareAnnotation] Output File"/>
-  </outputs>
-
-  <help>
- command example: sh prepareAnnot.sh NC_011744r_annot_tmp1.gff NC_011744r_annot_pre1.gff
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/seedGff.pl
--- a/SMART/bacteriaRegulatoryRegion_Detection/seedGff.pl Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,91 +0,0 @@
-#!/usr/bin/perl -w
-###
-# But : extension des UTR5 à partir des clusters de reads
-# 
-# Entrees : fichier gff annotation + cluster
-#
-# Sortie : UTR5.gff
-#
-###------------------------------------------------------      
-use vars qw($USAGE);                      
-use strict;   
-use Getopt::Long;                            
-
-=head1 NAME
-
-seedGff.pl  
-
-=head1 SYNOPSIS
-
-% seedGff.pl -i annotation.gff -p BeginPosFromAtg [-l lgSeed | -e EndPosFromAtg ] [-h] 
-
-=head1 DESCRIPTION
-This script will parse input gff file and write information in gff3 format.
-
-    -i|--input fileName       gff input file name of annotations
-    -p|--pos BeginPosFromAtg  greater positive number for the begin position of the seed from Atg
-   [-l|--length seedLength]    length of the seed to compute (default 4nt)
-   [-e|--end seedEnd]       end of the seed to compute (smaller positive number)
-    -o|--output fileName       gff output file name
-   [-h|--help]                help mode then die                              
-
-=head1 AUTHOR - Claire Toffano-Nioche - mar.11
-    from Claire Kuchly initial script
-
-=cut
-#-----------------------
-my ($inFileName, $beginSeed, $endSeed, $lgSeed, $outFileName) = ("", 0, 0, 0, "SEED.gff") ;
-   # command line check
-    foreach my $num (0 .. $#ARGV) {
-        SWITCH: for ($ARGV[$num]) {
-        /--input|-i/ && do { 
- $inFileName=$ARGV[$num+1]; 
- open (INGFF, "< $inFileName" ) or die "Can't open gff file: \"$inFileName\"\n" ; 
- last };
-        /--pos|-p/ && do { 
- $beginSeed=$ARGV[$num+1]; 
- last };
-        /--end|-e/ && do { 
- $endSeed=$ARGV[$num+1]; 
- last };
-        /--length|-l/ && do { 
- $lgSeed=$ARGV[$num+1]; 
- last };
-        /--output|-o/ && do { 
- $outFileName=$ARGV[$num+1]; 
- last };
-        /--help|-h/ && do { exec("pod2text $0\n") ; die };
-        }
-    }
-    open(UTR5,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
-    if (($endSeed > 0) and ($lgSeed > 0)) {
- print "Error : only -e or -l definition, not both\n";
- exec("pod2text $0\n") ; die ;
-    } elsif ($lgSeed > 0) {
- print "ERROR : Lg Seed => TODO \n";
-    }
-
-    #Création des fichiers de filtres (séquences UTR) :
-        #print "Création des fichiers de séquences !\n";
-###Creer les fichiers des séquences en 5' et 3' des gènes.
-###Seed pour les clusters en 5' : il faut qu'ils soient ancrés sur le -20 par rapport à l'ATG. Donc seed de -22/-18.
-    while(my $ligne = <INGFF>){
- chomp($ligne);
- my @list = split(/\t/,$ligne) ;
- my $finUTR5 ;
- my $debUTR5 ;
- my $strand = $list[6] ;
- if($strand eq "+"){
- $finUTR5 = $list[3]-$endSeed;
- $debUTR5 = $list[3]-$beginSeed;
- } elsif($strand eq "-"){
- $debUTR5 = $list[4]+$endSeed;
- $finUTR5 = $list[4]+$beginSeed;
- }
- if($debUTR5 < 0){$debUTR5 =0;}
- if($finUTR5 < 0){$finUTR5 =0;}
- print UTR5 "$list[0]\t$list[1]\t5UTR\t$debUTR5\t$finUTR5\t$list[5]\t$list[6]\t$list[7]\t$list[8]\n"; 
-    }
-    close INGFF;
-    close UTR5;
-exit(0);
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/seedGff.xml
--- a/SMART/bacteriaRegulatoryRegion_Detection/seedGff.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,35 +0,0 @@
-<tool id="seedGff" name="seedGff">
-  <description>Creates the seed from -15 to -25 bp before ATG</description>
-  <command interpreter="perl"> seedGff.pl -i $inputFile -p $startPosFromAtg -e $endPosSeed
-    #if $optionSeedSize.seedSize == "Yes":
-        -l $optionSeedSize.seedLength
-    #end if
-    -o $outputFile
-  </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff"/>
-    <param name="startPosFromAtg" type="integer" value="25" help="greater positive number for the start position of the seed from Atg "/>
-    <param name="endPosSeed" type="integer" value="15" help="end of the seed (smaller than the start of the seed, positive number)"/>
-    <conditional name="optionSeedSize">
- <param name="seedSize" type="select" label="The length of seed." help="If you have chosen the start/end position of the seed, you don't need to fill in this option.">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="seedLength" type="integer" value="4" label="The length of seed, default: 4nt" />
- </when>
- <when value="No">
- </when>
-    </conditional>
-    
-
-  </inputs>
-
-  <outputs>
-    <data format="gff" name="outputFile" label="[seedGff] Output File"/>
-  </outputs>
-
-  <help>
- command example: perl seedGff.pl -i input_annot.gff -p 25 -e 15 -o output_cis_seed.gff
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/sortGff.pl
--- a/SMART/bacteriaRegulatoryRegion_Detection/sortGff.pl Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,88 +0,0 @@
-#!/usr/bin/perl -w
-###
-# But : tri d'un fichier gff
-# 
-# Entrees : fichier gff
-#
-# Sortie : gff affiche a l'ecran
-#
-###------------------------------------------------------
-
-#!/usr/bin/perl -w                                                                                                                                                     
-              
-use vars qw($USAGE);                      
-use strict;                               
-
-=head1 NAME
-
-sortGff.pl - sort a gff file
-
-=head1 SYNOPSIS
-
-% sortGff.pl -i file.gff [-h] 
-
-=head1 DESCRIPTION
-This script will sort a gff file by start position (it only repairs the inversion of two successive lines).
-
-    -i|--input fileName  gff input file name
-    -o|--output fileName  gff3 output file name (currently disabled: the result is printed to standard output)
-   [-h|--help]           help mode then die                              
-
-=head1 AUTHOR - Claire Toffano-Nioche - mar.11
-
-=cut
-
-#-----------------------
-my ($fileName, $colourGff, $outFileName) = ("", "", "sortOut.gff3") ;
-   # command line check
-    foreach my $num (0 .. $#ARGV) {
-        SWITCH: for ($ARGV[$num]) {
-        /--input|-i/ && do { 
- $fileName=$ARGV[$num+1]; 
- open ( fichierGff, "< $fileName" ) or die "Can't open gff file: \"$fileName\"\n" ; 
- last };
-# /--output|-o/ && do { 
-# $outFileName=$ARGV[$num+1]; 
-# last };
-        /--help|-h/ && do { exec("pod2text $0\n") ; die };
-        }
-    }
-    # informations retrieval
-#    open(OUT,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
-    my @lines = <fichierGff> ; 
-    close fichierGff ;
-    # treatment
- #print "gff file read ; number of lines : $#lines\n";
-    my $previous = 0;
-    my $i = 0;
- #print "$#lines\n" ;
-    while ($i <= $#lines) {
- my @infos = split('\t', $lines[$i]) ;
- #print "info[3]:$infos[3]; prv:$previous!\n";
- if ($infos[3] < $previous) {
-     &exchange($i, $infos[3]) ;
-     $previous=$infos[3] ; 
-     $i--;
- } 
- $previous=$infos[3];
- $i++;
-    }
-    for (my $i=0 ; $i <= $#lines ; $i++) {
-# print OUT $lines[$i] ;
- print $lines[$i] ;
-    }
-#close OUT ;
-exit(0);
-#-----------------------
-sub exchange {
- my ($index, $position) = @_ ;
- my @info_col = split("\t", $lines[$index-1]) ;
- if ($info_col[3] > $position) {
- #print "$lines[$index]";
- my $line_to_push = $lines[$index-1] ;
- $lines[$index-1] = $lines[$index] ;
- $lines[$index] = $line_to_push ;
- } else {
- print "TODO : push > one line\n" ;
- }
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/sortGff.xml
--- a/SMART/bacteriaRegulatoryRegion_Detection/sortGff.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="sortGff" name="sortGff">
-  <description>Sorts a gff file.</description>
-  <command interpreter="perl"> sortGff.pl -i $inputFile > $outputFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[sortGff] Output File"/>
-  </outputs>
-
-  <help>
- command example: perl sortGff.pl -i *_unsort.gff3 > *_sort.gff3
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.pl
--- a/SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.pl Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,189 +0,0 @@\n-#!/usr/bin/perl -w\n-###\n-# Main : defining utr and intergenic operonic intervalles from a transcripts file following a referencies file \n-# \n-# Input : 2 gff files to intersect, transcript queries vs referencies\n-#\n-# Output : resulting gff file printing to standard output\n-#\n-###------------------------------------------------------\n-use vars qw($USAGE);                      \n-use strict;                               \n-\n-=head1 NAME\n-\n-splitTranscriptGff.pl - compare 2 input gff files and define utr and intergenic operonic intervalles by couple of overlapping elements\n-\n-=head1 SYNOPSIS\n-\n-% intervallsExtractorGff.pl -i referencies.gff -j transcriptQueries.gff -s strand [-h] \n-\n-=head1 DESCRIPTION\n-This script will intersect 2 gff files and compute distance between 2 successives lines. Take care both of sorting by positions the input files and of that referencies are included in transcriptQueries.\n-\n-    -i|--input1 fileName   gff input file name: included elements\n-    -j|--input2 fileName   gff input file name: extended elements\n-   [-s|--strand] [s|d]\t   s for single strand (colinear) or d for double strands (antisense) [default d]\n-   [-h|--help]             help mode then die                              \n-\n-=head1 USECASE\n-Define many fragments for each extended element (transcript): UTR5, gene, UTR3, "inOperon" for intergenomic region between 2 genes\n-intervallsExtractorGff.pl -i CDSannotations.gff -j RNAseqTranscripts.gff  > UTRsGenesOperonsLists.gff;\n-\n-=head1 KWON BUGS\n-No disjonction of overlapping elements of the included elements (-i file).\n-In usecase, overlapping genes are fused in one long gene.\n-\n-=head1 AUTHOR\n-Claire Toffano-Nioche - sep.11\n-\n-=cut\n-#-----------------------\n-sub feedPositionTab { my ($val, $pF, $pB, @info) = @_ ;\n-\t\t#print "feedPositionTab::$#info, ", ($#info+1)/4," \\n";\n-\tfor (my $i=0 ; $i <= $#info ; $i+=4) { # for each extended element \n-\t\t\t#print 
"....$info[$i+2]\\n";\n-\t\tif ($info[$i+3] =~ /\\+/) {\n-\t\t\tfor (my $c = $info[$i+1] ; $c <= $info[$i+2] ; $c++) { @$pF[$c]=$val } ; # sequence Forward\n-\t\t} else {\n-\t\t\tfor (my $c = $info[$i+1] ; $c <= $info[$i+2] ; $c++) { @$pB[$c]=$val } ; # sequence Backward\n-\t\t}\n-\t}\n-\t\t#print "feedPos...:: ", join(".", @$pF[0..100]), "\\n";\n-\t\t#print "feedPos...:: ", join(".", @$pB[0..100]), "\\n";\n-}\n-#-----------------------\n-sub recupInfo {\tmy ($pInfo, @lines) = @_ ;\n-    for (my $i=0 ; $i <= ($#lines+1)*4-1 ; $i+=4) {\n-    \tmy @line = split("\\t",$lines[$i/4]);\n-\t\tpush(@$pInfo, $line[0], $line[3], $line[4], $line[6]) ; # 0=nom, 3=debut, 4=fin, 6=sens\n-\t}\n-\t#print "recupInfo::fin=", ($#lines+1)*4, "\\n" ;\n-}\n-#-----------------------\n-sub tagName { my ($seqN, $posB, $posE, $strand) = @_ ;    \n-\tmy $tagN=$seqN.$strand.$posB."..".$posE;\n-\t\t#print "tagName:",join("_",@_)," et tagName:$tagN\\n";\n-return $tagN;\n-}\n-#-----------------------\n-sub transitionAnalysis {\n-my ($pos, $seq, $s, $pdebAmont, $pfinAmont, $pdebIn, $pfinIn, $pdebAval, $pfinAval, $ptag) = @_ ;\n-\tmy $enCours = 0 ; my $precedant = 0 ;\n-\t$enCours = @$ptag[$pos] ; \n-\t$precedant = ($s =~ /\\+/?@$ptag[$pos-1]:@$ptag[$pos+1]) ; \n-    if ($enCours ne $precedant) {\n-    \t#print "transi...:: $s, $pos, $precedant, $enCours\\n";\n-    \t#print "transition::$$pdebAmont, $$pfinAmont, $$pdebIn, $$pfinIn, $$pdebAval, $$pfinAval\\n";\n-    \tSWITCH: for ($precedant.$enCours) {\n-               \t/01/ && do { $$pdebAmont = $pos ; last SWITCH ;};\n-                /02/ && do { $$pdebIn = $pos ; last SWITCH ;};\n-                /10/ && do { $$pfinAval = ($s =~/\\+/?$pos-1:$pos+1) ; \n-                \t\tif (($s =~ /\\+/)and ($$pdebAval!=$$pfinAval)) {\n-                \t\t\tprintf "%s\\tsplit\\tutr3\\t%s\\t%s\\t.\\t%s\\t.\\tName=%s;\\n", \n-                \t\t\t\t$seq, $$pdebAval, $$pfinAval, $s, &tagName($seq, $$pdebAval, $$pfinAval, $s) ; \n-                \t\t\t#if 
($$pdebAval==$$pfinAval) { print "transition 10 +\\n"};\n-                \t\t} elsif ($$pfinAval!=$$pdebAval) {\n-                \t\t\tprintf "%s\\tsplit\\tutr3\\t%s\\t%s\\t.\\t%s'..b'==$$pdebAmont) { print "transition 12 -\\n"} ;\n-                \t\t}\n-                \t\t$$pdebAmont = 0 ; $$pfinAmont = 0 ;\n-                \t\tlast SWITCH ;\n-                \t };\n-                /20/ && do { $$pfinIn=($s =~/\\+/?$pos-1:$pos+1) ; \n-                        if (($s =~ /\\+/) and ($$pdebIn!=$$pfinIn)) {\n-                        \tprintf "%s\\tsplit\\tgene\\t%s\\t%s\\t.\\t%s\\t.\\tName=%s;\\n", \n-                \t\t\t\t$seq, $$pdebIn, $$pfinIn, $s, &tagName($seq, $$pdebIn, $$pfinIn, $s) ; \n-                \t\t} elsif ($$pfinIn!=$$pdebIn) {\n-                \t\t    printf "%s\\tsplit\\tgene\\t%s\\t%s\\t.\\t%s\\t.\\tName=%s;\\n", \n-                \t\t\t\t$seq, $$pfinIn, $$pdebIn, $s, &tagName($seq, $$pfinIn, $$pdebIn, $s) ; \n-                \t\t}\n-                \t\t$$pdebIn = 0 ; $$pfinIn = 0 ;\n-                \t\tlast SWITCH ;\n-                \t };\n-                /21/ && do { $$pdebAval=$pos ; $$pfinIn=($s =~/\\+/?$pos-1:$pos+1) ; \n-                        if (($s =~ /\\+/) and ($$pdebIn!=$$pfinIn)) {\n-                        \tprintf "%s\\tsplit\\tgene\\t%s\\t%s\\t.\\t%s\\t.\\tName=%s;\\n", \n-                \t\t\t\t$seq, $$pdebIn, $$pfinIn, $s, &tagName($seq, $$pdebIn, $$pfinIn, $s) ; \n-                \t\t} elsif ($$pfinIn!=$$pdebIn) {\n-                \t\t\tprintf "%s\\tsplit\\tgene\\t%s\\t%s\\t.\\t%s\\t.\\tName=%s;\\n", \n-                \t\t\t\t$seq, $$pfinIn, $$pdebIn, $s, &tagName($seq, $$pfinIn, $$pdebIn, $s) ; \n-                \t\t}\n-                \t\t#$$pdebIn = 0 ; $$pfinIn = 0 ;\n-                \t\tlast SWITCH ;\n-                \t };\n-          }\n-    }\n- }\n-#-----------------------\t\n-my ($fileNameI, $fileNameE, $strand) = ("", "", 0) ;\n-# command line check\n-foreach my $num (0 .. 
$#ARGV) {\n-        SWITCH: for ($ARGV[$num]) {\n-        /--input1|-i/ && do { \n-\t\t\t$fileNameI=$ARGV[$num+1]; \n-\t\t\topen ( fichierGffI, "< $fileNameI" ) or die "Can\'t open gff file: \\"$fileNameI\\"\\n" ; \n-\t\t\tlast };\n-\t/--input2|-j/ && do { \n-\t\t\t$fileNameE=$ARGV[$num+1]; \n-\t\t\topen ( fichierGffE, "< $fileNameE" ) or die "Can\'t open gff file: \\"$fileNameE\\"\\n" ; \n-\t\t\tlast };\n-        /--strand|-s/ && do { \n-\t\t\tif ($ARGV[$num+1] eq "s") { $strand=1}; \n-\t\t\tlast };\n-        /--help|-h/ && do { exec("pod2text $0\\n") ; die };\n-        }\n-}\n-# memory declarations:\n-my @infoI ; my @infoE ;\n-my $seqName ;\n-my @tagF ; my @tagB ; # Forward and Backward sequence\n-# data retrieval:\n-my @linesI = <fichierGffI> ; my @linesE = <fichierGffE> ;\n-close fichierGffI ; close fichierGffE ;\n-\t\t#print "gff files read ; number of lines : $#lines1 + $#lines2\\n";\n-\t\t# positions management\n-&recupInfo(\\@infoI, @linesI) ;\n-&recupInfo(\\@infoE, @linesE) ;\n-# treatement: \n-# transform gff lines into chromosomal position tags : 0 for nothing, 1 resp. 2 for extended resp. included elements\n-if (($#infoI) and ($#infoE)) { \n-\t$seqName=$infoI[0] ;\n-\t\t#print "fin : $infoE[$#infoE-1]\\n";\n-\tfor (my $i=0 ; $i <= $infoE[$#infoE-1] ; $i++) { $tagF[$i] = 0 ; $tagB[$i] = 0 ; } ; # "O" tag in all chr. 
positions\n-\t\t#print "seqName : $seqName\\n" ;\n-\t&feedPositionTab(1, \\@tagF, \\@tagB, @infoE) ; # "1" tag for all extended elements\n-\t&feedPositionTab(2, \\@tagF, \\@tagB, @infoI) ; # "2" tag for all included elements\n-\t\t#print join("", @tagF), "\\n";\n-\t\t#print join("", @tagB), "\\n";\n-\t# transition management:\n-\tmy ($beginUpstream, $endUpstream, $beginIncluded, $endIncluded, $beginDownstream, $endDownstream) \n-\t\t= (0, 0, 0, 0, 0, 0) ;\n-\tfor (my $i=1 ; $i <= $#tagF-1 ; $i+=1) {\n-\t\t&transitionAnalysis($i, $seqName, "+", \\$beginUpstream, \\$endUpstream, \\$beginIncluded, \\$endIncluded, \\$beginDownstream, \\$endDownstream, \\@tagF) ;\n-\t}\n-\t($beginUpstream, $endUpstream, $beginIncluded, $endIncluded, $beginDownstream, $endDownstream) = ($infoE[$#infoE-1], $infoE[$#infoE-1], $infoE[$#infoE-1], $infoE[$#infoE-1], $infoE[$#infoE-1], $infoE[$#infoE-1]) ;\n-\tfor (my $i=$#tagB-1 ; $i >= 1 ; $i-=1) {\n-\t\t&transitionAnalysis($i, $seqName, "-", \\$beginUpstream, \\$endUpstream, \\$beginIncluded, \\$endIncluded, \\$beginDownstream, \\$endDownstream, \\@tagB) ;\n-\t}\n-}\n-exit(0) ;\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.xml
--- a/SMART/bacteriaRegulatoryRegion_Detection/splitTranscriptGff.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,21 +0,0 @@
-<tool id="splitTranscriptGff" name="splitTranscriptGff">
- <description> Define UTRs and intergenic operonic regions from a transcript file and following a reference file </description>
- <command interpreter="perl"> 
- splitTranscriptGff.pl -i $referenciesFile -j $transcriptsFile > $outputFile  
- </command>
-
- <inputs>
- <param name="referenciesFile" type="data" label="Referencies Input File" format="gff" />
- <param name="transcriptsFile" type="data" label="Transcripts Input File" format="gff" />
- </inputs>
-
- <outputs>
- <data format="gff3" name="outputFile" label="[splitTranscript] Output File"/>
- </outputs>
-
- <help>
- Note that input files should be sorted by increasing positions and that expressed references should be included in transcripts.
-
- Command example: perl splitTranscriptGff.pl -i annotations.gff -j transcripts.gff > TUTag.gff3
- </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.pl
--- a/SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.pl Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,79 +0,0 @@
-#!/usr/bin/perl -w 
-###
-# But : protocol permettant la detection d'RNA non codant potentiel
-# 
-# Entrees : fichier de mapping Smart gff3
-# fichier gff des gènes
-# fichier gff des clusters Cis regulateur potentiel
-#
-# Sortie : fichier gff des clusters ARN nc
-#
-###------------------------------------------------------
-
-use vars qw($USAGE);                      
-use strict; 
-
-=head1 NAME
-
-strictlyIncludeGff.pl
-
-=head1 SYNOPSIS
-
-% strictlyIncludeGff.pl -i toSelect.gff3 -t template.gff3 > result.gff3
-
-=head1 DESCRIPTION
-
-strictlyIncludeGff.pl - print elements strictly included in the template (gff files)
-
-    -i|--input  fileName  gff input file name
-    -t|--template fileName  gff template file name
-    [-h|--help] help mode then die                              
-
-=head1 AUTHOR - CTN - mar.11 
-(from RNA-Vibrio/protocol_NC_V2_CTN3.pl - Claire KUCHLY)
-
-=cut
-      
-#----------------------------------------------------------------------------
-# check command line :
-my $outFileName = "outSIG.gff3";
-if ($#ARGV==0) {
- die (exec("pod2text $0\n"));
-} else {
-    foreach my $num (0 .. $#ARGV) {
- SWITCH: for ($ARGV[$num]) {
- /--input|-i/ && do { open(ARN,"<$ARGV[$num+1]") 
- or die "Error: Can't open \"$ARGV[$num+1]\", $!"; 
- last };
- /--template|-t/ && do { open(SEED,"<$ARGV[$num+1]") 
- or die "Error : Can't open file \"$ARGV[$num+1]\", $!";
- last };
- /--help|-h/ && do { exec("pod2text $0\n") ; die };
- }
-    }
-    ##NC_011753.2 RefSeq gene 367 834 . - . locus_tag=VS_0001;db_xref=GeneID:7162789
-#    open(OUT,">$outFileName") or die "Error can't $outFileName open for output. $!\n";
-    my @seed ;
-    my $s=0;
-    while (my $seedLine = <SEED> ) {
- my @list = split(/\t/,$seedLine);
- $seed[$s][0]= $list[3] ; # position begin seed
- $seed[$s][1]= $list[4] ; # position end seed
- $seed[$s][2]= $list[6] ; # seed sens
- $seed[$s][3]= $list[0] ; # chromesome name
- $s++;
-    }
-    close SEED ;
-    while(my $ligne = <ARN>){
- $s=0;
- my @list = split(/\t/,$ligne);
- while (($s <= $#seed)) {
- if (($seed[$s][3] eq $list[0]) and ($seed[$s][0] <= $list[3]) and ($seed[$s][1] >= $list[4]) and ($seed[$s][2] eq $list[6])) { # if list include in seed + same direction
- print "$ligne";
- }
- $s++;
- }
-    }
-    close ARN ;
-    exit(0);
-}
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.xml
--- a/SMART/bacteriaRegulatoryRegion_Detection/strictlyIncludeGff.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,17 +0,0 @@
-<tool id="strictlyIncludeGff" name="strictly include Gff">
-  <description>Prints the elements which are strictly included in the template.</description>
-  <command interpreter="perl"> strictlyIncludeGff.pl -i $inputFile -t $template > $outputFile  
-  </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff"/>
-    <param name="template" type="data" label="template File" format="gff"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[strictlyIncludeGff] Output File"/>
-  </outputs>
-
-  <help>
- command example: perl strictlyIncludeGff.pl -i toSelect.gff3 -t template.gff > result.gff3
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.py
--- a/SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,124 +0,0 @@
-#! /usr/bin/env python
-
-import optparse, os, shutil
-from optparse import OptionParser
-
-
-def image(text, url):
- return "<center>%s</center><img src='%s'>" % (text, url)
-
-
-def __main__():
- description = "Write all results in one HTML file."
- parser = OptionParser(description = description)
- parser.add_option("", "--input1Gff1", dest="input1Gff3_1", action="store", type="string", help="First gff3 result in the first analyse.(TRANS detection)")
- parser.add_option("", "--input1Gff2", dest="input1Gff3_2", action="store", type="string", help="Second gff3 result in the first analyse. (TRANS detection)")
- parser.add_option("", "--input1PNG1", dest="input1PNG1", action="store", type="string", help="PNG (getSize) result in the first analyse. (TRANS detection)")
- parser.add_option("", "--input1PNG2", dest="input1PNG2", action="store",type="string", help="PNG (plot) result in the first analyse. (TRANS detection)")
- parser.add_option("", "--input2Gff1", dest="input2Gff3_1", action="store", type="string", help="First gff3 result in the second analyse. (ANTISENSE detection)")
- parser.add_option("", "--input2Gff2", dest="input2Gff3_2", action="store", type="string", help="Second gff3 result in the second analyse. (ANTISENSE detection)")
- parser.add_option("", "--input2PNG1", dest="input2PNG1", action="store", type="string", help="PNG (getSize) result in the second analyse. (ANTISENSE detection)")
- parser.add_option("", "--input2PNG2", dest="input2PNG2", action="store", type="string", help="PNG (plot) result in the second analyse. (ANTISENSE detection)")
- parser.add_option("", "--input3Gff1", dest="input3Gff3_1", action="store", type="string", help="First gff3 result in the third analyse. (CIS detection)")
- parser.add_option("", "--input3Gff2", dest="input3Gff3_2", action="store", type="string", help="Second gff3 result in the third analyse. (CIS detection)")
- parser.add_option("", "--input3PNG1", dest="input3PNG1", action="store", type="string", help="PNG (getSize) result in the third analyse. (CIS detection)")
- parser.add_option("", "--input3PNG2", dest="input3PNG2", action="store", type="string", help="PNG (plot) result in the third analyse. (CIS detection)")
- parser.add_option("", "--outHTML", dest="outHTML", action="store", type="string", help="An HTML output.")
- parser.add_option("", "--outImgDir", dest="imgDir", action="store", type="string", help="Copy all result images into imgDir, for Galaxy option.")
- (options, args) = parser.parse_args()
-
-
- if not os.path.exists(options.imgDir):
- os.makedirs(options.imgDir)
-
- shutil.copy(options.input1PNG1, options.imgDir)
- shutil.copy(options.input1PNG2, options.imgDir)
- shutil.copy(options.input2PNG1, options.imgDir)
- shutil.copy(options.input2PNG2, options.imgDir)
- shutil.copy(options.input3PNG1, options.imgDir)
- shutil.copy(options.input3PNG2, options.imgDir)
-
-
- outfile=open(options.outHTML, "w")
- #print >>outfile, "<html><head><title>The results for ncRNAs detections.</title></head><body>"
- print >>outfile, "<h1><center>The results for ncRNAs detections.</center></h1>"
-
- #write results for the first analysis
- print >>outfile, "<B><center><font color=red size=4>The results of intergenic sRNAs detection.(TRANS)</font></center></B>"
- print >>outfile, "<center><strong>The results of comparison to already known ncRNA to validate some candidates.</strong></center><p>"
- input1Gff1 = open(options.input1Gff3_1, "r")
- lines = input1Gff1.readlines()
- input1Gff1.close()   
- for line in lines:
- print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
- print >>outfile, "<p>"
- print >>outfile, "<center><strong>The results of comparison to already known ncRNA to see which ncRNAs are not detected.</strong></center><p>"
- input1Gff2 = open(options.input1Gff3_2, "r")
- lines = input1Gff2.readlines()
- input1Gff2.close()   
- for line in lines:
- print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
- print >>outfile, "<p>"
- img_input1PNG1 = os.path.basename(options.input1PNG1)
- image1=image("<strong>Resulting image : get the candidates sizes distribution.</strong>", img_input1PNG1)
- print >>outfile, "%s" % image1
- print >>outfile, "<p>"
- img_input1PNG2 = os.path.basename(options.input1PNG2)
- image2=image("<strong>Resulting image : get the candidates sizes distribution.</strong>", img_input1PNG2)
- print >>outfile, "%s" % image2
- print >>outfile, "<BR><p>"
-
-
- #write results for the second analysis
- print >>outfile, "<B><center><font color=red size=4>The results of asRNAs detection.(ANTISENSE)</font></center></B>"
- print >>outfile, "<center><strong>The results of comparison to already known ncRNA to validate some candidates.</strong></center><p>"
- input2Gff1 = open(options.input2Gff3_1, "r")
- lines = input2Gff1.readlines()
- input2Gff1.close()   
- for line in lines:
- print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
- print >>outfile, "<p>"
- print >>outfile, "<center><strong>The results of comparison to already known ncRNA to see which ncRNAs are not detected.</strong></center><p>"
- input2Gff2 = open(options.input2Gff3_2, "r")
- lines = input2Gff2.readlines()
- input2Gff2.close()   
- for line in lines:
- print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
- print >>outfile, "<p>"
- img_input2PNG1 = os.path.basename(options.input2PNG1)
- image1=image("<strong>Resulting image : get the candidates sizes distribution.</strong>", img_input2PNG1)
- print >>outfile, "%s" % image1
- print >>outfile, "<p>"
- img_input2PNG2 = os.path.basename(options.input2PNG2)
- image2=image("<strong>Resulting image : get the candidates sizes distribution.</strong>", img_input2PNG2)
- print >>outfile, "%s" % image2
- print >>outfile, "<BR><p>"
-
-
- #write results for the third analysis
- print >>outfile, "<B><center><font color=red size=4>The results of long 5'UTRs detection.(CIS)</font></center></B>"
- print >>outfile, "<center><strong>The results of comparison to already known ncRNA to validate some candidates.</strong></center><p>"
- input3Gff1 = open(options.input3Gff3_1, "r")
- lines = input3Gff1.readlines()
- input3Gff1.close()   
- for line in lines:
- print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
- print >>outfile, "<p>"
- print >>outfile, "<center><strong>The results of comparison to already known ncRNA to see which ncRNAs are not detected.</strong></center><p>"
- input3Gff2 = open(options.input3Gff3_2, "r")
- lines = input3Gff2.readlines()
- input3Gff2.close()   
- for line in lines:
- print >>outfile, "<font size=2><span style=line-height:3px>%s</span></font><p>" % line
- print >>outfile, "<p>"
- img_input3PNG1 = os.path.basename(options.input3PNG1)
- image1=image("<strong>Resulting image : get the candidates sizes distribution.</strong>", img_input3PNG1)
- print >>outfile, "%s" % image1
- print >>outfile, "<p>"
- img_input3PNG2 = os.path.basename(options.input3PNG2)
- image2=image("<strong>Resulting image : get the candidates sizes distribution.</strong>", img_input3PNG2)
- print >>outfile, "%s" % image2
- print >>outfile, "<BR><p>"
-
-
-if __name__=="__main__": __main__()
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.xml
--- a/SMART/bacteriaRegulatoryRegion_Detection/writeResToHTML.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,36 +0,0 @@
-<tool id="writeResToHTML" name="writeResToHTML">
-  <description>Write all ncRNAs analysis results into an HTML file (Only for ncRNAs analysis pipeline).</description>
-   <command interpreter="python"> writeResToHTML.py 
-   --input1Gff1 $input1GffFile1 --input1Gff2 $input1GffFile2 --input1PNG1 $input1PNGFile1 --input1PNG2 $input1PNGFile2 
-   --input2Gff1 $input2GffFile1 --input2Gff2 $input2GffFile2 --input2PNG1 $input2PNGFile1 --input2PNG2 $input2PNGFile2
-   --input3Gff1 $input3GffFile1 --input3Gff2 $input3GffFile2 --input3PNG1 $input3PNGFile1 --input3PNG2 $input3PNGFile2
-   --outHTML $outHTML 
-   --outImgDir $outHTML.files_path 
-   2> $log </command>
-
-  <inputs>
-      <param name="input1GffFile1" type="data" label="First gff3 result in intergenic sRNAs analysis. (TRANS detection) " format="gff3"/>
-      <param name="input1GffFile2" type="data" label="Second gff3 result in intergenic sRNAs analyse. (TRANS detection) " format="gff3"/>
-      <param name="input1PNGFile1" type="data" label="PNG (getSize) result intergenic sRNAs analyse.(TRANS detection)" format="png"/>
-      <param name="input1PNGFile2" type="data" label="PNG (plot) result in intergenic sRNAs analyse. (TRANS detection) " format="png"/>
-      
-      <param name="input2GffFile1" type="data" label="First gff3 result in asRNAs analysis. (ANTISENSE detection) " format="gff3"/>
-      <param name="input2GffFile2" type="data" label="Second gff3 result in asRNAs analyse. (ANTISENSE detection) " format="gff3"/>
-      <param name="input2PNGFile1" type="data" label="PNG (getSize) result asRNAs analyse.(ANTISENSE detection)" format="png"/>
-      <param name="input2PNGFile2" type="data" label="PNG (plot) result in  asRNAs analyse. (ANTISENSE detection) " format="png"/>
-      
-      <param name="input3GffFile1" type="data" label="First gff3 result in long 5'UTRs analysis. (CIS detection) " format="gff3"/>
-      <param name="input3GffFile2" type="data" label="Second gff3 result in long 5'UTRs analysis. (CIS detection) " format="gff3"/>
-      <param name="input3PNGFile1" type="data" label="PNG (getSize) result in long 5'UTRs analysis. (CIS detection) " format="png"/>
-      <param name="input3PNGFile2" type="data" label="PNG (plot) result in long 5'UTRs analysis. (CIS detection) " format="png"/>
-
-  </inputs>
-
-  <outputs>
-      <data format="HTML" name="outHTML" label="[writeRes2HTML] Output HTML File" help="This output file shows all results by ncRNAs detection analysis."/> 
-      <data format="txt" name="log" label="[writeRes2HTML] Output log File"/> 
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/REF.fasta
--- a/SMART/data/REF.fasta Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,33148 +0,0 @@\n->C10HBa0111D09_LR276\t15142\t24441\t|Longueur=9300\n-GAACAAACAACCCCTTTTTGGAGGTGTTGGCGCGTCGTGCAGCTTACACTCAAAAGTTAA\n-AAAGTTGCCTTGCGATGCGGTCATGTTACAAACCTCTCTGCCTTAAATTAAATTCCATAA\n-CCAAGATTTGGAGGTGCCTCAACGATGCGCAGCCATGTCCCATATTTGGTCGCCTCGTTT\n-AAAAGTCAAGTTAGACTTAATTAAGAGGTCCAACTAGTGTAGGGGCGTTTTGAGTACTTG\n-TGGGATTTATTATAAACGGTTTTGAGTCACTTTAAACCCACTTCACCAATTAAAACAAAA\n-TCCTCAAGTTAAAACTCAATATCTTTCCATTCTCTCTCTCTAAAACCTTCATTGGAGATA\n-TTTGAAGCTCCACGGAAGAAGGTTAATTTTCCAAGGTTTCAATGAAAATTTCGTGTATAG\n-GTCTTCAATAAGGTATGGTGATTTCATCCTTGATTCTTCTATCATTCAAGGATCCAATTC\n-AAAGGTTTTTCAAAAGATCTCAAAAATCCTATTTCGAATTCTAAGTATGGGTTCTTCCAT\n-TTAAAGGTTTAAATGGATGAATTATGATGTTTTCAATGTTAGTTGATGTTTTTATGATAA\n-AAAAACTCCATGAACCCATGAGCATCCTAATTCTCTAATTTTGTCTTGTAAATTGAGTTT\n-GATAATTGTGATTGGTTATGGATGGAATTGTATTTAGATTGCTCTATATTGTTGATTCTT\n-ATTGTTAACCTATCTCTATATATGTAGAATTGAGATTGTAAGGATGAGTTAGTAATCTTG\n-GCTTTATGGGCTTTCGAATCCGGGTTTACCCCCTGGATGTAACCGGCATCCTCGCCCTTT\n-TTCAAGGACTAAGACCAACCTTTTAGTCTCATGTCATTACATTCATAGGTTGACAAATGC\n-GGAAAAATTTAAAACTTTCATTATCACTACTTGGAGGTTTACATAGACCTCTACATACAC\n-ATAAGATATATTCATATAGAGTATACATAGACCCTTCGTATAGGAAGGTTACATAGCCAT\n-CTACTTTTATTACACATACATATATATAAAATATAAAAATAGTCTAACGATTGTCTCATC\n-TCATACCCTCTAAACGATTATCACAATATGGGCATAACCCTTACATCAATCAAACAAGAG\n-CACATATAGGTCATACAAAAGTATAGTACTCAATTAAAAAGGAAAGAAATGAAAGAGTCT\n-TTAAGCTCATAACAAGTCCATAAGCTAGATTATGGCATTGACCTCAAAAGTTGAGGACCT\n-TATGTGCGTACACAAGCAAAACATGCTAAAAAGGGACTTTTTAGTCAAAACATGCCCATT\n-TATCCCTTTAAGAACCTACTACAAAGCCAACAAGTCATACCAACCAACCAAACATGCTTA\n-CTATCTCAACAAGTAATACTTATCCCAACATACTTGAAACCATGATTTACTACAACCCTA\n-TCACCAAGGAAAAATATCACAAGAATGAATAAGAGTCAATCATATCATGATAGAGAGACA\n-ACTATTCATGAATCCTTATCAACTCAACAAGTGCAATAACCAAGCAAAGCCTCATAACCT\n-TACTCAATCAAGTATCCTCAAAAAGAAACCATGACCAATGTCCAACTTTACCTAACATAG\n-CATTTAGGTTTACATTTTATCATATATTAACATTATGACCCAAGGCATACTCATTAGTAA\n-ACTAATTAATATATAATATCAACAATGTGCCATAGTAATCATATATACATAATATATCAT\n-CATAACATAAACATATATAAAAACCTCCTTCTAAGACTCCCCTCAAGGCTAACTAGTGAA\n-ATGTTTAGGTAGAGCCCCATACCCCTACCTAGA
TTAAGCTAGACCCCTTAGGTTATCCAA\n-GTTAGAGTTCAAGTCCTTTAATTCGTTTTACCTTTTGGGAACATCTTGCCCTAACCGACA\n-TAGACCACATGAGCTAGTGTGGGATACGGTTCCAAAAAACCCTACACAGAAAGAAGGCGG\n-ACTACTTGCCAAAGTATTACCAAAACATGAAACATAGCAACTACGTTGATCCACTAGCAA\n-GTATTTCTATAGGGGCAACATAGTTCAAGAACTCTGAGATATACTTGAGACCCTCTTTAT\n-GCGCCATGCATTATAGTCTCCAACCTCAAGAGTAATGTAGTGTTCCTACCTTCCCCATGT\n-GAGAAAGGACACTCCTCAATCTAGTTCACTCGGTGCTAAGCTAGAGACCCTTTTTGAAAT\n-GTCTTTAAGCCTTTAATTATCAATCATAGCTTAGCTTAGGTCATAGGGTATATCTCTTGT\n-ATAATCATCATCATCAATAGCTCAATAATAATTGTATGAGTATAAGTCCTTTCATCACAA\n-TTCATATAAGTGAGGTTAACATGTTAGCATTTCATTGCATATCAAGAAACATTGATGATT\n-CTTACCATCCTTGTATCACATACACCTTAATCAATCTCACAACATAGTCAGGACATATCA\n-ATTCAACATCATACCACCCTATAATCCTAATATAAGGCATACTCCAATATAACTTCACGT\n-CTTAACAAAAATTTATCACAATTGGAATTAAAGATAGAGATTCTAAGACTTAACAAGTCT\n-TCCTTGTAGTTCATCATCAAGGTCTTACCATCAACCCATAACTCAACCAAGTTTGGGGAG\n-TAACATCATCACACAATGATAATCAATAGGATAACAAGGCTAATTTCATCTCTATAACAC\n-AATTCAACACTAGATCATAACTTAAGACAAGATACATAGGCTAATTTCACACTATAATTC\n-ATAACCTAAATCACATCTCAAGAAATAGCATTATAGTCCTATAATTCATATTAATTTGTT\n-CATAATAACACAATAGGATAGTAATTTAATCAATAACCAAGTCAATTGAATGATCACAAT\n-ACAATATACATCAATATCACAAGCTAGGGTTAGGGATGAAGGATCATATTCTTCAATTTA\n-GACCAAACCACTAACAATTACCATAATAAAGTTTAAATTCATGTAAATGTATTCAATATA\n-ACCTAAATAAATCATTAACAACTCAATCCATAACTTCAATTTCGTAATTGAATGAAACCC\n-ATAAGAAAATTCACCTTTTGAAATCCATTTTAAAGAAACCCTTTGAGGAAAGAGCCTCAA\n-AGGTGAATTAGATCCCATATATTAATGTTTGATGATGAATTCGCCCCTTTCCATCCCCCA\n-AACCCTTATCCTTGCTAGTTTTTAATGGTGAGTTCAAGTAGAGAGAGAAATAAGAGAGAA\n-GGAAGAGAGTTTTTGTCTTAGAGTTCTAATTAATTTAATTGGGGTTGGGGATTTTATATG\n-CGTTTTAAGTTAGTTAATTAGTCACCCCTCAATACCTAACTAACCCCTGAACCACCTAAT\n-TAATTAAATGAATCAATATAAAAACATACAGGAAATTTGACCTTCACAGACGAGACCCCG\n-AACGACGGGCCATCTGTGAGTCAACGGTCCCTCACCCCTCCGTCCTGCACTCTATCGATC\n-AGTTCATAGACTGTGCAGGCAGATCAATTCTTCAACTTGTCTAAGTATGGGATGACGGTG\n-GTATCGACTCCCCGTCAGTCCACACACGGACCGTAGGTGGTCCCATCGATGCGCATTGTC\n-TAGTCCTTGTTTGTTCAAACACAAGGGCCTCAAGGGCCCTTGGTTGGTGCTTGGGGAGTC\n-GTACCCATACGTTTCAATCATGAAACAACTCAAAAACCTATAATCTATCCTTCCACCAAT\n-TTTTGTACCTTTCCGAC
TCTTAAAAGGTAGTCAAATAGGCTAAGGCACGCTAACACCCCT\n-TTGAACCAACTTCCTGGACGTTC'..b'CAATATTCCATATTGATCGCCAGCTTCCATTGCTACAAAAGA\n-TGCATATAGTATCAGCTTCTTTAGACAAAGCTATAAGAAGTATCAGTTGACCCAATTGAC\n-AAGTCACACGTGCTATAACAAGATATCACTTGACGCAAGTGACAAGTGTGAAGCTGACAG\n-GAAAATAGGCAATAGAATCCCTCATTCTTTGTTTATATATAGCAACCTATTACTTCAGTA\n-TCTGTTTACAAGTTCTGCACCACGATAAGTATAACTATTTAGAAATTATGAAGGGAGTGT\n-TGCACAAATTAGTCAGGGTTAGAATTTTAATAATCCAACACACCAGAAATTCTGAGGACT\n-ATGCCTAGCAGCTGAAATCACCACAACAAGTTCAAAGTCAAATCCTGGCTCTTCCACATC\n-CTTTCCCTTCGTACAGTAAACTGAACAGATGCCTTTTGGATATGTTTCACTCACATACTT\n-CATAATTTCAGCATCCATGGCAGACCTACACAAGAATTACCATGAATAGGAGAATTTGGC\n-AAGCAACGGTAATCACAAAGTCAGTACTCATAAACCAAGCCTCTGATGAGAAGAACTAAC\n-CATGTTAAGAAGGGAACTACTCACATAAAACAAAAACATGATTTTTATAATCTGCATGCC\n-AAGCATTAGGAACTATTGAAGAATTCACTGCGATCTCAGAATATAATCCTTTCATACTCT\n-TAATGCCTCCCAATTATACCAAATTTAGGTGTCTTACTACCATTTTAGTATCTGACATTG\n-ATGTCATTACCCATTAATTTGGAGACACTGGTTCCTCACCGATAAAGATGAACAAAAAGG\n-TAAACCAGTGTATTATGAATCAGATCTCTTTATGGAATAAAAACACACATGAATCATGAT\n-GTAGGATCTTATTAACACTTGAGGACTGAGGCGAAGATTACTAAGAATATCCCGAAAGAT\n-AAGACATAGAATGTAATATAGAAGGACAGAGAACAAATGATACTGATTAAGAAGAAGAGC\n-ATCTATATAGGAGAGATTAGCCTAAACTATTTATTTTCAGACTGATTTCGGTGAGCGCAG\n-ACCAAAACATGCAGCTTTTTTTTCAATTAAGCCGGAAAGACAATTTCCACAAGAAATGCA\n-ACTGTTCTAGCATATCTTAAACTATAATCTGAGTGCTGTCTGAGAACTGAGGTTGAATTT\n-GCAAGTCTTGTTGAGCATGGTTAAAAAATAAGTCCAATTAGGCAAAATAATTGTGAATGT\n-CATAATATAGAAGAACTTCAACATCTCAATGGGAAAAACAGAAAGTGAGTAGCTAAAAAG\n-GGGAGCAATACCAAAGATATTAAACTGAGAAATATATCTCATACCCTACAGATGCATACC\n-TATACTCTTCCACGAAAGCAGATGGAAGTTCTTCATCTCTTGCTGGCCTAACGTCTTTAC\n-AAACCTAGAATGCAGACATACCATGAGCTTACAAGAAGGGAGCATAAATTATTACACGGC\n-AACAATAACTAGGAAAAAAAGAGAGAGAGGAAATACATTTAATTTCACAGCCCTTGAATT\n-ATGAATAAAACCATGCACTTGTTATATTAAGCAGAGACTACTTCCACTTTTCCAATCTAT\n-TTACATCTACTTCATCACATTAACAAGAATAGAAAAGAAATGCATGTTATGTGACATTCA\n-GCACTGTAGACTAATGAGGATTAATTTACAACCATGGACCAGCTATAGCAGAAGAGACCT\n-TATTTTACTTTTCCAAAATTGGTATTTACTCCATCACCATTAACAAGAATAAAAAATGCA\n-TCTTATGTGAAGTTCATAACTATTGAATAATGAGGTTAATCAAGAGCTTGCTAT
ATCAAA\n-GCACATTTTTCAAGTTTACAATTTCTTCATTCTTGGTATCCACTTCTATCATGAAAACCA\n-ACCTAACAGTTAACACAATTCACTCTGGTGTTTTGTTCTTTATCCCTAATGCCTGAGTGT\n-GTTTAACTAATCAAGTTCCAATCAGCCAGAAGATGTCTAAACATACTAAACTATAGATAA\n-ACAACATGAGCATAAAACCAGTTTATGTAGAGATTTTTAATTGCACAGCATAAAAAGGAG\n-TACCCCCGGCACTATATGTGCTTCTCTTTCTTTTTTTCTTACTTCGTCCTGATCTTTTTA\n-CTTTTCTTTTCCTTTCTTTCAGAATAAGCACATTTTTGGATATAGTCCCACCATCTCTAC\n-CAGGTTTATGTCTGTCCATATTATTGCTTTTTTGAATTACCTTCTAAACAAAATACTCAT\n-CCTTACAAACCTGCTTCCTCTTCTATGGATCCCACCACTCCATGCCCCGAAAAAAAAGAA\n-AGAAAAAGATAAGTTTTTCATAGTCACTTTAGATTATCATTTATTAAATTCTGTCGGCTC\n-AACTGATAGGAACAGTGAAATGGACTTTTCAATCATAAAAAGATAATAGAAGTTATTAAA\n-TGATTCCCTTCAAGATTATGAGCTTTTAAACTTACATATCATGCAACTATTGGGGAATTA\n-AGGGACTGGGGATTTGATGATAAATTCCAGCACCATTTTTGGTGCTTTTGTGTTTTTGCA\n-AGGTAGTTTGTTAGTGGCACATGGAAGGAGGTGCCTCATCCAATGAAATTATTAATCTTA\n-TCAACAAGAAGAAAGTCAAAACACCAAACTGTAAAAAATCCAAAAATAGCATTTTGCATT\n-GTGTACTAACTGAACAACGTACTTGCTTGACATGGTCAACTCTGGCAACCTGCGCAGTCC\n-GGGGATCAAGATACTCATCCTTATGAACCTCACTAAATGATGTAATCAGTACCTACAAAT\n-TAGTGAACAGCAACTTTACACAGCTAGATCATGAAAAATAGCTTCCAAGTGTCCATTACT\n-ACATAAATGAAAAGCATTATACTTTCTTTTTAGAAGAGGGGAACAAAATCTTAGACTTCA\n-TAAGGAACAGTTCTCCGGAAAGTTTTCTTTTCTATATTGAAGAAGTAATCATTTATTGAA\n-GTGGTGGAAATTCCCTAAGCTTTAAACAGATGTAGAGAACTTGTCCATAAACATGGTGCT\n-CAACAAACAACACCCAACTCATTCATATATAGTAGCGCCAGCATCCAAGTACCATGAAGT\n-ATATCTCTATCCTAAAAGCTTTACTTGAAGACTTATTAGATTAATCTTACTTATTTCTCA\n-TGCAACTTTTTTTTTGAAAAGTTTCTCAAGCATAACTTTTATCCTTTGTTTTCATTCAGT\n-CTTTGAGCTCTAAAGGATGCCTAGAGAGGTCATGAACCAAGTAAGGAAATTGCAACATCA\n-TATTGCTTCCAATTTACCTCATTTTTTATCTTCAACTACCGAAAACTACCAAATCTGAAA\n-TTATCAACTAAGGAAAATTACAACATCAGTACAATAAGTATTGCTTACAGTTCACTTAAT\n-ATTTCAATCTTCGACTACGGAAAACTGTCAAATCTGAAATTATCAATTTGAATGACACGA\n-TTAGTCTAACTTACATTTTCAAAGATCTAACTTACCCAATGAAAAAAGAAAACAAGAGAG\n-AGACTTACATCGCCACTTCTGTTTGGGAATTCGAGACAAATCAAGTGAGATTTGTTGTAC\n-GAAGGAAATGACTCCTCGGCCGCTTTCTTATATATATTTTCGTCCTTTAAAATAGCTCTA\n-ACATCTGCAAATCCAACCAAATGACGCTCAAAACCAAAAATGTAAAAAATAAACTGCCGA\n-TCGCAAATGAACACCAATGCGGTCACATTTTCAAGCAC
GAAAAAAGCTTCAAAATACAAA\n-AAACTTTAGCGCAGAAAATAAACGAAAGAGAAGAAGAAGAAGACCTTTGGCGACGTACTG\n-AATTTCGCCGGCTGGGGCATTAAGAAGGAACCATTTGGCAATCTCAAT\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/REF.fasta.fai
--- a/SMART/data/REF.fasta.fai Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,198 +0,0 @@
-C10HBa0111D09_LR276 9300 48 60 61
-C11HBa0029C01_LR281 10969 9552 60 61
-C11HBa0034I10_LR282 9056 20752 60 61
-C11HBa0054I23_LR283 10301 30008 60 61
-C11HBa0062I24_LR284 10050 40531 60 61
-C11HBa0064J13_LR285 9385 50797 60 61
-C11HBa0072I13_LR286 9556 60387 60 61
-C11HBa0089M02_LR287 9244 70150 60 61
-C11HBa0096D22_LR288 9184 79597 60 61
-C11HBa0107K14_LR289 9115 88983 60 61
-C11HBa0139J14_LR291 10002 98299 60 61
-C11HBa0143O06_LR374 10785 108517 60 61
-C11HBa0161D01_LR292 9057 119530 60 61
-C11HBa0168B23_LR293 9826 128787 60 61
-C11HBa0190J03_LR294 10992 138826 60 61
-C11HBa0249E07_LR279 10008 150051 60 61
-C11HBa0303G16_LR296 9430 160274 60 61
-C11HBa0323E19_LR297 9657 169910 60 61
-C11SLe0053P22_LR298 9827 179777 60 61
-C11SLm0052K14_LR376 10013 189817 60 61
-C12HBa115G22_LR301 10021 200043 60 61
-C12HBa120K4_LR313 10271 210279 60 61
-C12HBa144B17_LR302 9247 220768 60 61
-C12HBa149G24_LR381 9271 230217 60 61
-C12HBa165B12_LR303 9257 239690 60 61
-C12HBa183M6_LR379 9473 249148 60 61
-C12HBa221M9_LR377 10755 258826 60 61
-C12HBa224N6_LR382 9130 269807 60 61
-C12HBa26C13_LR299 9139 279136 60 61
-C12HBa326K10_LR306 10414 288478 60 61
-C12HBa90D9_LR311 9638 299111 60 61
-C12HBa93P12_LR312 9510 308956 60 61
-C12SLe124D18_LR385 10545 318673 60 61
-C12SLeRI72J6_LR378 9337 329441 60 61
-C12SLm103K8_LR380 10118 338981 60 61
-C01HBa0003D15_LR7 10776 349315 60 61
-C01HBa0163B20_LR10 9321 360318 60 61
-C01HBa0216G16_LR11 10332 369845 60 61
-C01HBa0256E08_LR13 9024 380399 60 61
-C01HBa0329A12_LR14 9536 389621 60 61
-BAC19_LR16 9760 399355 60 61
-C02HBa0008G02_LR67 9205 409327 60 61
-C02HBa0011O23_LR68 9399 418733 60 61
-C02HBa0016A12_LR19 9822 428336 60 61
-C02HBa0027B01_LR21 9222 438369 60 61
-C02HBa0030A21_LR22 9147 447792 60 61
-C02HBa0046M08_LR23 10763 457140 60 61
-C02HBa0072A04_LR26 9766 468130 60 61
-C02HBa0075D08_LR28 10744 478107 60 61
-C02HBa0124N09_LR31 9335 489077 60 61
-C02HBa0155D20_LR36 10743 498616 60 61
-C02HBa0155E05_LR37 10417 509587 60 61
-C02HBa0164H08_LR38 10279 520227 60 61
-C02HBa0167J21_LR39 9925 530725 60 61
-C02HBa0185P07_LR40 9818 540863 60 61
-C02HBa0190N21_LR41 10835 550895 60 61
-C02HBa0190P16_LR331 10808 561960 60 61
-C02HBa0194L19_LR42 10280 572997 60 61
-C02HBa0204A09_LR332 10029 583498 60 61
-C02HBa0204D01_LR334 9746 593743 60 61
-C02HBa0214B22_LR325 9581 603699 60 61
-C02HBa0215M12_LR319 9918 613488 60 61
-C02HBa0228I09_LR329 10933 623621 60 61
-C02HBa0236E02_LR326 9822 634785 60 61
-C02HBa0284G15_LR47 9034 644820 60 61
-C02HBa0291P19_LR48 9826 654052 60 61
-C02HBa0329G05_LR52 9637 664090 60 61
-C02SLe0010H16_LR53 10744 673936 60 61
-C02SLe0018B07_LR335 9222 684910 60 61
-C02SLe0034H10_LR327 10833 694334 60 61
-C02SLe0127J16_LR59 10965 705396 60 61
-C02SLe0132D01_LR60 10524 716591 60 61
-C02SLm0057H03_LR336 9514 727339 60 61
-C02SLm0057H03_LR64 9170 737059 60 61
-C02SLm0057H03_LR65 9532 746429 60 61
-C03HBa0012D06_LR72 10645 756168 60 61
-C03HBa0030O03_LR74 10569 767039 60 61
-C03HBa0034B23_LR76 10005 777833 60 61
-C03HBa0040F22_LR77 10227 788053 60 61
-C03HBa0054O21_LR78 9044 798497 60 61
-C03HBa0076J13_LR79 10097 807740 60 61
-C03HBa0233O20_LR82 9753 818053 60 61
-C03HBa0295I12_LR83 10258 828017 60 61
-C03HBa0318C22_LR84 10004 838495 60 61
-C03HBa0323D22_LR85 9222 848713 60 61
-C04HBa127N12_LR346 10533 858137 60 61
-C04HBa132O11_LR104 10306 868894 60 61
-C04HBa164O3_LR344 9345 879419 60 61
-C04HBa190C13_LR106 10719 888968 60 61
-C04HBa198I15_LR107 10673 899914 60 61
-C04HBa219H8_LR109 10174 910812 60 61
-C04HBa239P14_LR111 10483 921204 60 61
-C04HBa255I2_LR112 10650 931908 60 61
-C04HBa27G19_LR337 9788 942782 60 61
-C04HBa2G1_LR120 9322 952778 60 61
-C04HBa331L22_LR115 10697 962304 60 61
-C04HBa35C16_LR339 9494 973226 60 61
-C04HBa36C23_LR91 10103 982925 60 61
-C04HBa50I18_LR341 10825 993244 60 61
-C04HBa58E11_LR93 9927 1004296 60 61
-C04HBa66O12_LR94 9355 1014433 60 61
-C04HBa68N5_LR343 9886 1023989 60 61
-C04HBa6E18_LR87 9265 1034086 60 61
-C04HBa6O16_LR123 10386 1043552 60 61
-C04HBa78E4_LR98 9994 1054158 60 61
-C04HBa78J4_LR99 9165 1064363 60 61
-C04HBa80D3_LR100 9781 1073726 60 61
-C04HBa8K13_LR338 9345 1083716 60 61
-C04HBa96I8_LR101 9693 1093262 60 61
-C04SLm14G22_LR116 10306 1103164 60 61
-C04SLm39E17_LR117 9105 1113688 60 61
-C05HBa0003C20_LR126 9460 1122990 60 61
-C05HBa0006N20_LR128 10108 1132657 60 61
-C05HBa0019C24_LR143 9514 1142982 60 61
-C05HBa0042B19_LR129 10674 1152703 60 61
-C05HBa0057G22_LR130 9023 1163602 60 61
-C05HBa0058L13_LR131 9215 1172824 60 61
-C05HBa0108A18_LR132 10114 1182244 60 61
-C05HBa0131D04_LR133 9279 1192574 60 61
-C05HBa0135A02_LR134 10620 1202057 60 61
-C05HBa0138J03_LR135 10910 1212905 60 61
-C05HBa0145P19_LR136 9141 1224045 60 61
-C05HBa0261K11_LR139 9058 1233387 60 61
-C06HBa0024F02_LR152 10452 1242645 60 61
-C06HBa0036J15_LR145 9458 1253320 60 61
-C06HBa0066D13_LR353 10505 1262985 60 61
-C06HBa0066I09_LR156 9957 1273713 60 61
-C06HBa0103N18_LR158 10350 1283886 60 61
-C06HBa0106K23_LR159 10895 1294458 60 61
-C06HBa0120H21_LR161 10738 1305584 60 61
-C06HBa0144J05_LR355 10850 1316549 60 61
-C06HBa0147H20_LR146 10693 1327629 60 61
-C06HBa0197N20_LR164 9418 1338549 60 61
-C06HBa0217M17_LR166 9470 1348172 60 61
-C06HBa0222J18_LR167 9282 1357848 60 61
-C06HBa0304P16_LR358 10525 1367334 60 61
-C06SLe0093P23_LR171 10431 1378084 60 61
-C06SLm0009E16_LR172 9523 1388737 60 61
-C06SLm0022H01_LR173 9212 1398467 60 61
-C06SLm0067G18_LR359 10587 1407882 60 61
-C07HBa0002D20_LR197 9796 1418694 60 61
-C07HBa0002M15_LR175 10456 1428703 60 61
-C07HBa0012N15_LR200 9921 1439382 60 61
-C07HBa0018L21_LR201 9169 1449517 60 61
-C07HBa0116M01_LR182 9010 1458887 60 61
-C07HBa0130B18_LR183 10208 1468097 60 61
-C07HBa0140O20_LR184 9640 1478524 60 61
-C07HBa0224G23_LR186 10880 1488374 60 61
-C07HBa0229H10_LR187 10802 1499484 60 61
-C07HBa0287B22_LR188 10024 1510516 60 61
-C07HBa0308M01_LR189 9216 1520756 60 61
-C07HBa0309B15_LR190 9063 1530174 60 61
-C07HBa0309F18_LR191 9664 1539437 60 61
-C07SLe0008H22_LR192 10612 1549312 60 61
-C07SLe0099J13_LR193 10794 1560150 60 61
-C07SLe0111B06_LR194 9814 1571170 60 61
-C07SLm0119A22_LR209 10081 1581197 60 61
-C07SLm0140K05_LR206 10879 1591498 60 61
-C08HBa0006A17_LR229 10049 1602608 60 61
-C08HBa0012O06_LR211 9339 1612873 60 61
-C08HBa0018C13_LR232 9075 1622416 60 61
-C08HBa0025D10_LR235 10483 1631692 60 61
-C08HBa0025I17_LR236 10269 1642401 60 61
-C08HBa0149J12_LR217 10855 1652891 60 61
-C08HBa0165B06_LR218 10825 1663976 60 61
-C08HBa0201M14_LR220 10831 1675031 60 61
-C08HBa0239G21_LR221 9322 1686090 60 61
-C08HBa0336I24_LR223 10943 1695617 60 61
-C08SLm0118A18_LR226 10481 1706792 60 61
-C09HBa0022M02_LR247 10139 1717497 60 61
-C09HBa0036O20_LR250 9692 1727852 60 61
-C09HBa0038L16_LR251 10860 1737755 60 61
-C09HBa0049F08_LR253 10353 1748845 60 61
-C09HBa0059I05_LR254 10352 1759421 60 61
-C09HBa0099F14_LR257 10809 1769995 60 61
-C09HBa0099P03_LR258 10156 1781034 60 61
-C09HBa0100J12_LR259 9064 1791408 60 61
-C09HBa0102E23_LR260 9715 1800672 60 61
-C09HBa0109D11_LR262 10625 1810598 60 61
-C09HBa0113I06_LR360 9962 1821449 60 61
-C09HBa0116C14_LR240 9470 1831625 60 61
-C09HBa0142I14_LR265 9190 1841301 60 61
-C09HBa0165P17_LR241 10794 1850694 60 61
-C09HBa0176I09_LR266 10260 1861717 60 61
-C09HBa0191P09_LR267 9629 1872195 60 61
-C09HBa0194K19_LR362 10346 1882034 60 61
-C09HBa0203J14_LR243 10369 1892602 60 61
-C09HBa0226D21_LR244 10625 1903193 60 61
-C09SLe0068C01_LR272 9113 1914044 60 61
-C09SLe0076N09_LR363 10488 1923358 60 61
-C09SLe0085A10_LR364 9300 1934069 60 61
-C09SLe0130H12_LR273 9470 1943571 60 61
-C09SLm0008K04_LR274 10746 1953248 60 61
-C09SLm0018L06_LR366 9448 1964222 60 61
-C09SLm0037I08_LR367 9433 1973876 60 61
-C09SLm0094A22_LR246 10193 1983516 60 61
-C09SLm0129J22_LR373 9064 1993927 60 61
-C09SLm0143I09_LR365 10488 2003192 60 61
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/SR1.fasta
--- a/SMART/data/SR1.fasta Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,2500 +0,0 @@\n->HWI-EAS337_3:7:1:415:1217/1\n-GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n->HWI-EAS337_3:7:1:208:1489/1\n-GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n->HWI-EAS337_3:7:1:278:1153/1\n-GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n->HWI-EAS337_3:7:1:1178:755/1\n-GGTGAGAGTGGTTGGTTGATGGTAAAACCATTGAAT\n->HWI-EAS337_3:7:1:277:1259/1\n-GGGTGACAAAGAAAACAAAAGGGACATGGTACTTGG\n->HWI-EAS337_3:7:1:447:1231/1\n-GACTTGTGGAAGAGTTGGAATGGAAAGCTGGAGCCT\n->HWI-EAS337_3:7:1:300:1199/1\n-GTTTTTGCATATAGATCTCTTTGTAAAGATATCCAT\n->HWI-EAS337_3:7:1:247:1210/1\n-GATAGCTTTGACTATAGGACTTTTATGTATGTGTTG\n->HWI-EAS337_3:7:1:1154:1517/1\n-GAATGTTGCAGACCTTACTCCTACCTATGAAGCACA\n->HWI-EAS337_3:7:1:164:1869/1\n-GTTTGATAGGAATTTATTTCTTCTTCGACATCCACC\n->HWI-EAS337_3:7:1:415:1194/1\n-GATGGTTGACACATTAAGAACATTCTCACCGGTCTC\n->HWI-EAS337_3:7:1:645:1892/1\n-GATAGTAAGCACCCCTCACTTCCAACCCAAAGATTG\n->HWI-EAS337_3:7:1:33:1446/1\n-GTTATTCTTTCTTTCTCAAATGGATGCAGTAATGCA\n->HWI-EAS337_3:7:1:1194:1427/1\n-GAAAAATCACATTTTTTTGTTTGATAAAAACCCAGA\n->HWI-EAS337_3:7:1:624:1913/1\n-GACATCTTCAACTCCGGAGTTTTGAGTAACATTATA\n->HWI-EAS337_3:7:1:437:1202/1\n-GTACTTATGATGAAACTGAGATCAACTACCACCTCC\n->HWI-EAS337_3:7:1:1386:1787/1\n-GTTTAGCTAGTATTAAGGCTAGAAATGGATATGATG\n->HWI-EAS337_3:7:1:227:1155/1\n-GATAGCAGCAAGGTTATTGGAATCTAAGCAATCTAC\n->HWI-EAS337_3:7:1:472:1025/1\n-GAAGTGATACTCATAAAACTATTTAGAAAGTTAATT\n->HWI-EAS337_3:7:1:220:1482/1\n-GCTATATGAGAATTCAGGCCACTTGTAGTTCGATAA\n->HWI-EAS337_3:7:1:1699:1966/1\n-GATGAAGGATACTACAAAAAAAAGGGTTATTTTGTG\n->HWI-EAS337_3:7:1:547:1084/1\n-GTGGTCAGGTCCTCTTCAAGTGATACAATGTTCCCC\n->HWI-EAS337_3:7:1:464:1097/1\n-GAAATTGAAGCTAGTTATTGACAGTTTACCAAGTTA\n->HWI-EAS337_3:7:1:171:1480/1\n-GATAATACTATTAGCACATCAGCCTCTAGATGAGAC\n->HWI-EAS337_3:7:1:293:1251/1\n-GTGGTAGTGAGCTCCGTGGTGAACAAGATGACGGAA\n->HWI-EAS337_3:7:1:647:1863/1\n-GGGTTTCAGATTAGTAAGTTATAGTGAAAAAATATA\n->HWI-EAS337_3:7:1:263:1275/1\n-GCTACGTCTGCTCTAACTCCTAATATGATCCTGTAT\n->HWI-EAS337_3:7:1:1112:215/1\n-GGTGTTGATTTCACAAGGAGGAATACTCATCTAAAA\n->HWI-EAS337_3:
7:1:319:1275/1\n-GTTATAGTTCTTGACAACAAAGTACAGAGGTGGTCC\n->HWI-EAS337_3:7:1:1310:1480/1\n-GTAGAGGAGGTAGGCTTGGTGGTCCCTCTATGGTAA\n->HWI-EAS337_3:7:1:1203:1470/1\n-GTGATATCTTTAACTAATTCTTCACGCATCTTTTCT\n->HWI-EAS337_3:7:1:415:1242/1\n-GACACATTATAAAATGATTAACAGACAGAAAGTACC\n->HWI-EAS337_3:7:1:1160:1471/1\n-GTAGTTGTGGAGAATGGTGCTTGCTTGGGTTGTTTA\n->HWI-EAS337_3:7:1:42:1475/1\n-GTTAAATGAGTTTAATGAATTAAATTAGTCTATATT\n->HWI-EAS337_3:7:1:155:1439/1\n-GAAGAGAAGCCAGATACTCAGTCTCATGGTGGCGAA\n->HWI-EAS337_3:7:1:1164:1447/1\n-GGAGAGGAAAGTGAAATTTAGGTTTAGACTTCGTTT\n->HWI-EAS337_3:7:1:277:1169/1\n-GGTTAAGCACAGCTGCACCAGCCAAAGCAGCAGAGC\n->HWI-EAS337_3:7:1:359:1249/1\n-GCTACATGTTTATGTGAGAAATATTATTTCAGTTGT\n->HWI-EAS337_3:7:1:1230:1425/1\n-GAATTCTGCGGCTAACTGTGGCATCGTGCAGCTGAT\n->HWI-EAS337_3:7:1:1526:1772/1\n-GGATAATTCATCATAAAATAGACCTTAGGGCAGTAG\n->HWI-EAS337_3:7:1:1171:720/1\n-GTTGATTGCGGAATAATATTTGCTTTTTTAGTTTTC\n->HWI-EAS337_3:7:1:1663:896/1\n-GTTGGGTTGGTTGAGCGAGAGGATTTTGCATCTGGT\n->HWI-EAS337_3:7:1:382:1159/1\n-GTGATAGGACCATTCCATAGTTTAGATGTATAATTG\n->HWI-EAS337_3:7:1:113:1855/1\n-GAATTTCAGCTTGCAAACTGTTTGGCTTTCCATTCA\n->HWI-EAS337_3:7:1:1356:1757/1\n-GTACTGAAAGATATGATAATATACATTGTTTGACTT\n->HWI-EAS337_3:7:1:124:757/1\n-GATACATAACACAATCAGTTGATCGAAACAAAACCA\n->HWI-EAS337_3:7:1:1211:751/1\n-GAAAGGTTAAGGCAAGGCCTGCTTAGTAAACAGGGA\n->HWI-EAS337_3:7:1:524:840/1\n-GTGTAGTTGGCTTCATATCAATTGACGGATGTTTCA\n->HWI-EAS337_3:7:1:478:1078/1\n-GTTAAGTGTGGAAATGAGAAGTTCTAATAGTACTTG\n->HWI-EAS337_3:7:1:1250:656/1\n-GAAGTTTTTTTAACGAGTGCACACGTTAGAACTCGC\n->HWI-EAS337_3:7:1:1301:1484/1\n-GTTGCGGAAGGAGCAAAGGCATTGTATGTAGTGTCA\n->HWI-EAS337_3:7:1:1036:1425/1\n-GGTTGTCAAGCGTTCAGCTCGAGATATATAATGTGA\n->HWI-EAS337_3:7:1:165:1435/1\n-GAGCAAAAGGGGTTGTGACTCTTGAAGAGCTGAAGA\n->HWI-EAS337_3:7:1:612:13/1\n-GGTAACATAGAGGAGGAAATATGGTGAAAACTTGAA\n->HWI-EAS337_3:7:1:485:1045/1\n-GTAACTTTATTTTATAATTTTTGATTTTTATCCGAA\n->HWI-EAS337_3:7:1:609:1117/1\n-GCAATTACAATAACAGGAAATACATCTAGCAGACTT\n->HWI-EAS337_3:7:1:1182:1510/1\n-GGGAACCTAAAAGGCAAA
ATGAATTGAGTCTGTATT\n->HWI-EAS337_3:7:1:1530:1678/1\n-GTTTTCAAACACAAACTACTTCTCCTAAAGCGGAAG\n->HWI-EAS337_3:7:1:1745:324'..b'>HWI-EAS337_3:7:1:1190:1457/1\n-GGAGCTAAAGTCCTAAGCTTGAGATCCAATAAACTG\n->HWI-EAS337_3:7:1:1067:1958/1\n-GTTCATGACATCCACCAACTTGTTTGTCTGTGGCTC\n->HWI-EAS337_3:7:1:87:714/1\n-GTGAGGAAAATGAAAAGTAAATAGGCAGATGCAGTA\n->HWI-EAS337_3:7:1:566:576/1\n-GAACACAGGGCTTCAGGGTCTAATATCCTGGCAGCG\n->HWI-EAS337_3:7:1:1316:1455/1\n-GAATTTATTTCAGTTCTTCTATTCTTTTTCTCTTCA\n->HWI-EAS337_3:7:1:1734:1346/1\n-GACACCTGACAGGAAGGGATCAACGAGCTATCTTTA\n->HWI-EAS337_3:7:1:1315:1665/1\n-GATCTTCTGAACGTTGAAACTTTAATAGTTCGAGGA\n->HWI-EAS337_3:7:1:319:1450/1\n-GTCCCCCATAATGTTCTTGATAACCTTCCTCTTTCC\n->HWI-EAS337_3:7:1:1565:1478/1\n-GATCCTAACTTGCTTGGAACTGAGATATAGTTGTTG\n->HWI-EAS337_3:7:1:33:25/1\n-GAATATAATCTTAAACATGTAGAGTTTTGAATACTT\n->HWI-EAS337_3:7:1:1281:1649/1\n-GATTAATTCCGTTAGTGAAAAATACAAAATGGAATT\n->HWI-EAS337_3:7:1:783:509/1\n-GTCATAGTTTTAAGGCGTGGAAACAGCCTCTAGCAG\n->HWI-EAS337_3:7:1:527:1823/1\n-GTCGGGACCGGAGAAAAGTATTATTTTAGAGATTAT\n->HWI-EAS337_3:7:1:361:602/1\n-GAAGCGAAAGGGCCATGTTCTTCGTACCTCCGTACA\n->HWI-EAS337_3:7:1:1111:669/1\n-GTGTAGTCATGTGAGGCTTTTGTCTCAGAAAAAAGT\n->HWI-EAS337_3:7:1:1404:1238/1\n-GTAAATTAGGTATAGAAGGAGTAGAGAACAATCTTT\n->HWI-EAS337_3:7:1:1325:1626/1\n-GTATGCCAGAGCGTCTTTTTCCTTTGGCATGGGGGC\n->HWI-EAS337_3:7:1:625:642/1\n-GAAACTGTAGCTATATGACTATCATTTGAATTTTGT\n->HWI-EAS337_3:7:1:1377:1833/1\n-GAAAATGCACCTAAAGTTCTCCCTAAAAACTACTCG\n->HWI-EAS337_3:7:1:964:815/1\n-GAACCCTACGGATGGAAGAGGAAAGGCGAGGATTAA\n->HWI-EAS337_3:7:1:283:1284/1\n-GTATGAAAAGCTTCCACATCCCCACTGGATGTTGAC\n->HWI-EAS337_3:7:1:374:1453/1\n-GTCAGTCTATGAGCGAATCAGAATGATATTATTGGA\n->HWI-EAS337_3:7:1:98:638/1\n-GTTACGTTTTCGTATTTGAATGATGTGATCTCAGGG\n->HWI-EAS337_3:7:1:309:1564/1\n-GTCTGATAATTTTTCCTGTTTTCTATTTATGTACCA\n->HWI-EAS337_3:7:1:863:418/1\n-GATAAAACATAAAAGAAATTCCTTCTATAATGAGCA\n->HWI-EAS337_3:7:1:1532:1997/1\n-GTTGTATTTCTAAATGTTTCTACTCCGATTTTTTTT\n->HWI-EAS337_3:7:1:1079:566/1\n-GAGTAACGTTTATGGAATCTACATAATT
CATTATTA\n->HWI-EAS337_3:7:1:42:911/1\n-GAGAAAACCATTCACCATCTTTTGCCGTTATAGTGC\n->HWI-EAS337_3:7:1:428:1520/1\n-GCAAAGAAAGAAGGATTTCCTAACCAAGAAGATTTT\n->HWI-EAS337_3:7:1:986:591/1\n-GAGTTTCTTGCTCCACCACCTTTACCGAAATCTCCA\n->HWI-EAS337_3:7:1:181:1099/1\n-GGTCGACGTAAGAGATCTGCAGGGCTATTACTCATT\n->HWI-EAS337_3:7:1:509:832/1\n-GTGAAGTATGGGTGGAAATGCTTGCGTATGCTGCTA\n->HWI-EAS337_3:7:1:510:597/1\n-GGGTCTGGAATAGTAATGCGCTGATTCTAGTAAAGT\n->HWI-EAS337_3:7:1:1765:1489/1\n-GTCAATTTTTTCTTTGTTTAAATCCGGGGAGGCTAG\n->HWI-EAS337_3:7:1:417:1560/1\n-GTAACCTTCCCAGTGTCTCCTTAAGAAAGACTTGGA\n->HWI-EAS337_3:7:1:1047:854/1\n-GTTGAAATTCCTGATTTTCCATGTGCATCATAAGCC\n->HWI-EAS337_3:7:1:1296:202/1\n-GGTGTTGGAGTTGGATTTGTTTCTGCTTTGATATCC\n->HWI-EAS337_3:7:1:502:642/1\n-GATGATTCTTGCTGGTTAAGTTGAGATGGGTTATAA\n->HWI-EAS337_3:7:1:82:1651/1\n-GGCATCCTGTTCATCCTTCTCTTCATTTTTAGGCGT\n->HWI-EAS337_3:7:1:1505:1274/1\n-GAAACTTTTCAAAAAAAAAGTTGCATGAGAAATAAG\n->HWI-EAS337_3:7:1:5:1770/1\n-GTGTGAAAAAGTATTTCATTCACATATTGAGTTGTT\n->HWI-EAS337_3:7:1:115:1005/1\n-GATTTTACTGGAATGGGGAAGGGTGAGGCATGGGTG\n->HWI-EAS337_3:7:1:354:1708/1\n-GCATCCGACAGTGACTTAGACGATGAGGAATACGAG\n->HWI-EAS337_3:7:1:1639:1500/1\n-GTGATTATTATCTAACTCTGCAACAGCATCCAGGGA\n->HWI-EAS337_3:7:1:766:243/1\n-GTGGCATCTATGGAAGATAAATTGGAGATTGTTGCT\n->HWI-EAS337_3:7:1:920:144/1\n-GTGCGATCACACTGTTTATGTTGTTGTTGATCATTG\n->HWI-EAS337_3:7:1:389:268/1\n-GGTCAATTAGAGAGGGCAACCACCCTCAAAGAATTT\n->HWI-EAS337_3:7:1:294:1868/1\n-GAAAAAAAATTGTTTGTCTTGAATTAATGTTTCAAT\n->HWI-EAS337_3:7:1:1147:62/1\n-GAATTCCCCTCAGGTTGGAGTTGTGCACTTGGCACT\n->HWI-EAS337_3:7:1:787:1759/1\n-GGTTTTATTAGAATTGGTAGCTGTTCTGATTTTCTG\n->HWI-EAS337_3:7:1:425:1939/1\n-GCTAATTGTGGTGTCTGGGTCTATGTGGCTAAACTT\n->HWI-EAS337_3:7:1:187:1132/1\n-GTGGGAGAGGCAAGGGGCTTGGCTCATATCCTCTTC\n->HWI-EAS337_3:7:1:1739:1840/1\n-GGAGGGGTGAAATCGTTTCTGAAAAATAATGAAATG\n->HWI-EAS337_3:7:1:1505:1876/1\n-GAAAGATCAAGTGTTGTCAAGTTCACTAGTTTAGAG\n->HWI-EAS337_3:7:1:447:192/1\n-GACTATGCCTAGCAGCTGAAATCACCACAACAAGTT\n->HWI-EAS337_3:7:1:21:2019/1\n-GTATGAGG
TAAAAGATGATAACCTGTCTTCCAGCCC\n->HWI-EAS337_3:7:1:1593:652/1\n-GTGATGAGTAAAACATCATCATATGAACTTGAAGAG\n->HWI-EAS337_3:7:1:1254:1660/1\n-GAAGTTTGTAATTCCTTTTAGGATTGTGGTTAACAT\n->HWI-EAS337_3:7:1:291:629/1\n-GTAGAGGAGGTAGGCTTGGTGGTCCCTCTATGGTAA\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/SR1.fastq
--- a/SMART/data/SR1.fastq Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,5000 +0,0 @@\n-@HWI-EAS337_3:7:1:415:1217/1\n-GATGTGCAGACTTTTCACGCAGGACTACATCACTGT\n-+HWI-EAS337_3:7:1:415:1217/1\n-WWWVVVWPWWWVWWWWVVVVKVPWWVVWVWUUQUTQ\n-@HWI-EAS337_3:7:1:208:1489/1\n-GGAAACATATGCACATAAACGTTGAAATCATGCTTA\n-+HWI-EAS337_3:7:1:208:1489/1\n-WWWWWWWWWWWWWWWWWVWWVWWVWWWWWWUUUUUU\n-@HWI-EAS337_3:7:1:278:1153/1\n-GAGAAAACCTGTAATAAATACTGAGAGAAAGTAGGG\n-+HWI-EAS337_3:7:1:278:1153/1\n-WWWWWWWWWWWWWWWWWWWWWWVWVVVWWVUUUUUR\n-@HWI-EAS337_3:7:1:1178:755/1\n-GGTGAGAGTGGTTGGTTGATGGTAAAACCATTGAAT\n-+HWI-EAS337_3:7:1:1178:755/1\n-WWWWWWWWWVWWWVVWWVVWVVVVWVWVVVUUUUUU\n-@HWI-EAS337_3:7:1:277:1259/1\n-GGGTGACAAAGAAAACAAAAGGGACATGGTACTTGG\n-+HWI-EAS337_3:7:1:277:1259/1\n-WWWWWWWWWWWWWWWWWWWWWWVWWWWWWVUUUUUU\n-@HWI-EAS337_3:7:1:447:1231/1\n-GACTTGTGGAAGAGTTGGAATGGAAAGCTGGAGCCT\n-+HWI-EAS337_3:7:1:447:1231/1\n-WWWWWWWWWWWVWVWWWVWWWVVVVVVVVVURUSUU\n-@HWI-EAS337_3:7:1:300:1199/1\n-GTTTTTGCATATAGATCTCTTTGTAAAGATATCCAT\n-+HWI-EAS337_3:7:1:300:1199/1\n-WVWWWWWWWWWWWVWWWWWWWWQWVVVTWWUUUURU\n-@HWI-EAS337_3:7:1:247:1210/1\n-GATAGCTTTGACTATAGGACTTTTATGTATGTGTTG\n-+HWI-EAS337_3:7:1:247:1210/1\n-WWWWWWWWWWWWVWWWVVWWWWWWWWTVVWRULUUR\n-@HWI-EAS337_3:7:1:1154:1517/1\n-GAATGTTGCAGACCTTACTCCTACCTATGAAGCACA\n-+HWI-EAS337_3:7:1:1154:1517/1\n-WWWWWWWVWWVWWWWWWWWWWWWWVWVWVWUSUUUU\n-@HWI-EAS337_3:7:1:164:1869/1\n-GTTTGATAGGAATTTATTTCTTCTTCGACATCCACC\n-+HWI-EAS337_3:7:1:164:1869/1\n-WWWWWWWWVVWWWWWWWWWWWWWWWWQWWWUUUUUU\n-@HWI-EAS337_3:7:1:415:1194/1\n-GATGGTTGACACATTAAGAACATTCTCACCGGTCTC\n-+HWI-EAS337_3:7:1:415:1194/1\n-WWWWWWWWWWWWWWWWWVWVWVWWWWWWWVSUUUUU\n-@HWI-EAS337_3:7:1:645:1892/1\n-GATAGTAAGCACCCCTCACTTCCAACCCAAAGATTG\n-+HWI-EAS337_3:7:1:645:1892/1\n-WWWWWWWWWWWWWWWWWVWWWWWVVWVWWVUUUUUU\n-@HWI-EAS337_3:7:1:33:1446/1\n-GTTATTCTTTCTTTCTCAAATGGATGCAGTAATGCA\n-+HWI-EAS337_3:7:1:33:1446/1\n-WWWWWWWWWWWWWWWWWWWWWUQWWVVWQWUUUSUU\n-@HWI-EAS337_3:7:1:1194:1427/1\n-GAAAAATCACATTTTTTTGTTTGATAAAAACCCAGA\n-+HWI-EAS337_3:7:1:1194:1427/1\n-WWWWWWWWVWVWWWWWWWVWWWUWWWWWWWUUUUSU\n-@HWI-EAS337_3:7
:1:624:1913/1\n-GACATCTTCAACTCCGGAGTTTTGAGTAACATTATA\n-+HWI-EAS337_3:7:1:624:1913/1\n-WWWWWWWWWWVWWWWVVVVWWWWVVVWWVWUUUUUU\n-@HWI-EAS337_3:7:1:437:1202/1\n-GTACTTATGATGAAACTGAGATCAACTACCACCTCC\n-+HWI-EAS337_3:7:1:437:1202/1\n-WWWWWVWWWVWVWWWWWWWWVWWWWVWVVVUUUUUU\n-@HWI-EAS337_3:7:1:1386:1787/1\n-GTTTAGCTAGTATTAAGGCTAGAAATGGATATGATG\n-+HWI-EAS337_3:7:1:1386:1787/1\n-WWWWWWWWWWWWWWWWVVWWWVWVVWVVVWUUSUUO\n-@HWI-EAS337_3:7:1:227:1155/1\n-GATAGCAGCAAGGTTATTGGAATCTAAGCAATCTAC\n-+HWI-EAS337_3:7:1:227:1155/1\n-WWVWWVWWVVWVVIWVWVVUWVVVVWVTVVUUUUSU\n-@HWI-EAS337_3:7:1:472:1025/1\n-GAAGTGATACTCATAAAACTATTTAGAAAGTTAATT\n-+HWI-EAS337_3:7:1:472:1025/1\n-WWWWWWWWWWWWWWWVWVWWWWWWVVWWWVUUUUUU\n-@HWI-EAS337_3:7:1:220:1482/1\n-GCTATATGAGAATTCAGGCCACTTGTAGTTCGATAA\n-+HWI-EAS337_3:7:1:220:1482/1\n-WWWWWWWWWVWWWWWWWVWWWWWWWWVVWWURUUUU\n-@HWI-EAS337_3:7:1:1699:1966/1\n-GATGAAGGATACTACAAAAAAAAGGGTTATTTTGTG\n-+HWI-EAS337_3:7:1:1699:1966/1\n-WWWWWWWWWWWWWWWWVWVWWWWWWWVWWWUUUSUR\n-@HWI-EAS337_3:7:1:547:1084/1\n-GTGGTCAGGTCCTCTTCAAGTGATACAATGTTCCCC\n-+HWI-EAS337_3:7:1:547:1084/1\n-WWWWWWWWWWWWWWWWWWVVWVWWWWWWWVUUUUSU\n-@HWI-EAS337_3:7:1:464:1097/1\n-GAAATTGAAGCTAGTTATTGACAGTTTACCAAGTTA\n-+HWI-EAS337_3:7:1:464:1097/1\n-WWWWWWWWWWWVWWVWWWWWWWWVVWWWWVUUUUUR\n-@HWI-EAS337_3:7:1:171:1480/1\n-GATAATACTATTAGCACATCAGCCTCTAGATGAGAC\n-+HWI-EAS337_3:7:1:171:1480/1\n-WWWWWWWWWWWWWVWWWWWWWVWWWWWWTVUUUUUU\n-@HWI-EAS337_3:7:1:293:1251/1\n-GTGGTAGTGAGCTCCGTGGTGAACAAGATGACGGAA\n-+HWI-EAS337_3:7:1:293:1251/1\n-WWWWWWWVWVWWWWVVWWVVVVVVWVVVVVRPUURR\n-@HWI-EAS337_3:7:1:647:1863/1\n-GGGTTTCAGATTAGTAAGTTATAGTGAAAAAATATA\n-+HWI-EAS337_3:7:1:647:1863/1\n-WWVWWWWWVWWWWVWWVVWWWWWWWVWVVWUUUUUU\n-@HWI-EAS337_3:7:1:263:1275/1\n-GCTACGTCTGCTCTAACTCCTAATATGATCCTGTAT\n-+HWI-EAS337_3:7:1:263:1275/1\n-WWWWWWWWWWWWWWWWWWWWWVWWWWQVWWUUOUUU\n-@HWI-EAS337_3:7:1:1112:215/1\n-GGTGTTGATTTCACAAGGAGGAATACTCATCTAAAA\n-+HWI-EAS337_3:7:1:1112:215/1\n-WWVWWVVWVVWWWVWWVUWVVVWWWVWTVWUUUUUU\n-@HWI-EAS337_3:7:1:319:1275/1\n-GTTATAGTTCTTGACAACAA
AGTACAGAGGTGGTCC\n-+HWI-EAS337_3:7:1:319:1275/1\n-WWWWWWWWWWWWVWWWWWWWWWWWWWVWVWUUSUUU\n-@HWI-EAS337_3:7:1:1310:1480/1'..b'A\n-+HWI-EAS337_3:7:1:986:591/1\n-WWWWWWWWWVWWWWWWWWWWWWWVWVVWVVUUUUUR\n-@HWI-EAS337_3:7:1:181:1099/1\n-GGTCGACGTAAGAGATCTGCAGGGCTATTACTCATT\n-+HWI-EAS337_3:7:1:181:1099/1\n-WWWWWWWWWWWWWVWWWWWWWVVWWWVWWVUUUUUU\n-@HWI-EAS337_3:7:1:509:832/1\n-GTGAAGTATGGGTGGAAATGCTTGCGTATGCTGCTA\n-+HWI-EAS337_3:7:1:509:832/1\n-WWWWVWWWWVVWWWWWWVWVWWWVVVVVWVSUUUUR\n-@HWI-EAS337_3:7:1:510:597/1\n-GGGTCTGGAATAGTAATGCGCTGATTCTAGTAAAGT\n-+HWI-EAS337_3:7:1:510:597/1\n-VWWWVWWWWWWWWWWWWVWWWWWVWWWWVVUUUUUU\n-@HWI-EAS337_3:7:1:1765:1489/1\n-GTCAATTTTTTCTTTGTTTAAATCCGGGGAGGCTAG\n-+HWI-EAS337_3:7:1:1765:1489/1\n-WVWWVWWWWWWWWWWVWWWWWWWWWQQVTVUSUUUR\n-@HWI-EAS337_3:7:1:417:1560/1\n-GTAACCTTCCCAGTGTCTCCTTAAGAAAGACTTGGA\n-+HWI-EAS337_3:7:1:417:1560/1\n-WWWWVWWSWSVWVVVVWWWWWWWWVSWWWWUQUUQU\n-@HWI-EAS337_3:7:1:1047:854/1\n-GTTGAAATTCCTGATTTTCCATGTGCATCATAAGCC\n-+HWI-EAS337_3:7:1:1047:854/1\n-WWWWWVWWWWWWWWUWWWWWVWVWVVVWVVUUUUUU\n-@HWI-EAS337_3:7:1:1296:202/1\n-GGTGTTGGAGTTGGATTTGTTTCTGCTTTGATATCC\n-+HWI-EAS337_3:7:1:1296:202/1\n-WWWWWWWVWVWWWVVWWWWWWWWWVVWWWTUUUUUF\n-@HWI-EAS337_3:7:1:502:642/1\n-GATGATTCTTGCTGGTTAAGTTGAGATGGGTTATAA\n-+HWI-EAS337_3:7:1:502:642/1\n-WWWWWVPVVVWWWVVVWWWVWWSVVVWVVVUUUUUR\n-@HWI-EAS337_3:7:1:82:1651/1\n-GGCATCCTGTTCATCCTTCTCTTCATTTTTAGGCGT\n-+HWI-EAS337_3:7:1:82:1651/1\n-WWWWWWWWVWWWWWWWWVWWWWWWVWWWWWUKJUQU\n-@HWI-EAS337_3:7:1:1505:1274/1\n-GAAACTTTTCAAAAAAAAAGTTGCATGAGAAATAAG\n-+HWI-EAS337_3:7:1:1505:1274/1\n-WWWWWWWWWWWWVWWWWWWWVWWWWWWVWWSUUUUR\n-@HWI-EAS337_3:7:1:5:1770/1\n-GTGTGAAAAAGTATTTCATTCACATATTGAGTTGTT\n-+HWI-EAS337_3:7:1:5:1770/1\n-WWWWWWWWWWVWWWWWWWWWWWWWWWWWVVSUUQUU\n-@HWI-EAS337_3:7:1:115:1005/1\n-GATTTTACTGGAATGGGGAAGGGTGAGGCATGGGTG\n-+HWI-EAS337_3:7:1:115:1005/1\n-WWWWWWWWWVVWWWVVVVWWVVVWWWVVWVUUUUUU\n-@HWI-EAS337_3:7:1:354:1708/1\n-GCATCCGACAGTGACTTAGACGATGAGGAATACGAG\n-+HWI-EAS337_3:7:1:354:1708/1\n-WWWWWWWWWWWWVWWWWWVWWVWWVWWVWWU
UUUUR\n-@HWI-EAS337_3:7:1:1639:1500/1\n-GTGATTATTATCTAACTCTGCAACAGCATCCAGGGA\n-+HWI-EAS337_3:7:1:1639:1500/1\n-WWWWWWVVWWWWWVVWVVWUVVVVVVVVWVUUUUUR\n-@HWI-EAS337_3:7:1:766:243/1\n-GTGGCATCTATGGAAGATAAATTGGAGATTGTTGCT\n-+HWI-EAS337_3:7:1:766:243/1\n-WWVWWWWWWWWVVWWVWWWWWWWVVVTVWWRUUJSU\n-@HWI-EAS337_3:7:1:920:144/1\n-GTGCGATCACACTGTTTATGTTGTTGTTGATCATTG\n-+HWI-EAS337_3:7:1:920:144/1\n-WWVVWWWWWVWPWVWWVWWSWVSWWHWWLVUCPUUH\n-@HWI-EAS337_3:7:1:389:268/1\n-GGTCAATTAGAGAGGGCAACCACCCTCAAAGAATTT\n-+HWI-EAS337_3:7:1:389:268/1\n-WWWWWWWWWVWWWWVVWWWWWWWWWWWWVVSUUUUU\n-@HWI-EAS337_3:7:1:294:1868/1\n-GAAAAAAAATTGTTTGTCTTGAATTAATGTTTCAAT\n-+HWI-EAS337_3:7:1:294:1868/1\n-VWVWWWWWWWWVQWWWWOWVVWWVWVVWQWUURULU\n-@HWI-EAS337_3:7:1:1147:62/1\n-GAATTCCCCTCAGGTTGGAGTTGTGCACTTGGCACT\n-+HWI-EAS337_3:7:1:1147:62/1\n-WWWWWWWWWWWWVWWWWWWVWWVWVVWVVWUUUUUU\n-@HWI-EAS337_3:7:1:787:1759/1\n-GGTTTTATTAGAATTGGTAGCTGTTCTGATTTTCTG\n-+HWI-EAS337_3:7:1:787:1759/1\n-WVVWWWWVWWWWWWWVUWWUWWUVWVVTVVUUUUUH\n-@HWI-EAS337_3:7:1:425:1939/1\n-GCTAATTGTGGTGTCTGGGTCTATGTGGCTAAACTT\n-+HWI-EAS337_3:7:1:425:1939/1\n-WWWWVWWVWWWWVWWWVVVVWWWVVWVVVWUUUUUU\n-@HWI-EAS337_3:7:1:187:1132/1\n-GTGGGAGAGGCAAGGGGCTTGGCTCATATCCTCTTC\n-+HWI-EAS337_3:7:1:187:1132/1\n-WVWWWWWWWWWWWWWWWWWWWTVWVWWWVVUUUUUU\n-@HWI-EAS337_3:7:1:1739:1840/1\n-GGAGGGGTGAAATCGTTTCTGAAAAATAATGAAATG\n-+HWI-EAS337_3:7:1:1739:1840/1\n-WWVWWWWWWWWWWWWWWVTWWWVVVWWWWWUUUUUU\n-@HWI-EAS337_3:7:1:1505:1876/1\n-GAAAGATCAAGTGTTGTCAAGTTCACTAGTTTAGAG\n-+HWI-EAS337_3:7:1:1505:1876/1\n-WWWWWWWWWWWWWWWWWWWWVWWVWWWVVVUUUSUR\n-@HWI-EAS337_3:7:1:447:192/1\n-GACTATGCCTAGCAGCTGAAATCACCACAACAAGTT\n-+HWI-EAS337_3:7:1:447:192/1\n-WWWWWWWWWWWWWWWTWWWVVWWVWWWWWVUUUUUU\n-@HWI-EAS337_3:7:1:21:2019/1\n-GTATGAGGTAAAAGATGATAACCTGTCTTCCAGCCC\n-+HWI-EAS337_3:7:1:21:2019/1\n-VWWVVWWVVWWWWWWWWWWWWWWWQVVWWWUURUUU\n-@HWI-EAS337_3:7:1:1593:652/1\n-GTGATGAGTAAAACATCATCATATGAACTTGAAGAG\n-+HWI-EAS337_3:7:1:1593:652/1\n-WWWVWVWWVWVWWVWWWWWWVVWWVWWVWWUUUSUU\n-@HWI-EAS337_3:7:1:1254:1660/1\n-GAAGTT
TGTAATTCCTTTTAGGATTGTGGTTAACAT\n-+HWI-EAS337_3:7:1:1254:1660/1\n-WWWVVWWWWWWWWWWWVWVWVUWWWTWVQWUUUUMU\n-@HWI-EAS337_3:7:1:291:629/1\n-GTAGAGGAGGTAGGCTTGGTGGTCCCTCTATGGTAA\n-+HWI-EAS337_3:7:1:291:629/1\n-WWWWWWWWWWVVVWVWVVWTWWKOVVTRVSUSSMFR\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/SR2.fastq
--- a/SMART/data/SR2.fastq Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,5000 +0,0 @@\n-@HWI-EAS337_3:7:1:415:1217/2\n-TAAGAACTTGGCTGATCGCCTACTTACTGCTTTTAC\n-+HWI-EAS337_3:7:1:415:1217/2\n-VWWWVWVVVVVVVVUVWVWVVWWVWVVVUVTTTTTN\n-@HWI-EAS337_3:7:1:208:1489/2\n-CGTGTTTTTGGTTGTGCATAAGGCTTTTTAAAGTAA\n-+HWI-EAS337_3:7:1:208:1489/2\n-WWWWWWWWWVVWWQWVWVWWVUVWWWWWWVRTRTTR\n-@HWI-EAS337_3:7:1:278:1153/2\n-GTCAGGCCGCATTGATGGGGGATGGGTTTCCCCCCA\n-+HWI-EAS337_3:7:1:278:1153/2\n-WWWVWWWWWWWWWVVVVVVVVVVWWWVVVVTTTTTR\n-@HWI-EAS337_3:7:1:1178:755/2\n-GACATTTCAATTACATTCATCTTACCATCACCTATA\n-+HWI-EAS337_3:7:1:1178:755/2\n-WVWVWWWWWVWWVWWVWWVVWWWVVWVWVVTTTTTR\n-@HWI-EAS337_3:7:1:277:1259/2\n-TTTTGGCTCATCAGGATCTTCTGAATCACTTGAAGA\n-+HWI-EAS337_3:7:1:277:1259/2\n-WWWWWWWWWWWWWWVVWWWWWVVVVWWVWWTRTTTS\n-@HWI-EAS337_3:7:1:447:1231/2\n-TCAACAAGAGAAAGGAGACGAAAAAGTAAATCCAAC\n-+HWI-EAS337_3:7:1:447:1231/2\n-WWWWWWWWVWWWWVVWWWWWVVWWWWVVWWTTTTTR\n-@HWI-EAS337_3:7:1:300:1199/2\n-AAAGGTATTGGTTATGCTGAAATGTTTTCTCCTATT\n-+HWI-EAS337_3:7:1:300:1199/2\n-WWWWVVWWWWVVVVWVVWVWWVWVWWWWVWTTTTTS\n-@HWI-EAS337_3:7:1:247:1210/2\n-AGCTATTACCAAAATTAAACTTCACTGCTCAAAACG\n-+HWI-EAS337_3:7:1:247:1210/2\n-VWWVWWWWWWWVWVKWWVVWWWVWWVVWVWTTTTTR\n-@HWI-EAS337_3:7:1:1154:1517/2\n-CTAACTTCAATAATCAAGCTTGTCAGTGGAAGAAAA\n-+HWI-EAS337_3:7:1:1154:1517/2\n-WWWWWWWWVVWVVWWVVVWWWVWVVVVVVVTTTTTR\n-@HWI-EAS337_3:7:1:164:1869/2\n-TAATGTCCTAGAGAAGAAGTCCTTGTGGTGGATGTC\n-+HWI-EAS337_3:7:1:164:1869/2\n-WWWWVWWWWWVWVWWWWWVWWWWWVWWVVVTTTTTN\n-@HWI-EAS337_3:7:1:415:1194/2\n-GAGATGTATTCAAGCTAAGCTTCAAGCAGCGCAGAG\n-+HWI-EAS337_3:7:1:415:1194/2\n-WWWWWWWWWWWWWVWWWVWWWWWVVVVVVWTTTTTR\n-@HWI-EAS337_3:7:1:645:1892/2\n-TTATTGTGGAGGTACAAAAAAGATATCTGCAACTAT\n-+HWI-EAS337_3:7:1:645:1892/2\n-WWWWWVWVVWVVVWVVWVVVVVVWVWWWVWTTTTTR\n-@HWI-EAS337_3:7:1:33:1446/2\n-TTTTGGAATAAAATGATATCTGCATCTGTTGAATCT\n-+HWI-EAS337_3:7:1:33:1446/2\n-WWWWWVWWWWWVWWVWWWWWWVWWWWWVWWTTTTTS\n-@HWI-EAS337_3:7:1:1194:1427/2\n-TGTAAAAAGCTTTGTTCAGCTGTGACAAGAAGAGAA\n-+HWI-EAS337_3:7:1:1194:1427/2\n-WWWWWWVVWWWWWWVWWVVWWVWUVVVVWVTTTTTR\n-@HWI-EAS337_3:7
:1:624:1913/2\n-CAATATGCAGTTGTCAAGTAAGGGAAGTTCACCATT\n-+HWI-EAS337_3:7:1:624:1913/2\n-WWWWVWVWVVWWVVWVWVWVVUVUVVVWWVTTTTTR\n-@HWI-EAS337_3:7:1:437:1202/2\n-GACTGAGATATTAATTAAGGAGATAGATATCTCTAG\n-+HWI-EAS337_3:7:1:437:1202/2\n-WWWWWVWVWVWWVWWVWVVWWVWVWVWWWVTTTTTR\n-@HWI-EAS337_3:7:1:1386:1787/2\n-AACTAAACACTTACCAAAATCACCTTGTTTAGCAGC\n-+HWI-EAS337_3:7:1:1386:1787/2\n-WWWWWWWWVWVWVWWQVVVVVWWWWVVWVWTRTTTR\n-@HWI-EAS337_3:7:1:227:1155/2\n-ACTAAGAACTTGAGAATTGACAGAACCTGTGGACAA\n-+HWI-EAS337_3:7:1:227:1155/2\n-WWVVWWVVWWVVWVWWWVVVWVVVVVVVVVTTTTTP\n-@HWI-EAS337_3:7:1:472:1025/2\n-TTCTACTTAATTTTATTCAATGATATTTAATTAAAA\n-+HWI-EAS337_3:7:1:472:1025/2\n-WWWWWWWWWVWWWWWWWWWVWVWWVWWWVWTTTTTS\n-@HWI-EAS337_3:7:1:220:1482/2\n-GATTCAATGTTGAAAACTCGGCTGTATTCCAACATT\n-+HWI-EAS337_3:7:1:220:1482/2\n-WWWWWWWWVWWVWWWVWWWVVWWVVOVWUVOTROTR\n-@HWI-EAS337_3:7:1:1699:1966/2\n-ATTTTGTGTGGATCAAAAACTCCCACAGATCGATAT\n-+HWI-EAS337_3:7:1:1699:1966/2\n-VWWWWVVVWVVVWWVWVVVWWWVVVVVVVVTTTTTS\n-@HWI-EAS337_3:7:1:547:1084/2\n-ATTTTCAGATTCTTTCAAATAAGGTTCACTCGTTAG\n-+HWI-EAS337_3:7:1:547:1084/2\n-VWWWWWWVWWWWWWWWVWVWVVUVWWWVVWTTTTTN\n-@HWI-EAS337_3:7:1:464:1097/2\n-TCTACTCTCCTTACTCGGTTTGTTGGATTTCATTGG\n-+HWI-EAS337_3:7:1:464:1097/2\n-VWVVWWWWVWVWVWVWVVUWVVWWVUVWWWTQTTCN\n-@HWI-EAS337_3:7:1:171:1480/2\n-AGATCATATCCTACCCTACTGTGGCAAAGGCGTGCT\n-+HWI-EAS337_3:7:1:171:1480/2\n-VWWWWWWWWWWWVWWWVWWVVVVVWWVVVVTTTTTS\n-@HWI-EAS337_3:7:1:293:1251/2\n-CATATGTCCCAATGAAGAGGAGATGTGCATATGTAC\n-+HWI-EAS337_3:7:1:293:1251/2\n-WWWWWWWWWWWVWWWWWWWVVWWVWVVVVVTTTTTR\n-@HWI-EAS337_3:7:1:647:1863/2\n-TTGAAATGAAAAGAGAATGTACTGCAAACGATCTTC\n-+HWI-EAS337_3:7:1:647:1863/2\n-WWWWWWWWWWWWWWVWWWWWWWWVWVWWWWVVVVVS\n-@HWI-EAS337_3:7:1:263:1275/2\n-CAATAACTGGAGCGATGTTAGTAACTGCTGACCACA\n-+HWI-EAS337_3:7:1:263:1275/2\n-WVVWWWWWVVVVWWWWWVWWVWWWWWVWWVTTTTTS\n-@HWI-EAS337_3:7:1:1112:215/2\n-TAACTAAGATATAGGTATACTCTCTCTACATACAAC\n-+HWI-EAS337_3:7:1:1112:215/2\n-VWVWWWWVVVVWWVVWWWVWVVWVVVWWWVTTTTTR\n-@HWI-EAS337_3:7:1:319:1275/2\n-CATTATGTACAAGGATTGAA
CCTGGTTTGGAACAAC\n-+HWI-EAS337_3:7:1:319:1275/2\n-WWWVWWVWWWVWVVWWWVWWWWWVVVWWVVTTTTTR\n-@HWI-EAS337_3:7:1:1310:1480/2'..b'A\n-+HWI-EAS337_3:7:1:986:591/2\n-WWWWWWWVVWVWWVWWVWWVWWVWWWVWWWTTTROR\n-@HWI-EAS337_3:7:1:181:1099/2\n-CAAGTGAATCCCCATGACAGCATGGAGTAAGAAGGT\n-+HWI-EAS337_3:7:1:181:1099/2\n-WVWWWWWWWVWWWWVVVWWVWVWWWVWVVVTTTTTR\n-@HWI-EAS337_3:7:1:509:832/2\n-AGAGATCAAGACTCTTCAATCACTACGTTGTGGATG\n-+HWI-EAS337_3:7:1:509:832/2\n-VVVWWWWWVVVWWWWVWVVWVVWWVVVVWVTQQSTP\n-@HWI-EAS337_3:7:1:510:597/2\n-TGAATAGGTCGCTGGTATTGCTTCCTGTGGGGTTTA\n-+HWI-EAS337_3:7:1:510:597/2\n-WWWWWWWWWWWWWWWWVWWWWWWVWWVVVVQRTTTR\n-@HWI-EAS337_3:7:1:1765:1489/2\n-AAGAAAATGAATGGATGTGTAATATATGCTATGCAT\n-+HWI-EAS337_3:7:1:1765:1489/2\n-WWVWWWVWWWVVWWWVWVVVWVWWWWWVWWTTTTTR\n-@HWI-EAS337_3:7:1:417:1560/2\n-ATTTACACTATAATATTTATTTTTAAGACTTATGGA\n-+HWI-EAS337_3:7:1:417:1560/2\n-WVWWWWWWWWWWWWWWVVWVWVWVWVVWWWTTTNQS\n-@HWI-EAS337_3:7:1:1047:854/2\n-GAGCTTTCCATGTTTTGACTTCTTCTTTACTACTAA\n-+HWI-EAS337_3:7:1:1047:854/2\n-WWWWVWWWWVVVWWWWVVWWWWWWVWWWVVTTTTTN\n-@HWI-EAS337_3:7:1:1296:202/2\n-AGCTAGAAAGCATCAAATGCACATTTATACTAAATA\n-+HWI-EAS337_3:7:1:1296:202/2\n-VWWWWWVWWWWWWWWVVWWWWWWWWVWWWVTSTTTS\n-@HWI-EAS337_3:7:1:502:642/2\n-ATACATCACTGTGTTACCTAAAAAGAGATTACAAGA\n-+HWI-EAS337_3:7:1:502:642/2\n-WVVVWWVWWVWWVWVVWWWOWVVVVWVLVWTTTTTN\n-@HWI-EAS337_3:7:1:82:1651/2\n-GCTAAAAAGTACTATTTCAAAGAAGGATGAAGAGAT\n-+HWI-EAS337_3:7:1:82:1651/2\n-WWWVWWWWWVVWVVWWWWWWWVWVVVVWWWTTTTTS\n-@HWI-EAS337_3:7:1:1505:1274/2\n-CAACAAACAACACCCAACTCATTCATATATAGTAGC\n-+HWI-EAS337_3:7:1:1505:1274/2\n-WVWWWVVWVWWWWWWVWWVWVVWWVVVWVWTTTTTS\n-@HWI-EAS337_3:7:1:5:1770/2\n-ATGAACTTCAGATATATGGCAACAGACTTAATTAGA\n-+HWI-EAS337_3:7:1:5:1770/2\n-WWWWWWWWWWVWWWWWWVVWWWWVVVWWWVTTTTRS\n-@HWI-EAS337_3:7:1:115:1005/2\n-TGTCCTAAATACTTACAGCAGCTGGGATGGTTTTCC\n-+HWI-EAS337_3:7:1:115:1005/2\n-WWWWWWWWVWWWWWVWWVWVVWWVWVVWVVTTTTTR\n-@HWI-EAS337_3:7:1:354:1708/2\n-GAATCCTTCGTCACAACGGTGAAAGTTGACGGAGCA\n-+HWI-EAS337_3:7:1:354:1708/2\n-WWWVWWWWWVRWVWWVWVWVVVVWWWWWVVT
TTTTR\n-@HWI-EAS337_3:7:1:1639:1500/2\n-AACTTTCTAAATATTCGATAATCAATTCTCCAATAT\n-+HWI-EAS337_3:7:1:1639:1500/2\n-WWVWWWWWVWVWWWWWVVWVVWWVVWVWVVTTTTTS\n-@HWI-EAS337_3:7:1:766:243/2\n-TATTACAGTAAGACAAGAACTTCAAGAAGCTTTAGA\n-+HWI-EAS337_3:7:1:766:243/2\n-WVWWWWWVVWVVVWWWVVVWWVWVVVVVVVTTTTRR\n-@HWI-EAS337_3:7:1:920:144/2\n-ATATTTCAGTGGAAACACAAAAATCCAAGGAAGAAA\n-+HWI-EAS337_3:7:1:920:144/2\n-WVWWWWWVVNWVWVWVWVWVVWVRVVWWVVTTTTTR\n-@HWI-EAS337_3:7:1:389:268/2\n-CCAACAGAGAGAAAAGGGAATTCTTTGAGGGTACCT\n-+HWI-EAS337_3:7:1:389:268/2\n-WWWWWVWWWWWWWWWVWVWVWVWWWWVWVVTTTTTS\n-@HWI-EAS337_3:7:1:294:1868/2\n-GGAACATCATTTCACACTTTCAAACACTTAGCAACA\n-+HWI-EAS337_3:7:1:294:1868/2\n-VVWVVVWWWVWWWWVVWWVWVWVVVVWVVWTTTTTR\n-@HWI-EAS337_3:7:1:1147:62/2\n-TAGTTCTCCAGGTTCGGCAACCACAAATACATCATT\n-+HWI-EAS337_3:7:1:1147:62/2\n-WWWWWWWWWVWVVWVVVWVWWWVVVVVVVWTTTTTR\n-@HWI-EAS337_3:7:1:787:1759/2\n-CTGGATTATTATATGATGATATTTGAACTTTCCTTA\n-+HWI-EAS337_3:7:1:787:1759/2\n-VWVWWWWVWWVWVWVWVWWWWWWWVVVVWVTTTTTR\n-@HWI-EAS337_3:7:1:425:1939/2\n-TAACTTTTCTATCTGGTTTCTATGTTTTCCAGCTCT\n-+HWI-EAS337_3:7:1:425:1939/2\n-WVWWWWWWWWWWWWVVVWWWWWWUVWVWVVTQTTTS\n-@HWI-EAS337_3:7:1:187:1132/2\n-GAATAAAAAAAGACAACAACATATCAAGATACAAAG\n-+HWI-EAS337_3:7:1:187:1132/2\n-WWWVWVWWWWWVWWVWWWVWWVWWWVWWVWTTTTTR\n-@HWI-EAS337_3:7:1:1739:1840/2\n-AAAACTGTCCTTCCCGTCAAACTTTTGGGTCAAAAG\n-+HWI-EAS337_3:7:1:1739:1840/2\n-WWVWWWWWWWWWWWWVWWWVVWVCWWVVVVTTTTTR\n-@HWI-EAS337_3:7:1:1505:1876/2\n-CATTCTCTAAACTAGTGAACTTGACAACACTTGATC\n-+HWI-EAS337_3:7:1:1505:1876/2\n-WWWWWWWWWWWWWWWWVWWWWWVWVVWVVVTTTTTS\n-@HWI-EAS337_3:7:1:447:192/2\n-GTGAGTGAAACATATCCAAAAGGCATCTGTTCAGTT\n-+HWI-EAS337_3:7:1:447:192/2\n-WVWWWVWWWWWVWVVWWWVVVVVWWVWVVWTTTTTS\n-@HWI-EAS337_3:7:1:21:2019/2\n-ATTATGTTTACGGGACAATTGTATGTTCCATTATCT\n-+HWI-EAS337_3:7:1:21:2019/2\n-VWVWWWWWWWWWWWWWWWWWVWVWUWVVWWTTTTTR\n-@HWI-EAS337_3:7:1:1593:652/2\n-TTATTTAGCTGTATAATTCCTCTTGTTTTAAGCATA\n-+HWI-EAS337_3:7:1:1593:652/2\n-WWVVWVWWWWWWWVWVVVWVWWWWVWWWWWTKTTTS\n-@HWI-EAS337_3:7:1:1254:1660/2\n-AGTAAT
GGCATCACATATGGGTAGGAAAAATGTACA\n-+HWI-EAS337_3:7:1:1254:1660/2\n-WVWWWVWVWVWVVWVWWWVWWVVWWVVVVWTTTTTS\n-@HWI-EAS337_3:7:1:291:629/2\n-ATGAAGGGTTTTTTTGTTCTCTAATGTCATCTTATT\n-+HWI-EAS337_3:7:1:291:629/2\n-WWWWVVWWWWWWWWWVWWVWVWVVVQWVVWTTTTTS\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/bamFile.bam
b
Binary file SMART/data/bamFile.bam has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/dummy.fasta
--- a/SMART/data/dummy.fasta Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
->HWI-EAS337_3:7:1:415:1217/1
-GATGTGCAGACTTTTCACGCAGGACTACATCACTGT
->HWI-EAS337_3:7:1:208:1489/1
-GGAAACATATGCACATAAACGTTGAAATCATGCTTA
->HWI-EAS337_3:7:1:1154:1517/1
-GAATGTTGCAGACCTTACTCCTACCTATGAAGCACA
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/expRef.fasta
--- a/SMART/data/expRef.fasta Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,33148 +0,0 @@\n->C10HBa0111D09_LR276\n-GAACAAACAACCCCTTTTTGGAGGTGTTGGCGCGTCGTGCAGCTTACACTCAAAAGTTAA\n-AAAGTTGCCTTGCGATGCGGTCATGTTACAAACCTCTCTGCCTTAAATTAAATTCCATAA\n-CCAAGATTTGGAGGTGCCTCAACGATGCGCAGCCATGTCCCATATTTGGTCGCCTCGTTT\n-AAAAGTCAAGTTAGACTTAATTAAGAGGTCCAACTAGTGTAGGGGCGTTTTGAGTACTTG\n-TGGGATTTATTATAAACGGTTTTGAGTCACTTTAAACCCACTTCACCAATTAAAACAAAA\n-TCCTCAAGTTAAAACTCAATATCTTTCCATTCTCTCTCTCTAAAACCTTCATTGGAGATA\n-TTTGAAGCTCCACGGAAGAAGGTTAATTTTCCAAGGTTTCAATGAAAATTTCGTGTATAG\n-GTCTTCAATAAGGTATGGTGATTTCATCCTTGATTCTTCTATCATTCAAGGATCCAATTC\n-AAAGGTTTTTCAAAAGATCTCAAAAATCCTATTTCGAATTCTAAGTATGGGTTCTTCCAT\n-TTAAAGGTTTAAATGGATGAATTATGATGTTTTCAATGTTAGTTGATGTTTTTATGATAA\n-AAAAACTCCATGAACCCATGAGCATCCTAATTCTCTAATTTTGTCTTGTAAATTGAGTTT\n-GATAATTGTGATTGGTTATGGATGGAATTGTATTTAGATTGCTCTATATTGTTGATTCTT\n-ATTGTTAACCTATCTCTATATATGTAGAATTGAGATTGTAAGGATGAGTTAGTAATCTTG\n-GCTTTATGGGCTTTCGAATCCGGGTTTACCCCCTGGATGTAACCGGCATCCTCGCCCTTT\n-TTCAAGGACTAAGACCAACCTTTTAGTCTCATGTCATTACATTCATAGGTTGACAAATGC\n-GGAAAAATTTAAAACTTTCATTATCACTACTTGGAGGTTTACATAGACCTCTACATACAC\n-ATAAGATATATTCATATAGAGTATACATAGACCCTTCGTATAGGAAGGTTACATAGCCAT\n-CTACTTTTATTACACATACATATATATAAAATATAAAAATAGTCTAACGATTGTCTCATC\n-TCATACCCTCTAAACGATTATCACAATATGGGCATAACCCTTACATCAATCAAACAAGAG\n-CACATATAGGTCATACAAAAGTATAGTACTCAATTAAAAAGGAAAGAAATGAAAGAGTCT\n-TTAAGCTCATAACAAGTCCATAAGCTAGATTATGGCATTGACCTCAAAAGTTGAGGACCT\n-TATGTGCGTACACAAGCAAAACATGCTAAAAAGGGACTTTTTAGTCAAAACATGCCCATT\n-TATCCCTTTAAGAACCTACTACAAAGCCAACAAGTCATACCAACCAACCAAACATGCTTA\n-CTATCTCAACAAGTAATACTTATCCCAACATACTTGAAACCATGATTTACTACAACCCTA\n-TCACCAAGGAAAAATATCACAAGAATGAATAAGAGTCAATCATATCATGATAGAGAGACA\n-ACTATTCATGAATCCTTATCAACTCAACAAGTGCAATAACCAAGCAAAGCCTCATAACCT\n-TACTCAATCAAGTATCCTCAAAAAGAAACCATGACCAATGTCCAACTTTACCTAACATAG\n-CATTTAGGTTTACATTTTATCATATATTAACATTATGACCCAAGGCATACTCATTAGTAA\n-ACTAATTAATATATAATATCAACAATGTGCCATAGTAATCATATATACATAATATATCAT\n-CATAACATAAACATATATAAAAACCTCCTTCTAAGACTCCCCTCAAGGCTAACTAGTGAA\n-ATGTTTAGGTAGAGCCCCATACCCCTACCTAGATTAAGCTAGACCCCTTAGGTTATCCAA\n-
GTTAGAGTTCAAGTCCTTTAATTCGTTTTACCTTTTGGGAACATCTTGCCCTAACCGACA\n-TAGACCACATGAGCTAGTGTGGGATACGGTTCCAAAAAACCCTACACAGAAAGAAGGCGG\n-ACTACTTGCCAAAGTATTACCAAAACATGAAACATAGCAACTACGTTGATCCACTAGCAA\n-GTATTTCTATAGGGGCAACATAGTTCAAGAACTCTGAGATATACTTGAGACCCTCTTTAT\n-GCGCCATGCATTATAGTCTCCAACCTCAAGAGTAATGTAGTGTTCCTACCTTCCCCATGT\n-GAGAAAGGACACTCCTCAATCTAGTTCACTCGGTGCTAAGCTAGAGACCCTTTTTGAAAT\n-GTCTTTAAGCCTTTAATTATCAATCATAGCTTAGCTTAGGTCATAGGGTATATCTCTTGT\n-ATAATCATCATCATCAATAGCTCAATAATAATTGTATGAGTATAAGTCCTTTCATCACAA\n-TTCATATAAGTGAGGTTAACATGTTAGCATTTCATTGCATATCAAGAAACATTGATGATT\n-CTTACCATCCTTGTATCACATACACCTTAATCAATCTCACAACATAGTCAGGACATATCA\n-ATTCAACATCATACCACCCTATAATCCTAATATAAGGCATACTCCAATATAACTTCACGT\n-CTTAACAAAAATTTATCACAATTGGAATTAAAGATAGAGATTCTAAGACTTAACAAGTCT\n-TCCTTGTAGTTCATCATCAAGGTCTTACCATCAACCCATAACTCAACCAAGTTTGGGGAG\n-TAACATCATCACACAATGATAATCAATAGGATAACAAGGCTAATTTCATCTCTATAACAC\n-AATTCAACACTAGATCATAACTTAAGACAAGATACATAGGCTAATTTCACACTATAATTC\n-ATAACCTAAATCACATCTCAAGAAATAGCATTATAGTCCTATAATTCATATTAATTTGTT\n-CATAATAACACAATAGGATAGTAATTTAATCAATAACCAAGTCAATTGAATGATCACAAT\n-ACAATATACATCAATATCACAAGCTAGGGTTAGGGATGAAGGATCATATTCTTCAATTTA\n-GACCAAACCACTAACAATTACCATAATAAAGTTTAAATTCATGTAAATGTATTCAATATA\n-ACCTAAATAAATCATTAACAACTCAATCCATAACTTCAATTTCGTAATTGAATGAAACCC\n-ATAAGAAAATTCACCTTTTGAAATCCATTTTAAAGAAACCCTTTGAGGAAAGAGCCTCAA\n-AGGTGAATTAGATCCCATATATTAATGTTTGATGATGAATTCGCCCCTTTCCATCCCCCA\n-AACCCTTATCCTTGCTAGTTTTTAATGGTGAGTTCAAGTAGAGAGAGAAATAAGAGAGAA\n-GGAAGAGAGTTTTTGTCTTAGAGTTCTAATTAATTTAATTGGGGTTGGGGATTTTATATG\n-CGTTTTAAGTTAGTTAATTAGTCACCCCTCAATACCTAACTAACCCCTGAACCACCTAAT\n-TAATTAAATGAATCAATATAAAAACATACAGGAAATTTGACCTTCACAGACGAGACCCCG\n-AACGACGGGCCATCTGTGAGTCAACGGTCCCTCACCCCTCCGTCCTGCACTCTATCGATC\n-AGTTCATAGACTGTGCAGGCAGATCAATTCTTCAACTTGTCTAAGTATGGGATGACGGTG\n-GTATCGACTCCCCGTCAGTCCACACACGGACCGTAGGTGGTCCCATCGATGCGCATTGTC\n-TAGTCCTTGTTTGTTCAAACACAAGGGCCTCAAGGGCCCTTGGTTGGTGCTTGGGGAGTC\n-GTACCCATACGTTTCAATCATGAAACAACTCAAAAACCTATAATCTATCCTTCCACCAAT\n-TTTTGTACCTTTCCGACTCTTAAAAGGTAGTCAAATAGGCTAAGGCA
CGCTAACACCCCT\n-TTGAACCAACTTCCTGGACGTTCTTATACATTTTGGTTCTTAAACTTCCT'..b'CAATATTCCATATTGATCGCCAGCTTCCATTGCTACAAAAGA\n-TGCATATAGTATCAGCTTCTTTAGACAAAGCTATAAGAAGTATCAGTTGACCCAATTGAC\n-AAGTCACACGTGCTATAACAAGATATCACTTGACGCAAGTGACAAGTGTGAAGCTGACAG\n-GAAAATAGGCAATAGAATCCCTCATTCTTTGTTTATATATAGCAACCTATTACTTCAGTA\n-TCTGTTTACAAGTTCTGCACCACGATAAGTATAACTATTTAGAAATTATGAAGGGAGTGT\n-TGCACAAATTAGTCAGGGTTAGAATTTTAATAATCCAACACACCAGAAATTCTGAGGACT\n-ATGCCTAGCAGCTGAAATCACCACAACAAGTTCAAAGTCAAATCCTGGCTCTTCCACATC\n-CTTTCCCTTCGTACAGTAAACTGAACAGATGCCTTTTGGATATGTTTCACTCACATACTT\n-CATAATTTCAGCATCCATGGCAGACCTACACAAGAATTACCATGAATAGGAGAATTTGGC\n-AAGCAACGGTAATCACAAAGTCAGTACTCATAAACCAAGCCTCTGATGAGAAGAACTAAC\n-CATGTTAAGAAGGGAACTACTCACATAAAACAAAAACATGATTTTTATAATCTGCATGCC\n-AAGCATTAGGAACTATTGAAGAATTCACTGCGATCTCAGAATATAATCCTTTCATACTCT\n-TAATGCCTCCCAATTATACCAAATTTAGGTGTCTTACTACCATTTTAGTATCTGACATTG\n-ATGTCATTACCCATTAATTTGGAGACACTGGTTCCTCACCGATAAAGATGAACAAAAAGG\n-TAAACCAGTGTATTATGAATCAGATCTCTTTATGGAATAAAAACACACATGAATCATGAT\n-GTAGGATCTTATTAACACTTGAGGACTGAGGCGAAGATTACTAAGAATATCCCGAAAGAT\n-AAGACATAGAATGTAATATAGAAGGACAGAGAACAAATGATACTGATTAAGAAGAAGAGC\n-ATCTATATAGGAGAGATTAGCCTAAACTATTTATTTTCAGACTGATTTCGGTGAGCGCAG\n-ACCAAAACATGCAGCTTTTTTTTCAATTAAGCCGGAAAGACAATTTCCACAAGAAATGCA\n-ACTGTTCTAGCATATCTTAAACTATAATCTGAGTGCTGTCTGAGAACTGAGGTTGAATTT\n-GCAAGTCTTGTTGAGCATGGTTAAAAAATAAGTCCAATTAGGCAAAATAATTGTGAATGT\n-CATAATATAGAAGAACTTCAACATCTCAATGGGAAAAACAGAAAGTGAGTAGCTAAAAAG\n-GGGAGCAATACCAAAGATATTAAACTGAGAAATATATCTCATACCCTACAGATGCATACC\n-TATACTCTTCCACGAAAGCAGATGGAAGTTCTTCATCTCTTGCTGGCCTAACGTCTTTAC\n-AAACCTAGAATGCAGACATACCATGAGCTTACAAGAAGGGAGCATAAATTATTACACGGC\n-AACAATAACTAGGAAAAAAAGAGAGAGAGGAAATACATTTAATTTCACAGCCCTTGAATT\n-ATGAATAAAACCATGCACTTGTTATATTAAGCAGAGACTACTTCCACTTTTCCAATCTAT\n-TTACATCTACTTCATCACATTAACAAGAATAGAAAAGAAATGCATGTTATGTGACATTCA\n-GCACTGTAGACTAATGAGGATTAATTTACAACCATGGACCAGCTATAGCAGAAGAGACCT\n-TATTTTACTTTTCCAAAATTGGTATTTACTCCATCACCATTAACAAGAATAAAAAATGCA\n-TCTTATGTGAAGTTCATAACTATTGAATAATGAGGTTAATCAAGAGCTTGCTATATC
AAA\n-GCACATTTTTCAAGTTTACAATTTCTTCATTCTTGGTATCCACTTCTATCATGAAAACCA\n-ACCTAACAGTTAACACAATTCACTCTGGTGTTTTGTTCTTTATCCCTAATGCCTGAGTGT\n-GTTTAACTAATCAAGTTCCAATCAGCCAGAAGATGTCTAAACATACTAAACTATAGATAA\n-ACAACATGAGCATAAAACCAGTTTATGTAGAGATTTTTAATTGCACAGCATAAAAAGGAG\n-TACCCCCGGCACTATATGTGCTTCTCTTTCTTTTTTTCTTACTTCGTCCTGATCTTTTTA\n-CTTTTCTTTTCCTTTCTTTCAGAATAAGCACATTTTTGGATATAGTCCCACCATCTCTAC\n-CAGGTTTATGTCTGTCCATATTATTGCTTTTTTGAATTACCTTCTAAACAAAATACTCAT\n-CCTTACAAACCTGCTTCCTCTTCTATGGATCCCACCACTCCATGCCCCGAAAAAAAAGAA\n-AGAAAAAGATAAGTTTTTCATAGTCACTTTAGATTATCATTTATTAAATTCTGTCGGCTC\n-AACTGATAGGAACAGTGAAATGGACTTTTCAATCATAAAAAGATAATAGAAGTTATTAAA\n-TGATTCCCTTCAAGATTATGAGCTTTTAAACTTACATATCATGCAACTATTGGGGAATTA\n-AGGGACTGGGGATTTGATGATAAATTCCAGCACCATTTTTGGTGCTTTTGTGTTTTTGCA\n-AGGTAGTTTGTTAGTGGCACATGGAAGGAGGTGCCTCATCCAATGAAATTATTAATCTTA\n-TCAACAAGAAGAAAGTCAAAACACCAAACTGTAAAAAATCCAAAAATAGCATTTTGCATT\n-GTGTACTAACTGAACAACGTACTTGCTTGACATGGTCAACTCTGGCAACCTGCGCAGTCC\n-GGGGATCAAGATACTCATCCTTATGAACCTCACTAAATGATGTAATCAGTACCTACAAAT\n-TAGTGAACAGCAACTTTACACAGCTAGATCATGAAAAATAGCTTCCAAGTGTCCATTACT\n-ACATAAATGAAAAGCATTATACTTTCTTTTTAGAAGAGGGGAACAAAATCTTAGACTTCA\n-TAAGGAACAGTTCTCCGGAAAGTTTTCTTTTCTATATTGAAGAAGTAATCATTTATTGAA\n-GTGGTGGAAATTCCCTAAGCTTTAAACAGATGTAGAGAACTTGTCCATAAACATGGTGCT\n-CAACAAACAACACCCAACTCATTCATATATAGTAGCGCCAGCATCCAAGTACCATGAAGT\n-ATATCTCTATCCTAAAAGCTTTACTTGAAGACTTATTAGATTAATCTTACTTATTTCTCA\n-TGCAACTTTTTTTTTGAAAAGTTTCTCAAGCATAACTTTTATCCTTTGTTTTCATTCAGT\n-CTTTGAGCTCTAAAGGATGCCTAGAGAGGTCATGAACCAAGTAAGGAAATTGCAACATCA\n-TATTGCTTCCAATTTACCTCATTTTTTATCTTCAACTACCGAAAACTACCAAATCTGAAA\n-TTATCAACTAAGGAAAATTACAACATCAGTACAATAAGTATTGCTTACAGTTCACTTAAT\n-ATTTCAATCTTCGACTACGGAAAACTGTCAAATCTGAAATTATCAATTTGAATGACACGA\n-TTAGTCTAACTTACATTTTCAAAGATCTAACTTACCCAATGAAAAAAGAAAACAAGAGAG\n-AGACTTACATCGCCACTTCTGTTTGGGAATTCGAGACAAATCAAGTGAGATTTGTTGTAC\n-GAAGGAAATGACTCCTCGGCCGCTTTCTTATATATATTTTCGTCCTTTAAAATAGCTCTA\n-ACATCTGCAAATCCAACCAAATGACGCTCAAAACCAAAAATGTAAAAAATAAACTGCCGA\n-TCGCAAATGAACACCAATGCGGTCACATTTTCAAGCACGAA
AAAAGCTTCAAAATACAAA\n-AAACTTTAGCGCAGAAAATAAACGAAAGAGAAGAAGAAGAAGACCTTTGGCGACGTACTG\n-AATTTCGCCGGCTGGGGCATTAAGAAGGAACCATTTGGCAATCTCAAT\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/expRef_withoutSplit.fasta
--- a/SMART/data/expRef_withoutSplit.fasta Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,33148 +0,0 @@\n->C10HBa0111D09_LR276\n-GAACAAACAACCCCTTTTTGGAGGTGTTGGCGCGTCGTGCAGCTTACACTCAAAAGTTAA\n-AAAGTTGCCTTGCGATGCGGTCATGTTACAAACCTCTCTGCCTTAAATTAAATTCCATAA\n-CCAAGATTTGGAGGTGCCTCAACGATGCGCAGCCATGTCCCATATTTGGTCGCCTCGTTT\n-AAAAGTCAAGTTAGACTTAATTAAGAGGTCCAACTAGTGTAGGGGCGTTTTGAGTACTTG\n-TGGGATTTATTATAAACGGTTTTGAGTCACTTTAAACCCACTTCACCAATTAAAACAAAA\n-TCCTCAAGTTAAAACTCAATATCTTTCCATTCTCTCTCTCTAAAACCTTCATTGGAGATA\n-TTTGAAGCTCCACGGAAGAAGGTTAATTTTCCAAGGTTTCAATGAAAATTTCGTGTATAG\n-GTCTTCAATAAGGTATGGTGATTTCATCCTTGATTCTTCTATCATTCAAGGATCCAATTC\n-AAAGGTTTTTCAAAAGATCTCAAAAATCCTATTTCGAATTCTAAGTATGGGTTCTTCCAT\n-TTAAAGGTTTAAATGGATGAATTATGATGTTTTCAATGTTAGTTGATGTTTTTATGATAA\n-AAAAACTCCATGAACCCATGAGCATCCTAATTCTCTAATTTTGTCTTGTAAATTGAGTTT\n-GATAATTGTGATTGGTTATGGATGGAATTGTATTTAGATTGCTCTATATTGTTGATTCTT\n-ATTGTTAACCTATCTCTATATATGTAGAATTGAGATTGTAAGGATGAGTTAGTAATCTTG\n-GCTTTATGGGCTTTCGAATCCGGGTTTACCCCCTGGATGTAACCGGCATCCTCGCCCTTT\n-TTCAAGGACTAAGACCAACCTTTTAGTCTCATGTCATTACATTCATAGGTTGACAAATGC\n-GGAAAAATTTAAAACTTTCATTATCACTACTTGGAGGTTTACATAGACCTCTACATACAC\n-ATAAGATATATTCATATAGAGTATACATAGACCCTTCGTATAGGAAGGTTACATAGCCAT\n-CTACTTTTATTACACATACATATATATAAAATATAAAAATAGTCTAACGATTGTCTCATC\n-TCATACCCTCTAAACGATTATCACAATATGGGCATAACCCTTACATCAATCAAACAAGAG\n-CACATATAGGTCATACAAAAGTATAGTACTCAATTAAAAAGGAAAGAAATGAAAGAGTCT\n-TTAAGCTCATAACAAGTCCATAAGCTAGATTATGGCATTGACCTCAAAAGTTGAGGACCT\n-TATGTGCGTACACAAGCAAAACATGCTAAAAAGGGACTTTTTAGTCAAAACATGCCCATT\n-TATCCCTTTAAGAACCTACTACAAAGCCAACAAGTCATACCAACCAACCAAACATGCTTA\n-CTATCTCAACAAGTAATACTTATCCCAACATACTTGAAACCATGATTTACTACAACCCTA\n-TCACCAAGGAAAAATATCACAAGAATGAATAAGAGTCAATCATATCATGATAGAGAGACA\n-ACTATTCATGAATCCTTATCAACTCAACAAGTGCAATAACCAAGCAAAGCCTCATAACCT\n-TACTCAATCAAGTATCCTCAAAAAGAAACCATGACCAATGTCCAACTTTACCTAACATAG\n-CATTTAGGTTTACATTTTATCATATATTAACATTATGACCCAAGGCATACTCATTAGTAA\n-ACTAATTAATATATAATATCAACAATGTGCCATAGTAATCATATATACATAATATATCAT\n-CATAACATAAACATATATAAAAACCTCCTTCTAAGACTCCCCTCAAGGCTAACTAGTGAA\n-ATGTTTAGGTAGAGCCCCATACCCCTACCTAGATTAAGCTAGACCCCTTAGGTTATCCAA\n-
GTTAGAGTTCAAGTCCTTTAATTCGTTTTACCTTTTGGGAACATCTTGCCCTAACCGACA\n-TAGACCACATGAGCTAGTGTGGGATACGGTTCCAAAAAACCCTACACAGAAAGAAGGCGG\n-ACTACTTGCCAAAGTATTACCAAAACATGAAACATAGCAACTACGTTGATCCACTAGCAA\n-GTATTTCTATAGGGGCAACATAGTTCAAGAACTCTGAGATATACTTGAGACCCTCTTTAT\n-GCGCCATGCATTATAGTCTCCAACCTCAAGAGTAATGTAGTGTTCCTACCTTCCCCATGT\n-GAGAAAGGACACTCCTCAATCTAGTTCACTCGGTGCTAAGCTAGAGACCCTTTTTGAAAT\n-GTCTTTAAGCCTTTAATTATCAATCATAGCTTAGCTTAGGTCATAGGGTATATCTCTTGT\n-ATAATCATCATCATCAATAGCTCAATAATAATTGTATGAGTATAAGTCCTTTCATCACAA\n-TTCATATAAGTGAGGTTAACATGTTAGCATTTCATTGCATATCAAGAAACATTGATGATT\n-CTTACCATCCTTGTATCACATACACCTTAATCAATCTCACAACATAGTCAGGACATATCA\n-ATTCAACATCATACCACCCTATAATCCTAATATAAGGCATACTCCAATATAACTTCACGT\n-CTTAACAAAAATTTATCACAATTGGAATTAAAGATAGAGATTCTAAGACTTAACAAGTCT\n-TCCTTGTAGTTCATCATCAAGGTCTTACCATCAACCCATAACTCAACCAAGTTTGGGGAG\n-TAACATCATCACACAATGATAATCAATAGGATAACAAGGCTAATTTCATCTCTATAACAC\n-AATTCAACACTAGATCATAACTTAAGACAAGATACATAGGCTAATTTCACACTATAATTC\n-ATAACCTAAATCACATCTCAAGAAATAGCATTATAGTCCTATAATTCATATTAATTTGTT\n-CATAATAACACAATAGGATAGTAATTTAATCAATAACCAAGTCAATTGAATGATCACAAT\n-ACAATATACATCAATATCACAAGCTAGGGTTAGGGATGAAGGATCATATTCTTCAATTTA\n-GACCAAACCACTAACAATTACCATAATAAAGTTTAAATTCATGTAAATGTATTCAATATA\n-ACCTAAATAAATCATTAACAACTCAATCCATAACTTCAATTTCGTAATTGAATGAAACCC\n-ATAAGAAAATTCACCTTTTGAAATCCATTTTAAAGAAACCCTTTGAGGAAAGAGCCTCAA\n-AGGTGAATTAGATCCCATATATTAATGTTTGATGATGAATTCGCCCCTTTCCATCCCCCA\n-AACCCTTATCCTTGCTAGTTTTTAATGGTGAGTTCAAGTAGAGAGAGAAATAAGAGAGAA\n-GGAAGAGAGTTTTTGTCTTAGAGTTCTAATTAATTTAATTGGGGTTGGGGATTTTATATG\n-CGTTTTAAGTTAGTTAATTAGTCACCCCTCAATACCTAACTAACCCCTGAACCACCTAAT\n-TAATTAAATGAATCAATATAAAAACATACAGGAAATTTGACCTTCACAGACGAGACCCCG\n-AACGACGGGCCATCTGTGAGTCAACGGTCCCTCACCCCTCCGTCCTGCACTCTATCGATC\n-AGTTCATAGACTGTGCAGGCAGATCAATTCTTCAACTTGTCTAAGTATGGGATGACGGTG\n-GTATCGACTCCCCGTCAGTCCACACACGGACCGTAGGTGGTCCCATCGATGCGCATTGTC\n-TAGTCCTTGTTTGTTCAAACACAAGGGCCTCAAGGGCCCTTGGTTGGTGCTTGGGGAGTC\n-GTACCCATACGTTTCAATCATGAAACAACTCAAAAACCTATAATCTATCCTTCCACCAAT\n-TTTTGTACCTTTCCGACTCTTAAAAGGTAGTCAAATAGGCTAAGGCA
CGCTAACACCCCT\n-TTGAACCAACTTCCTGGACGTTCTTATACATTTTGGTTCTTAAACTTCCT'..b'CAATATTCCATATTGATCGCCAGCTTCCATTGCTACAAAAGA\n-TGCATATAGTATCAGCTTCTTTAGACAAAGCTATAAGAAGTATCAGTTGACCCAATTGAC\n-AAGTCACACGTGCTATAACAAGATATCACTTGACGCAAGTGACAAGTGTGAAGCTGACAG\n-GAAAATAGGCAATAGAATCCCTCATTCTTTGTTTATATATAGCAACCTATTACTTCAGTA\n-TCTGTTTACAAGTTCTGCACCACGATAAGTATAACTATTTAGAAATTATGAAGGGAGTGT\n-TGCACAAATTAGTCAGGGTTAGAATTTTAATAATCCAACACACCAGAAATTCTGAGGACT\n-ATGCCTAGCAGCTGAAATCACCACAACAAGTTCAAAGTCAAATCCTGGCTCTTCCACATC\n-CTTTCCCTTCGTACAGTAAACTGAACAGATGCCTTTTGGATATGTTTCACTCACATACTT\n-CATAATTTCAGCATCCATGGCAGACCTACACAAGAATTACCATGAATAGGAGAATTTGGC\n-AAGCAACGGTAATCACAAAGTCAGTACTCATAAACCAAGCCTCTGATGAGAAGAACTAAC\n-CATGTTAAGAAGGGAACTACTCACATAAAACAAAAACATGATTTTTATAATCTGCATGCC\n-AAGCATTAGGAACTATTGAAGAATTCACTGCGATCTCAGAATATAATCCTTTCATACTCT\n-TAATGCCTCCCAATTATACCAAATTTAGGTGTCTTACTACCATTTTAGTATCTGACATTG\n-ATGTCATTACCCATTAATTTGGAGACACTGGTTCCTCACCGATAAAGATGAACAAAAAGG\n-TAAACCAGTGTATTATGAATCAGATCTCTTTATGGAATAAAAACACACATGAATCATGAT\n-GTAGGATCTTATTAACACTTGAGGACTGAGGCGAAGATTACTAAGAATATCCCGAAAGAT\n-AAGACATAGAATGTAATATAGAAGGACAGAGAACAAATGATACTGATTAAGAAGAAGAGC\n-ATCTATATAGGAGAGATTAGCCTAAACTATTTATTTTCAGACTGATTTCGGTGAGCGCAG\n-ACCAAAACATGCAGCTTTTTTTTCAATTAAGCCGGAAAGACAATTTCCACAAGAAATGCA\n-ACTGTTCTAGCATATCTTAAACTATAATCTGAGTGCTGTCTGAGAACTGAGGTTGAATTT\n-GCAAGTCTTGTTGAGCATGGTTAAAAAATAAGTCCAATTAGGCAAAATAATTGTGAATGT\n-CATAATATAGAAGAACTTCAACATCTCAATGGGAAAAACAGAAAGTGAGTAGCTAAAAAG\n-GGGAGCAATACCAAAGATATTAAACTGAGAAATATATCTCATACCCTACAGATGCATACC\n-TATACTCTTCCACGAAAGCAGATGGAAGTTCTTCATCTCTTGCTGGCCTAACGTCTTTAC\n-AAACCTAGAATGCAGACATACCATGAGCTTACAAGAAGGGAGCATAAATTATTACACGGC\n-AACAATAACTAGGAAAAAAAGAGAGAGAGGAAATACATTTAATTTCACAGCCCTTGAATT\n-ATGAATAAAACCATGCACTTGTTATATTAAGCAGAGACTACTTCCACTTTTCCAATCTAT\n-TTACATCTACTTCATCACATTAACAAGAATAGAAAAGAAATGCATGTTATGTGACATTCA\n-GCACTGTAGACTAATGAGGATTAATTTACAACCATGGACCAGCTATAGCAGAAGAGACCT\n-TATTTTACTTTTCCAAAATTGGTATTTACTCCATCACCATTAACAAGAATAAAAAATGCA\n-TCTTATGTGAAGTTCATAACTATTGAATAATGAGGTTAATCAAGAGCTTGCTATATC
AAA\n-GCACATTTTTCAAGTTTACAATTTCTTCATTCTTGGTATCCACTTCTATCATGAAAACCA\n-ACCTAACAGTTAACACAATTCACTCTGGTGTTTTGTTCTTTATCCCTAATGCCTGAGTGT\n-GTTTAACTAATCAAGTTCCAATCAGCCAGAAGATGTCTAAACATACTAAACTATAGATAA\n-ACAACATGAGCATAAAACCAGTTTATGTAGAGATTTTTAATTGCACAGCATAAAAAGGAG\n-TACCCCCGGCACTATATGTGCTTCTCTTTCTTTTTTTCTTACTTCGTCCTGATCTTTTTA\n-CTTTTCTTTTCCTTTCTTTCAGAATAAGCACATTTTTGGATATAGTCCCACCATCTCTAC\n-CAGGTTTATGTCTGTCCATATTATTGCTTTTTTGAATTACCTTCTAAACAAAATACTCAT\n-CCTTACAAACCTGCTTCCTCTTCTATGGATCCCACCACTCCATGCCCCGAAAAAAAAGAA\n-AGAAAAAGATAAGTTTTTCATAGTCACTTTAGATTATCATTTATTAAATTCTGTCGGCTC\n-AACTGATAGGAACAGTGAAATGGACTTTTCAATCATAAAAAGATAATAGAAGTTATTAAA\n-TGATTCCCTTCAAGATTATGAGCTTTTAAACTTACATATCATGCAACTATTGGGGAATTA\n-AGGGACTGGGGATTTGATGATAAATTCCAGCACCATTTTTGGTGCTTTTGTGTTTTTGCA\n-AGGTAGTTTGTTAGTGGCACATGGAAGGAGGTGCCTCATCCAATGAAATTATTAATCTTA\n-TCAACAAGAAGAAAGTCAAAACACCAAACTGTAAAAAATCCAAAAATAGCATTTTGCATT\n-GTGTACTAACTGAACAACGTACTTGCTTGACATGGTCAACTCTGGCAACCTGCGCAGTCC\n-GGGGATCAAGATACTCATCCTTATGAACCTCACTAAATGATGTAATCAGTACCTACAAAT\n-TAGTGAACAGCAACTTTACACAGCTAGATCATGAAAAATAGCTTCCAAGTGTCCATTACT\n-ACATAAATGAAAAGCATTATACTTTCTTTTTAGAAGAGGGGAACAAAATCTTAGACTTCA\n-TAAGGAACAGTTCTCCGGAAAGTTTTCTTTTCTATATTGAAGAAGTAATCATTTATTGAA\n-GTGGTGGAAATTCCCTAAGCTTTAAACAGATGTAGAGAACTTGTCCATAAACATGGTGCT\n-CAACAAACAACACCCAACTCATTCATATATAGTAGCGCCAGCATCCAAGTACCATGAAGT\n-ATATCTCTATCCTAAAAGCTTTACTTGAAGACTTATTAGATTAATCTTACTTATTTCTCA\n-TGCAACTTTTTTTTTGAAAAGTTTCTCAAGCATAACTTTTATCCTTTGTTTTCATTCAGT\n-CTTTGAGCTCTAAAGGATGCCTAGAGAGGTCATGAACCAAGTAAGGAAATTGCAACATCA\n-TATTGCTTCCAATTTACCTCATTTTTTATCTTCAACTACCGAAAACTACCAAATCTGAAA\n-TTATCAACTAAGGAAAATTACAACATCAGTACAATAAGTATTGCTTACAGTTCACTTAAT\n-ATTTCAATCTTCGACTACGGAAAACTGTCAAATCTGAAATTATCAATTTGAATGACACGA\n-TTAGTCTAACTTACATTTTCAAAGATCTAACTTACCCAATGAAAAAAGAAAACAAGAGAG\n-AGACTTACATCGCCACTTCTGTTTGGGAATTCGAGACAAATCAAGTGAGATTTGTTGTAC\n-GAAGGAAATGACTCCTCGGCCGCTTTCTTATATATATTTTCGTCCTTTAAAATAGCTCTA\n-ACATCTGCAAATCCAACCAAATGACGCTCAAAACCAAAAATGTAAAAAATAAACTGCCGA\n-TCGCAAATGAACACCAATGCGGTCACATTTTCAAGCACGAA
AAAAGCTTCAAAATACAAA\n-AAACTTTAGCGCAGAAAATAAACGAAAGAGAAGAAGAAGAAGACCTTTGGCGACGTACTG\n-AATTTCGCCGGCTGGGGCATTAAGAAGGAACCATTTGGCAATCTCAAT\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/output.png
b
Binary file SMART/data/output.png has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/part0.bam
b
Binary file SMART/data/part0.bam has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/part0.sam
--- a/SMART/data/part0.sam Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,448 +0,0 @@\n-@SQ\tSN:C10HBa0111D09_LR276\tLN:9300\n-@SQ\tSN:C11HBa0029C01_LR281\tLN:10969\n-@SQ\tSN:C11HBa0034I10_LR282\tLN:9056\n-@SQ\tSN:C11HBa0054I23_LR283\tLN:10301\n-@SQ\tSN:C11HBa0062I24_LR284\tLN:10050\n-@SQ\tSN:C11HBa0064J13_LR285\tLN:9385\n-@SQ\tSN:C11HBa0072I13_LR286\tLN:9556\n-@SQ\tSN:C11HBa0089M02_LR287\tLN:9244\n-@SQ\tSN:C11HBa0096D22_LR288\tLN:9184\n-@SQ\tSN:C11HBa0107K14_LR289\tLN:9115\n-@SQ\tSN:C11HBa0139J14_LR291\tLN:10002\n-@SQ\tSN:C11HBa0143O06_LR374\tLN:10785\n-@SQ\tSN:C11HBa0161D01_LR292\tLN:9057\n-@SQ\tSN:C11HBa0168B23_LR293\tLN:9826\n-@SQ\tSN:C11HBa0190J03_LR294\tLN:10992\n-@SQ\tSN:C11HBa0249E07_LR279\tLN:10008\n-@SQ\tSN:C11HBa0303G16_LR296\tLN:9430\n-@SQ\tSN:C11HBa0323E19_LR297\tLN:9657\n-@SQ\tSN:C11SLe0053P22_LR298\tLN:9827\n-@SQ\tSN:C11SLm0052K14_LR376\tLN:10013\n-@SQ\tSN:C12HBa115G22_LR301\tLN:10021\n-@SQ\tSN:C12HBa120K4_LR313\tLN:10271\n-@SQ\tSN:C12HBa144B17_LR302\tLN:9247\n-@SQ\tSN:C12HBa149G24_LR381\tLN:9271\n-@SQ\tSN:C12HBa165B12_LR303\tLN:9257\n-@SQ\tSN:C12HBa183M6_LR379\tLN:9473\n-@SQ\tSN:C12HBa221M9_LR377\tLN:10755\n-@SQ\tSN:C12HBa224N6_LR382\tLN:9130\n-@SQ\tSN:C12HBa26C13_LR299\tLN:9139\n-@SQ\tSN:C12HBa326K10_LR306\tLN:10414\n-@SQ\tSN:C12HBa90D9_LR311\tLN:9638\n-@SQ\tSN:C12HBa93P12_LR312\tLN:9510\n-@SQ\tSN:C12SLe124D18_LR385\tLN:10545\n-@SQ\tSN:C12SLeRI72J6_LR378\tLN:9337\n-@SQ\tSN:C12SLm103K8_LR380\tLN:10118\n-@SQ\tSN:C01HBa0003D15_LR7\tLN:10776\n-@SQ\tSN:C01HBa0163B20_LR10\tLN:9321\n-@SQ\tSN:C01HBa0216G16_LR11\tLN:10332\n-@SQ\tSN:C01HBa0256E08_LR13\tLN:9024\n-@SQ\tSN:C01HBa0329A12_LR14\tLN:9536\n-@SQ\tSN:BAC19_LR16\tLN:9760\n-@SQ\tSN:C02HBa0008G02_LR67\tLN:9205\n-@SQ\tSN:C02HBa0011O23_LR68\tLN:9399\n-@SQ\tSN:C02HBa0016A12_LR19\tLN:9822\n-@SQ\tSN:C02HBa0027B01_LR21\tLN:9222\n-@SQ\tSN:C02HBa0030A21_LR22\tLN:9147\n-@SQ\tSN:C02HBa0046M08_LR23\tLN:10763\n-@SQ\tSN:C02HBa0072A04_LR26\tLN:9766\n-@SQ\tSN:C02HBa0075D08_LR28\tLN:10744\n-@SQ\tSN:C02HBa0124N09_LR31\tLN:9335\n-@SQ\tSN:C02HBa0155D20_LR36\tLN:10743\n-@SQ\tSN:C02HBa0155E
05_LR37\tLN:10417\n-@SQ\tSN:C02HBa0164H08_LR38\tLN:10279\n-@SQ\tSN:C02HBa0167J21_LR39\tLN:9925\n-@SQ\tSN:C02HBa0185P07_LR40\tLN:9818\n-@SQ\tSN:C02HBa0190N21_LR41\tLN:10835\n-@SQ\tSN:C02HBa0190P16_LR331\tLN:10808\n-@SQ\tSN:C02HBa0194L19_LR42\tLN:10280\n-@SQ\tSN:C02HBa0204A09_LR332\tLN:10029\n-@SQ\tSN:C02HBa0204D01_LR334\tLN:9746\n-@SQ\tSN:C02HBa0214B22_LR325\tLN:9581\n-@SQ\tSN:C02HBa0215M12_LR319\tLN:9918\n-@SQ\tSN:C02HBa0228I09_LR329\tLN:10933\n-@SQ\tSN:C02HBa0236E02_LR326\tLN:9822\n-@SQ\tSN:C02HBa0284G15_LR47\tLN:9034\n-@SQ\tSN:C02HBa0291P19_LR48\tLN:9826\n-@SQ\tSN:C02HBa0329G05_LR52\tLN:9637\n-@SQ\tSN:C02SLe0010H16_LR53\tLN:10744\n-@SQ\tSN:C02SLe0018B07_LR335\tLN:9222\n-@SQ\tSN:C02SLe0034H10_LR327\tLN:10833\n-@SQ\tSN:C02SLe0127J16_LR59\tLN:10965\n-@SQ\tSN:C02SLe0132D01_LR60\tLN:10524\n-@SQ\tSN:C02SLm0057H03_LR336\tLN:9514\n-@SQ\tSN:C02SLm0057H03_LR64\tLN:9170\n-@SQ\tSN:C02SLm0057H03_LR65\tLN:9532\n-@SQ\tSN:C03HBa0012D06_LR72\tLN:10645\n-@SQ\tSN:C03HBa0030O03_LR74\tLN:10569\n-@SQ\tSN:C03HBa0034B23_LR76\tLN:10005\n-@SQ\tSN:C03HBa0040F22_LR77\tLN:10227\n-@SQ\tSN:C03HBa0054O21_LR78\tLN:9044\n-@SQ\tSN:C03HBa0076J13_LR79\tLN:10097\n-@SQ\tSN:C03HBa0233O20_LR82\tLN:9753\n-@SQ\tSN:C03HBa0295I12_LR83\tLN:10258\n-@SQ\tSN:C03HBa0318C22_LR84\tLN:10004\n-@SQ\tSN:C03HBa0323D22_LR85\tLN:9222\n-@SQ\tSN:C04HBa127N12_LR346\tLN:10533\n-@SQ\tSN:C04HBa132O11_LR104\tLN:10306\n-@SQ\tSN:C04HBa164O3_LR344\tLN:9345\n-@SQ\tSN:C04HBa190C13_LR106\tLN:10719\n-@SQ\tSN:C04HBa198I15_LR107\tLN:10673\n-@SQ\tSN:C04HBa219H8_LR109\tLN:10174\n-@SQ\tSN:C04HBa239P14_LR111\tLN:10483\n-@SQ\tSN:C04HBa255I2_LR112\tLN:10650\n-@SQ\tSN:C04HBa27G19_LR337\tLN:9788\n-@SQ\tSN:C04HBa2G1_LR120\tLN:9322\n-@SQ\tSN:C04HBa331L22_LR115\tLN:10697\n-@SQ\tSN:C04HBa35C16_LR339\tLN:9494\n-@SQ\tSN:C04HBa36C23_LR91\tLN:10103\n-@SQ\tSN:C04HBa50I18_LR341\tLN:10825\n-@SQ\tSN:C04HBa58E11_LR93\tLN:9927\n-@SQ\tSN:C04HBa66O12_LR94\tLN:9355\n-@SQ\tSN:C04HBa68N5_LR343\tLN:9886\n-@SQ\tSN:C04HBa6E18_LR87\tLN:9265\n-@SQ\tSN:C04HBa6O16_LR123\
tLN:10386\n-@SQ\tSN:C04HBa78E4_LR98\tLN:9994\n-@SQ\tSN:C04HBa78J4_LR99\tLN:9165\n-@SQ\tSN:C04HBa80D3_LR100\tLN:9781\n-@SQ\tSN:C04HBa8K13_LR338\tLN:9345\n-@SQ\tSN:C04HBa96I8_LR101\tLN:9693\n-@SQ\tSN:C04SLm14G22_LR116\tLN:10306\n-@SQ\tSN:C04SLm39E17_LR117\tLN:9105\n-@SQ\tSN:C05HBa0003C20_LR126\tLN:9460\n-@SQ\tSN:C05HBa0006N20_LR128\tLN:1010'..b'i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09HBa0116C14_LR240,+6179,36M,0;\n-HWI-EAS337_3:7:1:1567:1795\t16\tC07HBa0309B15_LR190\t4202\t37\t36M\t*\t0\t0\tATAAACCCTAACACTTCTTCTCCTGTTACTGTTTCC\tUUUUMUVVWWVVVVWVVWVVWVWWWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:216:392\t0\tC04HBa96I8_LR101\t4961\t37\t36M\t*\t0\t0\tGAAGATTTTTTAATCTTGTGGTCTTAAACGTGTCAC\tWWWWWWWWWWWWWWWWWVWVUWWWWVVWWQUSUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:38:1803\t16\tC04HBa50I18_LR341\t2928\t37\t36M\t*\t0\t0\tCATACCTGATTTTTTGCCATATGTTTGTGATTTCAC\tUUUUUUWWWWWWWWWWWVWWWWWWWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:425:1196\t16\tC06HBa0217M17_LR166\t4141\t37\t36M\t*\t0\t0\tCAATCCTGCTTTAGCACTGCACTGTGATCTCTGCTC\tUUUUUUVVVVVWWVWWWVWWTVWWWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:181:410\t16\tC02HBa0072A04_LR26\t2348\t37\t36M\t*\t0\t0\tAAATCCTAATTACTTCCCCCCTCATTTCATTATTTC\tUUUUUUWWWVWWVWWWWVVVVWWWWVWVVWWWWWWV\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1065:1826\t16\tC09SLe0130H12_LR273\t4026\t0\t36M\t*\t0\t0\tTTATTCCCTGTCCATAGAAGGCATAAATAAGCAATC\tUUUUUUWVWWWVVWVWWWWVWVWWWWWWWWWWWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09HBa0116C14_LR240,+5410,36M,0;\n-HWI-EAS337_3:7:1:79:1444\t16\tC02HBa0027B01_LR21\t4337\t0\t36M\t*\t0\t0\tTCGGAAAGAAAACATCTAGAGCAGAGATCAATATTC\tRQUUUUWWWWWWVVWVVWWWWVWWWWWWWWWWWVWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C02SLe0018B07_LR335,+4851,36M,0;\n-H
WI-EAS337_3:7:1:1634:1526\t16\tC09SLe0085A10_LR364\t607\t37\t36M\t*\t0\t0\tCCTTCTAGCAAAAGCCCTTCGAGTAGTCATATCTTC\tRRUUUUWVTWWWWVVVWWWVWWWWWWWVWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:462:1320\t16\tC12HBa326K10_LR306\t6488\t25\t36M\t*\t0\t0\tAAAGAAAAAAATAATAAATAATAATTAAATCAAAAC\tUUCUUUWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:2C28G4\n-HWI-EAS337_3:7:1:532:1095\t0\tC04HBa50I18_LR341\t1\t37\t36M\t*\t0\t0\tGGTCTGGCTATTTGATTTCCACCTGCTACCCCCGCA\tWWWVWWWWWWVWWVVVWWVWWVVVTVVWVVUUUSUJ\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:27:305\t4\t*\t0\t0\t*\t*\t0\t0\tGATTGATTCAGTAATTGAGTTAACCTCTGAAAGAGA\tWWWVVWWWWWVWWWWVVWVWWWWWVWVWQVUUURRU\n-HWI-EAS337_3:7:1:832:1960\t16\tC07HBa0002M15_LR175\t7167\t0\t36M\t*\t0\t0\tCTTTGCCAAAACAAAGTGAAGTACTGGATGTACCTC\tPUUUUQKWWWWVWWWWWWWWWWWWWWWWWWWWWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:1\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09HBa0194K19_LR362,+1277,36M,0;C11HBa0029C01_LR281,-7887,36M,1;\n-HWI-EAS337_3:7:1:1312:645\t16\tC06HBa0120H21_LR161\t186\t37\t36M\t*\t0\t0\tTCAACTTTAGCTGCACCCTTGAATATGTGATTGTGC\tUQUUUUVVWWVWWVWVVVWWWWWWWWWWWWWWWVWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1107:226\t0\tC12HBa326K10_LR306\t5231\t37\t36M\t*\t0\t0\tGCAGAAAAAACGAATTTGTTTTGGCCCCCACTTCGT\tWWWWWWWWWWWWVVWWWVWWWWVQVVWWVVUUUURU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:274:1287\t0\tC09SLm0008K04_LR274\t10575\t37\t36M\t*\t0\t0\tGAGGATTTGCCTAGAGGTGCTTTTGTTTGTGAATAT\tWWWWWWWWWWWWWVVVVWWWWWWWVWWWVWRUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1772:270\t4\t*\t0\t0\t*\t*\t0\t0\tGGAAATACTAACTANANNNNNNNNNNNNNNNNNNNN\tWWVVWWWWWVWVWWCVCCCCCCCCCCCCCCCCCCC@\n-HWI-EAS337_3:7:1:1704:1373\t16\tC06HBa0217M17_LR166\t3360\t37\t36M\t*\t0\t0\tCAAACAATTCGAAGGTTTCCACAAATCAGCTTTACC\tQUUSSUWOVVWWWWWWWVVSWWWWSWWWVWWWWWWW\tXT:A
:U\tNM:i:1\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:10A25\n-HWI-EAS337_3:7:1:241:903\t16\tC06HBa0066I09_LR156\t5444\t37\t36M\t*\t0\t0\tCATATTGAATTTGCTCATTCGACTTGGTACATTACC\tRUUUUUWVWWWWWWWVWWWVWWVWWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:404:1924\t16\tC08HBa0239G21_LR221\t4431\t37\t36M\t*\t0\t0\tGAGTGTTTGCAATCCAAGTGTTCGAGTTGCATCGAC\tUUUUUUWWWWVWWVWWWWWWWWWWWWWWWWWWWWWV\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:23:1455\t0\tC02HBa0190P16_LR331\t3279\t37\t36M\t*\t0\t0\tGGGGAATTGTTATGCTTTAAACTAATGGGAAAAGAA\tWWWWWWWWWWWWWVWVWWWVWVVWWWTVTWUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/part1.bam
b
Binary file SMART/data/part1.bam has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/part1.sam
--- a/SMART/data/part1.sam Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,448 +0,0 @@\n-@SQ\tSN:C10HBa0111D09_LR276\tLN:9300\n-@SQ\tSN:C11HBa0029C01_LR281\tLN:10969\n-@SQ\tSN:C11HBa0034I10_LR282\tLN:9056\n-@SQ\tSN:C11HBa0054I23_LR283\tLN:10301\n-@SQ\tSN:C11HBa0062I24_LR284\tLN:10050\n-@SQ\tSN:C11HBa0064J13_LR285\tLN:9385\n-@SQ\tSN:C11HBa0072I13_LR286\tLN:9556\n-@SQ\tSN:C11HBa0089M02_LR287\tLN:9244\n-@SQ\tSN:C11HBa0096D22_LR288\tLN:9184\n-@SQ\tSN:C11HBa0107K14_LR289\tLN:9115\n-@SQ\tSN:C11HBa0139J14_LR291\tLN:10002\n-@SQ\tSN:C11HBa0143O06_LR374\tLN:10785\n-@SQ\tSN:C11HBa0161D01_LR292\tLN:9057\n-@SQ\tSN:C11HBa0168B23_LR293\tLN:9826\n-@SQ\tSN:C11HBa0190J03_LR294\tLN:10992\n-@SQ\tSN:C11HBa0249E07_LR279\tLN:10008\n-@SQ\tSN:C11HBa0303G16_LR296\tLN:9430\n-@SQ\tSN:C11HBa0323E19_LR297\tLN:9657\n-@SQ\tSN:C11SLe0053P22_LR298\tLN:9827\n-@SQ\tSN:C11SLm0052K14_LR376\tLN:10013\n-@SQ\tSN:C12HBa115G22_LR301\tLN:10021\n-@SQ\tSN:C12HBa120K4_LR313\tLN:10271\n-@SQ\tSN:C12HBa144B17_LR302\tLN:9247\n-@SQ\tSN:C12HBa149G24_LR381\tLN:9271\n-@SQ\tSN:C12HBa165B12_LR303\tLN:9257\n-@SQ\tSN:C12HBa183M6_LR379\tLN:9473\n-@SQ\tSN:C12HBa221M9_LR377\tLN:10755\n-@SQ\tSN:C12HBa224N6_LR382\tLN:9130\n-@SQ\tSN:C12HBa26C13_LR299\tLN:9139\n-@SQ\tSN:C12HBa326K10_LR306\tLN:10414\n-@SQ\tSN:C12HBa90D9_LR311\tLN:9638\n-@SQ\tSN:C12HBa93P12_LR312\tLN:9510\n-@SQ\tSN:C12SLe124D18_LR385\tLN:10545\n-@SQ\tSN:C12SLeRI72J6_LR378\tLN:9337\n-@SQ\tSN:C12SLm103K8_LR380\tLN:10118\n-@SQ\tSN:C01HBa0003D15_LR7\tLN:10776\n-@SQ\tSN:C01HBa0163B20_LR10\tLN:9321\n-@SQ\tSN:C01HBa0216G16_LR11\tLN:10332\n-@SQ\tSN:C01HBa0256E08_LR13\tLN:9024\n-@SQ\tSN:C01HBa0329A12_LR14\tLN:9536\n-@SQ\tSN:BAC19_LR16\tLN:9760\n-@SQ\tSN:C02HBa0008G02_LR67\tLN:9205\n-@SQ\tSN:C02HBa0011O23_LR68\tLN:9399\n-@SQ\tSN:C02HBa0016A12_LR19\tLN:9822\n-@SQ\tSN:C02HBa0027B01_LR21\tLN:9222\n-@SQ\tSN:C02HBa0030A21_LR22\tLN:9147\n-@SQ\tSN:C02HBa0046M08_LR23\tLN:10763\n-@SQ\tSN:C02HBa0072A04_LR26\tLN:9766\n-@SQ\tSN:C02HBa0075D08_LR28\tLN:10744\n-@SQ\tSN:C02HBa0124N09_LR31\tLN:9335\n-@SQ\tSN:C02HBa0155D20_LR36\tLN:10743\n-@SQ\tSN:C02HBa0155E
05_LR37\tLN:10417\n-@SQ\tSN:C02HBa0164H08_LR38\tLN:10279\n-@SQ\tSN:C02HBa0167J21_LR39\tLN:9925\n-@SQ\tSN:C02HBa0185P07_LR40\tLN:9818\n-@SQ\tSN:C02HBa0190N21_LR41\tLN:10835\n-@SQ\tSN:C02HBa0190P16_LR331\tLN:10808\n-@SQ\tSN:C02HBa0194L19_LR42\tLN:10280\n-@SQ\tSN:C02HBa0204A09_LR332\tLN:10029\n-@SQ\tSN:C02HBa0204D01_LR334\tLN:9746\n-@SQ\tSN:C02HBa0214B22_LR325\tLN:9581\n-@SQ\tSN:C02HBa0215M12_LR319\tLN:9918\n-@SQ\tSN:C02HBa0228I09_LR329\tLN:10933\n-@SQ\tSN:C02HBa0236E02_LR326\tLN:9822\n-@SQ\tSN:C02HBa0284G15_LR47\tLN:9034\n-@SQ\tSN:C02HBa0291P19_LR48\tLN:9826\n-@SQ\tSN:C02HBa0329G05_LR52\tLN:9637\n-@SQ\tSN:C02SLe0010H16_LR53\tLN:10744\n-@SQ\tSN:C02SLe0018B07_LR335\tLN:9222\n-@SQ\tSN:C02SLe0034H10_LR327\tLN:10833\n-@SQ\tSN:C02SLe0127J16_LR59\tLN:10965\n-@SQ\tSN:C02SLe0132D01_LR60\tLN:10524\n-@SQ\tSN:C02SLm0057H03_LR336\tLN:9514\n-@SQ\tSN:C02SLm0057H03_LR64\tLN:9170\n-@SQ\tSN:C02SLm0057H03_LR65\tLN:9532\n-@SQ\tSN:C03HBa0012D06_LR72\tLN:10645\n-@SQ\tSN:C03HBa0030O03_LR74\tLN:10569\n-@SQ\tSN:C03HBa0034B23_LR76\tLN:10005\n-@SQ\tSN:C03HBa0040F22_LR77\tLN:10227\n-@SQ\tSN:C03HBa0054O21_LR78\tLN:9044\n-@SQ\tSN:C03HBa0076J13_LR79\tLN:10097\n-@SQ\tSN:C03HBa0233O20_LR82\tLN:9753\n-@SQ\tSN:C03HBa0295I12_LR83\tLN:10258\n-@SQ\tSN:C03HBa0318C22_LR84\tLN:10004\n-@SQ\tSN:C03HBa0323D22_LR85\tLN:9222\n-@SQ\tSN:C04HBa127N12_LR346\tLN:10533\n-@SQ\tSN:C04HBa132O11_LR104\tLN:10306\n-@SQ\tSN:C04HBa164O3_LR344\tLN:9345\n-@SQ\tSN:C04HBa190C13_LR106\tLN:10719\n-@SQ\tSN:C04HBa198I15_LR107\tLN:10673\n-@SQ\tSN:C04HBa219H8_LR109\tLN:10174\n-@SQ\tSN:C04HBa239P14_LR111\tLN:10483\n-@SQ\tSN:C04HBa255I2_LR112\tLN:10650\n-@SQ\tSN:C04HBa27G19_LR337\tLN:9788\n-@SQ\tSN:C04HBa2G1_LR120\tLN:9322\n-@SQ\tSN:C04HBa331L22_LR115\tLN:10697\n-@SQ\tSN:C04HBa35C16_LR339\tLN:9494\n-@SQ\tSN:C04HBa36C23_LR91\tLN:10103\n-@SQ\tSN:C04HBa50I18_LR341\tLN:10825\n-@SQ\tSN:C04HBa58E11_LR93\tLN:9927\n-@SQ\tSN:C04HBa66O12_LR94\tLN:9355\n-@SQ\tSN:C04HBa68N5_LR343\tLN:9886\n-@SQ\tSN:C04HBa6E18_LR87\tLN:9265\n-@SQ\tSN:C04HBa6O16_LR123\
tLN:10386\n-@SQ\tSN:C04HBa78E4_LR98\tLN:9994\n-@SQ\tSN:C04HBa78J4_LR99\tLN:9165\n-@SQ\tSN:C04HBa80D3_LR100\tLN:9781\n-@SQ\tSN:C04HBa8K13_LR338\tLN:9345\n-@SQ\tSN:C04HBa96I8_LR101\tLN:9693\n-@SQ\tSN:C04SLm14G22_LR116\tLN:10306\n-@SQ\tSN:C04SLm39E17_LR117\tLN:9105\n-@SQ\tSN:C05HBa0003C20_LR126\tLN:9460\n-@SQ\tSN:C05HBa0006N20_LR128\tLN:1010'..b'NM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:879:430\t0\tC04HBa36C23_LR91\t274\t37\t36M\t*\t0\t0\tGAGAGATCCTAAAGAGACACAGGAACTAGATCAAAA\tWWWWWWWWWWWWWWWWWWWWWWWWWVWWWVUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1754:671\t16\tC05HBa0138J03_LR135\t4988\t25\t36M\t*\t0\t0\tCTCATTGACCTACAATTTTTATATGCCGTGGCTCAC\tRUUUUUVWVVWVVWWVWWWWWWWWWWWWWWWWWWWW\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:14G8C12\n-HWI-EAS337_3:7:1:14:1473\t0\tC04HBa50I18_LR341\t1\t37\t36M\t*\t0\t0\tGGTCTGGCTATTTGATTTCCACCTGCTACCCCCGCA\tWWVWVVWWWWWWWWWWWWWWWWWWVVWVWVUUUUUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:425:150\t16\tC02HBa0124N09_LR31\t1935\t37\t36M\t*\t0\t0\tTCCTCCAAGAAACTTCTCCGATTACTCTCCATTTGC\tRSUUSSWWVWWWWWWWWVVWWWWWWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:773:145\t16\tC04HBa164O3_LR344\t8533\t37\t36M\t*\t0\t0\tTTAGTATTGATTGTAAGGTGATCTGTTGGAAATTAC\tUUUUUUVWVVWWVVWWVWWVWWVVWWWWWWWWWWWW\tXT:A:U\tNM:i:1\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:4A31\n-HWI-EAS337_3:7:1:1191:747\t16\tC06HBa0147H20_LR146\t3868\t37\t36M\t*\t0\t0\tTGCGATAAAGAACAACTTCAATTGCAAAGTTAGATC\tKUUUUUWVWQWVVWVVVVVWWVWVVRWVWWWVWWVV\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:103:821\t4\t*\t0\t0\t*\t*\t0\t0\tGTAGAGGAGGTAGGCTTGGTGGTTCTGTAGATGTTT\tWWWWWWWWVWVWWVVWWUVVVVUVJWKVJKKURUUU\n-HWI-EAS337_3:7:1:1689:453\t0\tC09HBa0142I14_LR265\t8410\t37\t36M\t*\t0\t0\tGAAAAAAACACTCTGATTTGTAAGAGAGAATTTAAC\tWWWWWWWWWWWWWWWVWWVWVVVWWVWWVVUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1
\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:259:111\t16\tC09SLe0076N09_LR363\t6949\t0\t36M\t*\t0\t0\tATTCTGAGGACTATGCCTAGCAGCTGAAATCACCAC\tUUUUUUWVVVQVWVVVVWUWWVWWWWWWWWWWWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLm0143I09_LR365,-6949,36M,0;\n-HWI-EAS337_3:7:1:577:868\t16\tC12HBa149G24_LR381\t8433\t37\t36M\t*\t0\t0\tGTATGAAGAGGAGAAACTGCAAAAAGAAGCCTTAGC\tUUURUUVVWWWWWWWWVVVVWWWWWWWVWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1223:332\t16\tC04HBa27G19_LR337\t2599\t37\t36M\t*\t0\t0\tTTCCATGAAAACATTTTCCTTCATACCAAACACACC\tRRSRUUWWWWWVWWWWWVVWWVWWWVVWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1626:1930\t16\tC12HBa326K10_LR306\t9855\t37\t36M\t*\t0\t0\tGGATAAAGTTTGCGTATACACCACCCTTCCCATACC\tUUUUUUWVVVVWVVWWVWVWWWWWWWWWWWWWWVWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1310:342\t0\tC07HBa0287B22_LR188\t267\t25\t36M\t*\t0\t0\tGTTTATATGATCAGAAACGGATCCAGTTTCAGACAT\tWWWWWWWWVWWWWVWWWWTWWWWVWVVWWVURUUUU\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:12G4T18\n-HWI-EAS337_3:7:1:629:1071\t0\tC02SLm0057H03_LR65\t8840\t37\t36M\t*\t0\t0\tGAATCACCGTCGGTGCCGTCATCGGTGATGGTGGAC\tWWWWWWWWWWWWWVVWWVVWVVVWWVWDVTUMSUPC\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1306:1849\t16\tC04HBa8K13_LR338\t8095\t37\t36M\t*\t0\t0\tTTATTGAATCTGGGTTTGATCTCAAATCGAAGATAC\tUUUUUUWVVVVWWWWWWWWWVWVWWVWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1169:328\t0\tC04HBa164O3_LR344\t1860\t37\t36M\t*\t0\t0\tGATTGAATATCTCTCAACTAGAATAGACTCACCAAT\tWWWWWWWWWWWWWWVWWWWWVVWWWWVVWVUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:752:1933\t16\tC06HBa0106K23_LR159\t10004\t37\t36M\t*\t0\t0\tTGACAAACAGCAGCCAAAATACTACAGCCTGGAATC\t@CRQJUVQVOQWVVUWWWWWWWWVWWWWWWWWWWWW\tXT:A:U\tNM:i:1\tX0:i:1\tX1:i:0
\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:4C31\n-HWI-EAS337_3:7:1:1726:1896\t0\tC09HBa0194K19_LR362\t423\t23\t36M\t*\t0\t0\tGTTCAAGGCTCGTTTAGTGGTTAAAGGATATTCACA\tWWWWWWVWWWWWVWWVWVWUVWVVWVTVWVUUSUUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:1\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C11HBa0029C01_LR281,-8741,36M,1;\n-HWI-EAS337_3:7:1:382:233\t16\tC04HBa80D3_LR100\t3451\t37\t36M\t*\t0\t0\tAATAAAGGCATCAAGAAGTATACAACCAAATTCTTC\tUUUUUUWWTWVVWWWWWWWWWVVWWVVWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1209:738\t16\tC09SLm0018L06_LR366\t5546\t37\t36M\t*\t0\t0\tTTTTCCAGACAAGCACGAAACTATATAAACCATTAC\tRUUUUSVVWJWWPVWVWWWWVWWWWVWWVVWWVWVW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/part2.bam
b
Binary file SMART/data/part2.bam has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/part2.sam
--- a/SMART/data/part2.sam Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,448 +0,0 @@\n-@SQ\tSN:C10HBa0111D09_LR276\tLN:9300\n-@SQ\tSN:C11HBa0029C01_LR281\tLN:10969\n-@SQ\tSN:C11HBa0034I10_LR282\tLN:9056\n-@SQ\tSN:C11HBa0054I23_LR283\tLN:10301\n-@SQ\tSN:C11HBa0062I24_LR284\tLN:10050\n-@SQ\tSN:C11HBa0064J13_LR285\tLN:9385\n-@SQ\tSN:C11HBa0072I13_LR286\tLN:9556\n-@SQ\tSN:C11HBa0089M02_LR287\tLN:9244\n-@SQ\tSN:C11HBa0096D22_LR288\tLN:9184\n-@SQ\tSN:C11HBa0107K14_LR289\tLN:9115\n-@SQ\tSN:C11HBa0139J14_LR291\tLN:10002\n-@SQ\tSN:C11HBa0143O06_LR374\tLN:10785\n-@SQ\tSN:C11HBa0161D01_LR292\tLN:9057\n-@SQ\tSN:C11HBa0168B23_LR293\tLN:9826\n-@SQ\tSN:C11HBa0190J03_LR294\tLN:10992\n-@SQ\tSN:C11HBa0249E07_LR279\tLN:10008\n-@SQ\tSN:C11HBa0303G16_LR296\tLN:9430\n-@SQ\tSN:C11HBa0323E19_LR297\tLN:9657\n-@SQ\tSN:C11SLe0053P22_LR298\tLN:9827\n-@SQ\tSN:C11SLm0052K14_LR376\tLN:10013\n-@SQ\tSN:C12HBa115G22_LR301\tLN:10021\n-@SQ\tSN:C12HBa120K4_LR313\tLN:10271\n-@SQ\tSN:C12HBa144B17_LR302\tLN:9247\n-@SQ\tSN:C12HBa149G24_LR381\tLN:9271\n-@SQ\tSN:C12HBa165B12_LR303\tLN:9257\n-@SQ\tSN:C12HBa183M6_LR379\tLN:9473\n-@SQ\tSN:C12HBa221M9_LR377\tLN:10755\n-@SQ\tSN:C12HBa224N6_LR382\tLN:9130\n-@SQ\tSN:C12HBa26C13_LR299\tLN:9139\n-@SQ\tSN:C12HBa326K10_LR306\tLN:10414\n-@SQ\tSN:C12HBa90D9_LR311\tLN:9638\n-@SQ\tSN:C12HBa93P12_LR312\tLN:9510\n-@SQ\tSN:C12SLe124D18_LR385\tLN:10545\n-@SQ\tSN:C12SLeRI72J6_LR378\tLN:9337\n-@SQ\tSN:C12SLm103K8_LR380\tLN:10118\n-@SQ\tSN:C01HBa0003D15_LR7\tLN:10776\n-@SQ\tSN:C01HBa0163B20_LR10\tLN:9321\n-@SQ\tSN:C01HBa0216G16_LR11\tLN:10332\n-@SQ\tSN:C01HBa0256E08_LR13\tLN:9024\n-@SQ\tSN:C01HBa0329A12_LR14\tLN:9536\n-@SQ\tSN:BAC19_LR16\tLN:9760\n-@SQ\tSN:C02HBa0008G02_LR67\tLN:9205\n-@SQ\tSN:C02HBa0011O23_LR68\tLN:9399\n-@SQ\tSN:C02HBa0016A12_LR19\tLN:9822\n-@SQ\tSN:C02HBa0027B01_LR21\tLN:9222\n-@SQ\tSN:C02HBa0030A21_LR22\tLN:9147\n-@SQ\tSN:C02HBa0046M08_LR23\tLN:10763\n-@SQ\tSN:C02HBa0072A04_LR26\tLN:9766\n-@SQ\tSN:C02HBa0075D08_LR28\tLN:10744\n-@SQ\tSN:C02HBa0124N09_LR31\tLN:9335\n-@SQ\tSN:C02HBa0155D20_LR36\tLN:10743\n-@SQ\tSN:C02HBa0155E
05_LR37\tLN:10417\n-@SQ\tSN:C02HBa0164H08_LR38\tLN:10279\n-@SQ\tSN:C02HBa0167J21_LR39\tLN:9925\n-@SQ\tSN:C02HBa0185P07_LR40\tLN:9818\n-@SQ\tSN:C02HBa0190N21_LR41\tLN:10835\n-@SQ\tSN:C02HBa0190P16_LR331\tLN:10808\n-@SQ\tSN:C02HBa0194L19_LR42\tLN:10280\n-@SQ\tSN:C02HBa0204A09_LR332\tLN:10029\n-@SQ\tSN:C02HBa0204D01_LR334\tLN:9746\n-@SQ\tSN:C02HBa0214B22_LR325\tLN:9581\n-@SQ\tSN:C02HBa0215M12_LR319\tLN:9918\n-@SQ\tSN:C02HBa0228I09_LR329\tLN:10933\n-@SQ\tSN:C02HBa0236E02_LR326\tLN:9822\n-@SQ\tSN:C02HBa0284G15_LR47\tLN:9034\n-@SQ\tSN:C02HBa0291P19_LR48\tLN:9826\n-@SQ\tSN:C02HBa0329G05_LR52\tLN:9637\n-@SQ\tSN:C02SLe0010H16_LR53\tLN:10744\n-@SQ\tSN:C02SLe0018B07_LR335\tLN:9222\n-@SQ\tSN:C02SLe0034H10_LR327\tLN:10833\n-@SQ\tSN:C02SLe0127J16_LR59\tLN:10965\n-@SQ\tSN:C02SLe0132D01_LR60\tLN:10524\n-@SQ\tSN:C02SLm0057H03_LR336\tLN:9514\n-@SQ\tSN:C02SLm0057H03_LR64\tLN:9170\n-@SQ\tSN:C02SLm0057H03_LR65\tLN:9532\n-@SQ\tSN:C03HBa0012D06_LR72\tLN:10645\n-@SQ\tSN:C03HBa0030O03_LR74\tLN:10569\n-@SQ\tSN:C03HBa0034B23_LR76\tLN:10005\n-@SQ\tSN:C03HBa0040F22_LR77\tLN:10227\n-@SQ\tSN:C03HBa0054O21_LR78\tLN:9044\n-@SQ\tSN:C03HBa0076J13_LR79\tLN:10097\n-@SQ\tSN:C03HBa0233O20_LR82\tLN:9753\n-@SQ\tSN:C03HBa0295I12_LR83\tLN:10258\n-@SQ\tSN:C03HBa0318C22_LR84\tLN:10004\n-@SQ\tSN:C03HBa0323D22_LR85\tLN:9222\n-@SQ\tSN:C04HBa127N12_LR346\tLN:10533\n-@SQ\tSN:C04HBa132O11_LR104\tLN:10306\n-@SQ\tSN:C04HBa164O3_LR344\tLN:9345\n-@SQ\tSN:C04HBa190C13_LR106\tLN:10719\n-@SQ\tSN:C04HBa198I15_LR107\tLN:10673\n-@SQ\tSN:C04HBa219H8_LR109\tLN:10174\n-@SQ\tSN:C04HBa239P14_LR111\tLN:10483\n-@SQ\tSN:C04HBa255I2_LR112\tLN:10650\n-@SQ\tSN:C04HBa27G19_LR337\tLN:9788\n-@SQ\tSN:C04HBa2G1_LR120\tLN:9322\n-@SQ\tSN:C04HBa331L22_LR115\tLN:10697\n-@SQ\tSN:C04HBa35C16_LR339\tLN:9494\n-@SQ\tSN:C04HBa36C23_LR91\tLN:10103\n-@SQ\tSN:C04HBa50I18_LR341\tLN:10825\n-@SQ\tSN:C04HBa58E11_LR93\tLN:9927\n-@SQ\tSN:C04HBa66O12_LR94\tLN:9355\n-@SQ\tSN:C04HBa68N5_LR343\tLN:9886\n-@SQ\tSN:C04HBa6E18_LR87\tLN:9265\n-@SQ\tSN:C04HBa6O16_LR123\
tLN:10386\n-@SQ\tSN:C04HBa78E4_LR98\tLN:9994\n-@SQ\tSN:C04HBa78J4_LR99\tLN:9165\n-@SQ\tSN:C04HBa80D3_LR100\tLN:9781\n-@SQ\tSN:C04HBa8K13_LR338\tLN:9345\n-@SQ\tSN:C04HBa96I8_LR101\tLN:9693\n-@SQ\tSN:C04SLm14G22_LR116\tLN:10306\n-@SQ\tSN:C04SLm39E17_LR117\tLN:9105\n-@SQ\tSN:C05HBa0003C20_LR126\tLN:9460\n-@SQ\tSN:C05HBa0006N20_LR128\tLN:1010'..b'155\t0\tC09SLe0076N09_LR363\t6097\t0\t36M\t*\t0\t0\tGTTTTGAAGCGTTAACCCTTAGACTGGTTAAGAACA\tWWVWWVWWVWVVWWWOWWWTVTVWVVVWWTURUUSU\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLm0143I09_LR365,+6097,36M,0;\n-HWI-EAS337_3:7:1:1760:1107\t0\tC04HBa164O3_LR344\t5136\t37\t36M\t*\t0\t0\tGAATTGATTGATCGACAACAATAAATTAGTGTGGTG\tWWWWWWWWWVWWWUWWWWWWWWWVVVWWVWUUUSUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:881:441\t0\tC06HBa0066D13_LR353\t2550\t37\t36M\t*\t0\t0\tGCCTTTTAGCATGATCAAAGTCAAACCTTATCTGTC\tWWWWWWWWVWWWVWWWWWVVWWWWWVVWWVUUUSUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:713:1296\t16\tC12SLeRI72J6_LR378\t4507\t37\t36M\t*\t0\t0\tTTGCCCACTATCTGTTATGTCCATGATTTTTCCTCC\tUUUUUUVWWWVWWWWWWWWWWWWWWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1703:1622\t16\tC05HBa0058L13_LR131\t135\t37\t36M\t*\t0\t0\tTAAGATCTTGAAAACTAAATGAAGCACTAAGAACAC\tUUUUUUKWWWWWWWLWWWWWWWWWVWVWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:114:374\t16\tC09HBa0113I06_LR360\t2601\t37\t36M\t*\t0\t0\tTCTGAATATGTATTTGCTTTTTTAGGGAAAAATAAC\tRSUUUUVWVVWWWWVVUVWWWWWWWWWWWWWWVWWW\tXT:A:U\tNM:i:1\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:32A3\n-HWI-EAS337_3:7:1:1688:1598\t0\tC04HBa80D3_LR100\t8540\t37\t36M\t*\t0\t0\tGACCGAAAGATGTCTAGTGGAAAGTAGCCCAGTGGA\tWWWWWWWWWWWWWWWWWWVVWWWWWWWWVWUUUSUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1164:1787\t4\t*\t0\t0\t*\t*\t0\t0\tGAAGTGTGAAATTGATAGATTTAGTGGGCGCACCAA\tWWWWWWWWWWVWWWWWWWWWWWWVVWVWVWU
UUUUU\n-HWI-EAS337_3:7:1:321:1064\t16\tC05HBa0145P19_LR136\t7916\t37\t36M\t*\t0\t0\tACCATAAAACTGAGTCGTCAGAATGATTCTGCTTCC\tURUUUUWVWVVVWWWVWWWWWWWWWWWWWWWWVWVW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:168:1419\t0\tC07HBa0309B15_LR190\t1250\t0\t36M\t*\t0\t0\tGGATCTGTTGACCTTCCTAGGTCAAGACAAGTTTCA\tWWWWWWWWWVWWWWWWWWVVVWWWWVWWWWSUUUUU\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C04HBa255I2_LR112,+3312,36M,0;\n-HWI-EAS337_3:7:1:174:1900\t0\tC02HBa0167J21_LR39\t7262\t37\t36M\t*\t0\t0\tGAGGATGATCCATTAACCGCAGGAACAGTGGAGACT\tWWWWWWWWWWWWWWWVWWWWVWWWWWVWWWUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:399:1239\t16\tC04HBa35C16_LR339\t2042\t37\t36M\t*\t0\t0\tAAGCACATAACACTTCCACCATAAATGGACCATACC\tULSUUUWVWVVVWLVWVWVVWVWWWWVVWWWWWVWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:213:1008\t4\t*\t0\t0\t*\t*\t0\t0\tGATCTGTACACCTCTCTTTTTGCTGAATATATCAGC\tWWWWWWWWVWWWVWWWVVWVWVWWVVTWWWUUUURU\n-HWI-EAS337_3:7:1:656:1907\t16\tC05HBa0131D04_LR133\t767\t37\t36M\t*\t0\t0\tGAAGGTAATAGCATGATATATACCATGACAAGAATC\tUUUSUUWWVWVQWVVWVWWWVWVVWWWWVWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1622:793\t16\tC06HBa0120H21_LR161\t4515\t37\t36M\t*\t0\t0\tTGCTCAGATCCTCTTTCCTTTCTCCTAACCATAATC\tRUSUUUVWVWVVWVWWWVWWWVWVWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:736:1713\t4\t*\t0\t0\t*\t*\t0\t0\tGTAGAGGAGGTAGGCTTGGTGGTTCTGTAGATGTTT\tWWWWWWWWVWVVVWVWWQUUVVQVOWCVQQRUKUUU\n-HWI-EAS337_3:7:1:168:2006\t0\tC03HBa0012D06_LR72\t6920\t37\t36M\t*\t0\t0\tGCAGACCAAGCAAAATTATATTTATCTGGTATGGCT\tWWWWWWWWWWWWWWWULWWVWVWWWVTVQTUSSUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1707:99\t0\tC12HBa224N6_LR382\t6492\t37\t36M\t*\t0\t0\tGTAATGAGTAGATATGCAATTCAATGTCTTTCACTT\tWWWWWWWWWVWWWWWWWWWWWWVVWWVVVWUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM
:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:278:1921\t16\tC11SLm0052K14_LR376\t2794\t37\t36M\t*\t0\t0\tATGTGATAATGTCATCTGTATAAATGTTCTTAAAAC\tRUSUUUVWWVVVUWWWWWWWWVWWWWWWVWWWWVWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:307:1516\t16\tC11HBa0143O06_LR374\t6204\t37\t36M\t*\t0\t0\tAACAGTAGTATGCTGCATACCTTTTCAGTGGCAACC\tUUUUUUWWVWWWVWWVWWWVWVWWWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:489:749\t16\tC09SLm0143I09_LR365\t5492\t0\t36M\t*\t0\t0\tAACAATAGACCAGCAAAATATAGGATATCCAGAAAC\tUUJUURWVWKKWVQWWWWWWVWWWWWWVWWWWWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLe0076N09_LR363,-5492,36M,0;\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/part3.bam
b
Binary file SMART/data/part3.bam has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/part3.sam
--- a/SMART/data/part3.sam Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,448 +0,0 @@\n-@SQ\tSN:C10HBa0111D09_LR276\tLN:9300\n-@SQ\tSN:C11HBa0029C01_LR281\tLN:10969\n-@SQ\tSN:C11HBa0034I10_LR282\tLN:9056\n-@SQ\tSN:C11HBa0054I23_LR283\tLN:10301\n-@SQ\tSN:C11HBa0062I24_LR284\tLN:10050\n-@SQ\tSN:C11HBa0064J13_LR285\tLN:9385\n-@SQ\tSN:C11HBa0072I13_LR286\tLN:9556\n-@SQ\tSN:C11HBa0089M02_LR287\tLN:9244\n-@SQ\tSN:C11HBa0096D22_LR288\tLN:9184\n-@SQ\tSN:C11HBa0107K14_LR289\tLN:9115\n-@SQ\tSN:C11HBa0139J14_LR291\tLN:10002\n-@SQ\tSN:C11HBa0143O06_LR374\tLN:10785\n-@SQ\tSN:C11HBa0161D01_LR292\tLN:9057\n-@SQ\tSN:C11HBa0168B23_LR293\tLN:9826\n-@SQ\tSN:C11HBa0190J03_LR294\tLN:10992\n-@SQ\tSN:C11HBa0249E07_LR279\tLN:10008\n-@SQ\tSN:C11HBa0303G16_LR296\tLN:9430\n-@SQ\tSN:C11HBa0323E19_LR297\tLN:9657\n-@SQ\tSN:C11SLe0053P22_LR298\tLN:9827\n-@SQ\tSN:C11SLm0052K14_LR376\tLN:10013\n-@SQ\tSN:C12HBa115G22_LR301\tLN:10021\n-@SQ\tSN:C12HBa120K4_LR313\tLN:10271\n-@SQ\tSN:C12HBa144B17_LR302\tLN:9247\n-@SQ\tSN:C12HBa149G24_LR381\tLN:9271\n-@SQ\tSN:C12HBa165B12_LR303\tLN:9257\n-@SQ\tSN:C12HBa183M6_LR379\tLN:9473\n-@SQ\tSN:C12HBa221M9_LR377\tLN:10755\n-@SQ\tSN:C12HBa224N6_LR382\tLN:9130\n-@SQ\tSN:C12HBa26C13_LR299\tLN:9139\n-@SQ\tSN:C12HBa326K10_LR306\tLN:10414\n-@SQ\tSN:C12HBa90D9_LR311\tLN:9638\n-@SQ\tSN:C12HBa93P12_LR312\tLN:9510\n-@SQ\tSN:C12SLe124D18_LR385\tLN:10545\n-@SQ\tSN:C12SLeRI72J6_LR378\tLN:9337\n-@SQ\tSN:C12SLm103K8_LR380\tLN:10118\n-@SQ\tSN:C01HBa0003D15_LR7\tLN:10776\n-@SQ\tSN:C01HBa0163B20_LR10\tLN:9321\n-@SQ\tSN:C01HBa0216G16_LR11\tLN:10332\n-@SQ\tSN:C01HBa0256E08_LR13\tLN:9024\n-@SQ\tSN:C01HBa0329A12_LR14\tLN:9536\n-@SQ\tSN:BAC19_LR16\tLN:9760\n-@SQ\tSN:C02HBa0008G02_LR67\tLN:9205\n-@SQ\tSN:C02HBa0011O23_LR68\tLN:9399\n-@SQ\tSN:C02HBa0016A12_LR19\tLN:9822\n-@SQ\tSN:C02HBa0027B01_LR21\tLN:9222\n-@SQ\tSN:C02HBa0030A21_LR22\tLN:9147\n-@SQ\tSN:C02HBa0046M08_LR23\tLN:10763\n-@SQ\tSN:C02HBa0072A04_LR26\tLN:9766\n-@SQ\tSN:C02HBa0075D08_LR28\tLN:10744\n-@SQ\tSN:C02HBa0124N09_LR31\tLN:9335\n-@SQ\tSN:C02HBa0155D20_LR36\tLN:10743\n-@SQ\tSN:C02HBa0155E
05_LR37\tLN:10417\n-@SQ\tSN:C02HBa0164H08_LR38\tLN:10279\n-@SQ\tSN:C02HBa0167J21_LR39\tLN:9925\n-@SQ\tSN:C02HBa0185P07_LR40\tLN:9818\n-@SQ\tSN:C02HBa0190N21_LR41\tLN:10835\n-@SQ\tSN:C02HBa0190P16_LR331\tLN:10808\n-@SQ\tSN:C02HBa0194L19_LR42\tLN:10280\n-@SQ\tSN:C02HBa0204A09_LR332\tLN:10029\n-@SQ\tSN:C02HBa0204D01_LR334\tLN:9746\n-@SQ\tSN:C02HBa0214B22_LR325\tLN:9581\n-@SQ\tSN:C02HBa0215M12_LR319\tLN:9918\n-@SQ\tSN:C02HBa0228I09_LR329\tLN:10933\n-@SQ\tSN:C02HBa0236E02_LR326\tLN:9822\n-@SQ\tSN:C02HBa0284G15_LR47\tLN:9034\n-@SQ\tSN:C02HBa0291P19_LR48\tLN:9826\n-@SQ\tSN:C02HBa0329G05_LR52\tLN:9637\n-@SQ\tSN:C02SLe0010H16_LR53\tLN:10744\n-@SQ\tSN:C02SLe0018B07_LR335\tLN:9222\n-@SQ\tSN:C02SLe0034H10_LR327\tLN:10833\n-@SQ\tSN:C02SLe0127J16_LR59\tLN:10965\n-@SQ\tSN:C02SLe0132D01_LR60\tLN:10524\n-@SQ\tSN:C02SLm0057H03_LR336\tLN:9514\n-@SQ\tSN:C02SLm0057H03_LR64\tLN:9170\n-@SQ\tSN:C02SLm0057H03_LR65\tLN:9532\n-@SQ\tSN:C03HBa0012D06_LR72\tLN:10645\n-@SQ\tSN:C03HBa0030O03_LR74\tLN:10569\n-@SQ\tSN:C03HBa0034B23_LR76\tLN:10005\n-@SQ\tSN:C03HBa0040F22_LR77\tLN:10227\n-@SQ\tSN:C03HBa0054O21_LR78\tLN:9044\n-@SQ\tSN:C03HBa0076J13_LR79\tLN:10097\n-@SQ\tSN:C03HBa0233O20_LR82\tLN:9753\n-@SQ\tSN:C03HBa0295I12_LR83\tLN:10258\n-@SQ\tSN:C03HBa0318C22_LR84\tLN:10004\n-@SQ\tSN:C03HBa0323D22_LR85\tLN:9222\n-@SQ\tSN:C04HBa127N12_LR346\tLN:10533\n-@SQ\tSN:C04HBa132O11_LR104\tLN:10306\n-@SQ\tSN:C04HBa164O3_LR344\tLN:9345\n-@SQ\tSN:C04HBa190C13_LR106\tLN:10719\n-@SQ\tSN:C04HBa198I15_LR107\tLN:10673\n-@SQ\tSN:C04HBa219H8_LR109\tLN:10174\n-@SQ\tSN:C04HBa239P14_LR111\tLN:10483\n-@SQ\tSN:C04HBa255I2_LR112\tLN:10650\n-@SQ\tSN:C04HBa27G19_LR337\tLN:9788\n-@SQ\tSN:C04HBa2G1_LR120\tLN:9322\n-@SQ\tSN:C04HBa331L22_LR115\tLN:10697\n-@SQ\tSN:C04HBa35C16_LR339\tLN:9494\n-@SQ\tSN:C04HBa36C23_LR91\tLN:10103\n-@SQ\tSN:C04HBa50I18_LR341\tLN:10825\n-@SQ\tSN:C04HBa58E11_LR93\tLN:9927\n-@SQ\tSN:C04HBa66O12_LR94\tLN:9355\n-@SQ\tSN:C04HBa68N5_LR343\tLN:9886\n-@SQ\tSN:C04HBa6E18_LR87\tLN:9265\n-@SQ\tSN:C04HBa6O16_LR123\
tLN:10386\n-@SQ\tSN:C04HBa78E4_LR98\tLN:9994\n-@SQ\tSN:C04HBa78J4_LR99\tLN:9165\n-@SQ\tSN:C04HBa80D3_LR100\tLN:9781\n-@SQ\tSN:C04HBa8K13_LR338\tLN:9345\n-@SQ\tSN:C04HBa96I8_LR101\tLN:9693\n-@SQ\tSN:C04SLm14G22_LR116\tLN:10306\n-@SQ\tSN:C04SLm39E17_LR117\tLN:9105\n-@SQ\tSN:C05HBa0003C20_LR126\tLN:9460\n-@SQ\tSN:C05HBa0006N20_LR128\tLN:1010'..b'\t16\tC04HBa35C16_LR339\t4733\t37\t36M\t*\t0\t0\tTTCTTGAATTATTCCCGCTAACTAGGACCCGAGTCC\tUUSUUUWWVVWVVVVVVVVWWVWWWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:15:1065\t4\t*\t0\t0\t*\t*\t0\t0\tGGGTTTAGGGTTTAGGGTTTAGGGTTTAGGGTTTAG\tWWVWWWVVWWWWVWVWWVWWWVUVWVWVTVQSUUUU\n-HWI-EAS337_3:7:1:651:1154\t16\tC09SLm0037I08_LR367\t3924\t37\t36M\t*\t0\t0\tCCTTCCCATTTCTTTATAGCATCATATTCTAGAACC\tUUUUUUWVWVVWVVVWVWWWWVWWVWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:412:1548\t16\tC05HBa0145P19_LR136\t3086\t37\t36M\t*\t0\t0\tCTTCATCCAGAGGGAGCTGAACAAGGACAGTTTATC\tOUUSUUVTWWWVVWWWUWWWWVWWWWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:565:893\t16\tC09HBa0100J12_LR259\t5501\t0\t36M\t*\t0\t0\tAGCAGTTTGTTGGCTGCTGTTTTGAAGAGTGGAGTC\tUUUUUUWWWWVWVVWWWWWWWVWWWWWWWWWWWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLm0129J22_LR373,-5501,36M,0;\n-HWI-EAS337_3:7:1:1086:894\t0\tC07HBa0309F18_LR191\t6311\t37\t36M\t*\t0\t0\tGTGAAGCAGAAGATCTGCAATAACTCTTAAATTTGT\tWWWWWWWVWVVWWWWWWVVSVVVVWVVVVVUUUUSU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:904:892\t16\tC02HBa0016A12_LR19\t1326\t0\t36M\t*\t0\t0\tTGTGAAGAAAGAGGTCGTTTAAAAATCTGAAGTTCC\tUUUUUUWWWWWWWWWVWWWWWWWWWWWVWWWWWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C02HBa0236E02_LR326,+8462,36M,0;\n-HWI-EAS337_3:7:1:1201:1415\t0\tC06HBa0144J05_LR355\t1215\t0\t36M\t*\t0\t0\tGAGTTGGCAAAGTAGGAAGATCTCTTGAAGATTCAA\tWWWWWWWWWWVWWWWWWVWVWWWWWWVVWWUUUUUU\tXT:A:R\tNM:i:0\tX0:i:2\t
X1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C01HBa0003D15_LR7,+308,36M,0;\n-HWI-EAS337_3:7:1:157:1509\t0\tC05HBa0042B19_LR129\t1213\t37\t36M\t*\t0\t0\tGAATTAAGAAACGATTTCACTGAAGAACATAGAACT\tVVWWWVWVSWVVVVSVQHWQWWWQVVVQWWUUUUOS\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1150:1510\t16\tC04HBa164O3_LR344\t2069\t37\t36M\t*\t0\t0\tCCGACTAATGATATAATAAGGATGGTTTCATCTCCC\tRUSURUVWWWWWWVWVWWVWVWVVWWWWWWWWVWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:757:1140\t0\tC05HBa0138J03_LR135\t1491\t37\t36M\t*\t0\t0\tGAAAAAACTGAACTGATAAATGCCTACGAAAGATGT\tWWWWWWWWWWVWWWWWWWWWVWWWWVWVVVUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:397:1260\t16\tC04HBa239P14_LR111\t7648\t37\t36M\t*\t0\t0\tTGGCCAAGGATTGCAGATTAACTATAGTAAGAAAGC\tUUUSRUVVVVVWWWWWWWWWVWWWWWWWWVWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:370:525\t0\tC05HBa0135A02_LR134\t3139\t37\t36M\t*\t0\t0\tGTATAGGGTTTGAGCTAAAGTTGTTGACTTTAATTG\tWWWWWWWWWWWWWWWWWWWVWWVWWVWVWWUUUUUP\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:782:729\t0\tC04HBa35C16_LR339\t254\t37\t36M\t*\t0\t0\tGACTCTATCCGAAGGTGTTTTTTTGATGGATTAACC\tWWWWWWWWWWWWWWWVWWWWWWWWUVWQQVUUUSUR\tXT:A:U\tNM:i:1\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:31C4\n-HWI-EAS337_3:7:1:1602:38\t16\tC07HBa0002M15_LR175\t8458\t37\t36M\t*\t0\t0\tCTTCTGCTCTCTGTCCTTGTACAGGACAGACTCATC\tUURUSUWUVVVRVVVVVVVVVUWWVVWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:204:1509\t16\tC09SLe0068C01_LR272\t1470\t37\t36M\t*\t0\t0\tACTTTGTCACACTTAACCCTGTGAATCCTTGCTCTC\tUUUUUUVWWWWVVWWWWWWVWWWWWWWWWWWWWVWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1525:1263\t0\tC09SLe0085A10_LR364\t5595\t37\t36M\t*\t0\t0\tGGGTAAGAAGCATGTCACATCTATAAATATGAGGCT\tWWWWWWWWWWWWWWWWWWWWWWWWWVWWWWUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i
:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:895:1075\t16\tC09SLe0130H12_LR273\t8142\t0\t36M\t*\t0\t0\tTAGACATTTCTGCTCAATTTCGTTTTTTTGCTAGCC\tUUUUUUVVVWWWVWWWWWWWVWWWWWVWWWWWWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09HBa0116C14_LR240,+1294,36M,0;\n-HWI-EAS337_3:7:1:1204:1920\t16\tC07HBa0002D20_LR197\t3427\t37\t36M\t*\t0\t0\tTGGCTGTCTCCCCTTCTTTTATTCCAGGGGAAAAAC\tUUUUUUVVVVVWWWWVWWWWWWWVVWWWWWWWWWWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:115:342\t0\tC06HBa0197N20_LR164\t5182\t37\t36M\t*\t0\t0\tGAATAACCTCCATAGTAGGCTTGAGAAATTGCTTTA\tWWWWWWWWVWVWWWWVVWWVWWWVWWWWWWUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/part4.bam
b
Binary file SMART/data/part4.bam has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/part4.sam
--- a/SMART/data/part4.sam Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,448 +0,0 @@\n-@SQ\tSN:C10HBa0111D09_LR276\tLN:9300\n-@SQ\tSN:C11HBa0029C01_LR281\tLN:10969\n-@SQ\tSN:C11HBa0034I10_LR282\tLN:9056\n-@SQ\tSN:C11HBa0054I23_LR283\tLN:10301\n-@SQ\tSN:C11HBa0062I24_LR284\tLN:10050\n-@SQ\tSN:C11HBa0064J13_LR285\tLN:9385\n-@SQ\tSN:C11HBa0072I13_LR286\tLN:9556\n-@SQ\tSN:C11HBa0089M02_LR287\tLN:9244\n-@SQ\tSN:C11HBa0096D22_LR288\tLN:9184\n-@SQ\tSN:C11HBa0107K14_LR289\tLN:9115\n-@SQ\tSN:C11HBa0139J14_LR291\tLN:10002\n-@SQ\tSN:C11HBa0143O06_LR374\tLN:10785\n-@SQ\tSN:C11HBa0161D01_LR292\tLN:9057\n-@SQ\tSN:C11HBa0168B23_LR293\tLN:9826\n-@SQ\tSN:C11HBa0190J03_LR294\tLN:10992\n-@SQ\tSN:C11HBa0249E07_LR279\tLN:10008\n-@SQ\tSN:C11HBa0303G16_LR296\tLN:9430\n-@SQ\tSN:C11HBa0323E19_LR297\tLN:9657\n-@SQ\tSN:C11SLe0053P22_LR298\tLN:9827\n-@SQ\tSN:C11SLm0052K14_LR376\tLN:10013\n-@SQ\tSN:C12HBa115G22_LR301\tLN:10021\n-@SQ\tSN:C12HBa120K4_LR313\tLN:10271\n-@SQ\tSN:C12HBa144B17_LR302\tLN:9247\n-@SQ\tSN:C12HBa149G24_LR381\tLN:9271\n-@SQ\tSN:C12HBa165B12_LR303\tLN:9257\n-@SQ\tSN:C12HBa183M6_LR379\tLN:9473\n-@SQ\tSN:C12HBa221M9_LR377\tLN:10755\n-@SQ\tSN:C12HBa224N6_LR382\tLN:9130\n-@SQ\tSN:C12HBa26C13_LR299\tLN:9139\n-@SQ\tSN:C12HBa326K10_LR306\tLN:10414\n-@SQ\tSN:C12HBa90D9_LR311\tLN:9638\n-@SQ\tSN:C12HBa93P12_LR312\tLN:9510\n-@SQ\tSN:C12SLe124D18_LR385\tLN:10545\n-@SQ\tSN:C12SLeRI72J6_LR378\tLN:9337\n-@SQ\tSN:C12SLm103K8_LR380\tLN:10118\n-@SQ\tSN:C01HBa0003D15_LR7\tLN:10776\n-@SQ\tSN:C01HBa0163B20_LR10\tLN:9321\n-@SQ\tSN:C01HBa0216G16_LR11\tLN:10332\n-@SQ\tSN:C01HBa0256E08_LR13\tLN:9024\n-@SQ\tSN:C01HBa0329A12_LR14\tLN:9536\n-@SQ\tSN:BAC19_LR16\tLN:9760\n-@SQ\tSN:C02HBa0008G02_LR67\tLN:9205\n-@SQ\tSN:C02HBa0011O23_LR68\tLN:9399\n-@SQ\tSN:C02HBa0016A12_LR19\tLN:9822\n-@SQ\tSN:C02HBa0027B01_LR21\tLN:9222\n-@SQ\tSN:C02HBa0030A21_LR22\tLN:9147\n-@SQ\tSN:C02HBa0046M08_LR23\tLN:10763\n-@SQ\tSN:C02HBa0072A04_LR26\tLN:9766\n-@SQ\tSN:C02HBa0075D08_LR28\tLN:10744\n-@SQ\tSN:C02HBa0124N09_LR31\tLN:9335\n-@SQ\tSN:C02HBa0155D20_LR36\tLN:10743\n-@SQ\tSN:C02HBa0155E
05_LR37\tLN:10417\n-@SQ\tSN:C02HBa0164H08_LR38\tLN:10279\n-@SQ\tSN:C02HBa0167J21_LR39\tLN:9925\n-@SQ\tSN:C02HBa0185P07_LR40\tLN:9818\n-@SQ\tSN:C02HBa0190N21_LR41\tLN:10835\n-@SQ\tSN:C02HBa0190P16_LR331\tLN:10808\n-@SQ\tSN:C02HBa0194L19_LR42\tLN:10280\n-@SQ\tSN:C02HBa0204A09_LR332\tLN:10029\n-@SQ\tSN:C02HBa0204D01_LR334\tLN:9746\n-@SQ\tSN:C02HBa0214B22_LR325\tLN:9581\n-@SQ\tSN:C02HBa0215M12_LR319\tLN:9918\n-@SQ\tSN:C02HBa0228I09_LR329\tLN:10933\n-@SQ\tSN:C02HBa0236E02_LR326\tLN:9822\n-@SQ\tSN:C02HBa0284G15_LR47\tLN:9034\n-@SQ\tSN:C02HBa0291P19_LR48\tLN:9826\n-@SQ\tSN:C02HBa0329G05_LR52\tLN:9637\n-@SQ\tSN:C02SLe0010H16_LR53\tLN:10744\n-@SQ\tSN:C02SLe0018B07_LR335\tLN:9222\n-@SQ\tSN:C02SLe0034H10_LR327\tLN:10833\n-@SQ\tSN:C02SLe0127J16_LR59\tLN:10965\n-@SQ\tSN:C02SLe0132D01_LR60\tLN:10524\n-@SQ\tSN:C02SLm0057H03_LR336\tLN:9514\n-@SQ\tSN:C02SLm0057H03_LR64\tLN:9170\n-@SQ\tSN:C02SLm0057H03_LR65\tLN:9532\n-@SQ\tSN:C03HBa0012D06_LR72\tLN:10645\n-@SQ\tSN:C03HBa0030O03_LR74\tLN:10569\n-@SQ\tSN:C03HBa0034B23_LR76\tLN:10005\n-@SQ\tSN:C03HBa0040F22_LR77\tLN:10227\n-@SQ\tSN:C03HBa0054O21_LR78\tLN:9044\n-@SQ\tSN:C03HBa0076J13_LR79\tLN:10097\n-@SQ\tSN:C03HBa0233O20_LR82\tLN:9753\n-@SQ\tSN:C03HBa0295I12_LR83\tLN:10258\n-@SQ\tSN:C03HBa0318C22_LR84\tLN:10004\n-@SQ\tSN:C03HBa0323D22_LR85\tLN:9222\n-@SQ\tSN:C04HBa127N12_LR346\tLN:10533\n-@SQ\tSN:C04HBa132O11_LR104\tLN:10306\n-@SQ\tSN:C04HBa164O3_LR344\tLN:9345\n-@SQ\tSN:C04HBa190C13_LR106\tLN:10719\n-@SQ\tSN:C04HBa198I15_LR107\tLN:10673\n-@SQ\tSN:C04HBa219H8_LR109\tLN:10174\n-@SQ\tSN:C04HBa239P14_LR111\tLN:10483\n-@SQ\tSN:C04HBa255I2_LR112\tLN:10650\n-@SQ\tSN:C04HBa27G19_LR337\tLN:9788\n-@SQ\tSN:C04HBa2G1_LR120\tLN:9322\n-@SQ\tSN:C04HBa331L22_LR115\tLN:10697\n-@SQ\tSN:C04HBa35C16_LR339\tLN:9494\n-@SQ\tSN:C04HBa36C23_LR91\tLN:10103\n-@SQ\tSN:C04HBa50I18_LR341\tLN:10825\n-@SQ\tSN:C04HBa58E11_LR93\tLN:9927\n-@SQ\tSN:C04HBa66O12_LR94\tLN:9355\n-@SQ\tSN:C04HBa68N5_LR343\tLN:9886\n-@SQ\tSN:C04HBa6E18_LR87\tLN:9265\n-@SQ\tSN:C04HBa6O16_LR123\
tLN:10386\n-@SQ\tSN:C04HBa78E4_LR98\tLN:9994\n-@SQ\tSN:C04HBa78J4_LR99\tLN:9165\n-@SQ\tSN:C04HBa80D3_LR100\tLN:9781\n-@SQ\tSN:C04HBa8K13_LR338\tLN:9345\n-@SQ\tSN:C04HBa96I8_LR101\tLN:9693\n-@SQ\tSN:C04SLm14G22_LR116\tLN:10306\n-@SQ\tSN:C04SLm39E17_LR117\tLN:9105\n-@SQ\tSN:C05HBa0003C20_LR126\tLN:9460\n-@SQ\tSN:C05HBa0006N20_LR128\tLN:1010'..b'\t*\t0\t0\tCTTATTTCTCATGCAACTTTTTTTTTGAAAAGTTTC\tRUUUUSWWVWWWWWWVWWWWWWWVWWWWWWWWWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLm0143I09_LR365,-9770,36M,0;\n-HWI-EAS337_3:7:1:5:1770\t0\tC02HBa0185P07_LR40\t1386\t37\t36M\t*\t0\t0\tGTGTGAAAAAGTATTTCATTCACATATTGAGTTGTT\tWWWWWWWWWWVWWWWWWWWWWWWWWWWWVVSUUQUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:115:1005\t0\tC11HBa0161D01_LR292\t3394\t37\t36M\t*\t0\t0\tGATTTTACTGGAATGGGGAAGGGTGAGGCATGGGTG\tWWWWWWWWWVVWWWVVVVWWVVVWWWVVWVUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:354:1708\t0\tC09HBa0165P17_LR241\t4985\t37\t36M\t*\t0\t0\tGCATCCGACAGTGACTTAGACGATGAGGAATACGAG\tWWWWWWWWWWWWVWWWWWVWWVWWVWWVWWUUUUUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1639:1500\t0\tC11SLm0052K14_LR376\t2483\t37\t36M\t*\t0\t0\tGTGATTATTATCTAACTCTGCAACAGCATCCAGGGA\tWWWWWWVVWWWWWVVWVVWUVVVVVVVVWVUUUUUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:766:243\t16\tC07HBa0308M01_LR189\t6701\t37\t36M\t*\t0\t0\tAGCAACAATCTCCAATTTATCTTCCATAGATGCCAC\tUSJUURWWVTVVVWWWWWWWVWWVVWWWWWWWWVWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:920:144\t0\tC05HBa0138J03_LR135\t8153\t37\t36M\t*\t0\t0\tGTGCGATCACACTGTTTATGTTGTTGTTGATCATTG\tWWVVWWWWWVWPWVWWVWWSWVSWWHWWLVUCPUUH\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:389:268\t16\tC12SLeRI72J6_LR378\t6610\t37\t36M\t*\t0\t0\tAAATTCTTTGAGGGTGGTTGCCCTCTCTAATTGACC\tUUUUUSVVWWWWWWWWWWWWVVWWWWVWWWWWWWWW\tXT:A:U\tNM:i:0
\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:294:1868\t0\tC04HBa58E11_LR93\t7287\t25\t36M\t*\t0\t0\tGAAAAAAAATTGTTTGTCTTGAATTAATGTTTCAAT\tVWVWWWWWWWWVQWWWWOWVVWWVWVVWQWUURULU\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:0T0G34\n-HWI-EAS337_3:7:1:1147:62\t0\tC02HBa0204D01_LR334\t6554\t37\t36M\t*\t0\t0\tGAATTCCCCTCAGGTTGGAGTTGTGCACTTGGCACT\tWWWWWWWWWWWWVWWWWWWVWWVWVVWVVWUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:787:1759\t16\tC02SLe0018B07_LR335\t8378\t0\t36M\t*\t0\t0\tCAGAAAATCAGAACAGCTACCAATTCTAATAAAACC\tHUUUUUVVTVVWVUWWUWWUVWWWWWWWVWWWWVVW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C02HBa0027B01_LR21,+810,36M,0;\n-HWI-EAS337_3:7:1:425:1939\t16\tC09SLe0076N09_LR363\t1546\t0\t36M\t*\t0\t0\tAAGTTTAGCCACATAGACCCAGACACCACAATTAGC\tUUUUUUWVVVWVVWWWVVVVWWWVWWWWVWWVWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLm0143I09_LR365,-1546,36M,0;\n-HWI-EAS337_3:7:1:187:1132\t0\tC02HBa0027B01_LR21\t10\t0\t36M\t*\t0\t0\tGTGGGAGAGGCAAGGGGCTTGGCTCATATCCTCTTC\tWVWWWWWWWWWWWWWWWWWWWTVWVWWWVVUUUUUU\tXT:A:R\tNM:i:1\tX0:i:2\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:21T14\tXA:Z:C02SLe0018B07_LR335,-9178,36M,1;\n-HWI-EAS337_3:7:1:1739:1840\t0\tC02HBa0072A04_LR26\t2868\t37\t36M\t*\t0\t0\tGGAGGGGTGAAATCGTTTCTGAAAAATAATGAAATG\tWWVWWWWWWWWWWWWWWVTWWWVVVWWWWWUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1505:1876\t0\tC07SLe0111B06_LR194\t8673\t37\t36M\t*\t0\t0\tGAAAGATCAAGTGTTGTCAAGTTCACTAGTTTAGAG\tWWWWWWWWWWWWWWWWWWWWVWWVWWWVVVUUUSUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:447:192\t0\tC09SLm0143I09_LR365\t6957\t0\t36M\t*\t0\t0\tGACTATGCCTAGCAGCTGAAATCACCACAACAAGTT\tWWWWWWWWWWWWWWWTWWWVVWWVWWWWWVUUUUUU\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLe0076N09_LR363,+6957,36M,0;\n-HWI-EAS337_3:7:1:21:2019\t16\tC09SLm0037I08_LR367\t1298\t37\
t36M\t*\t0\t0\tGGGCTGGAAGACAGGTTATCATCTTTTACCTCATAC\tUUURUUWWWVVQWWWWWWWWWWWWWWWVVWWVVWWV\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1593:652\t0\tC04HBa8K13_LR338\t2175\t37\t36M\t*\t0\t0\tGTGATGAGTAAAACATCATCATATGAACTTGAAGAG\tWWWVWVWWVWVWWVWWWWWWVVWWVWWVWWUUUSUU\tXT:A:U\tNM:i:1\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:28A7\n-HWI-EAS337_3:7:1:1254:1660\t0\tC12HBa326K10_LR306\t8100\t37\t36M\t*\t0\t0\tGAAGTTTGTAATTCCTTTTAGGATTGTGGTTAACAT\tWWWVVWWWWWWWWWWWVWVWVUWWWTWVQWUUUUMU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:291:629\t4\t*\t0\t0\t*\t*\t0\t0\tGTAGAGGAGGTAGGCTTGGTGGTCCCTCTATGGTAA\tWWWWWWWWWWVVVWVWVVWTWWKOVVTRVSUSSMFR\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/samFile.sam
--- a/SMART/data/samFile.sam Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,1448 +0,0 @@\n-@SQ\tSN:C10HBa0111D09_LR276\tLN:9300\n-@SQ\tSN:C11HBa0029C01_LR281\tLN:10969\n-@SQ\tSN:C11HBa0034I10_LR282\tLN:9056\n-@SQ\tSN:C11HBa0054I23_LR283\tLN:10301\n-@SQ\tSN:C11HBa0062I24_LR284\tLN:10050\n-@SQ\tSN:C11HBa0064J13_LR285\tLN:9385\n-@SQ\tSN:C11HBa0072I13_LR286\tLN:9556\n-@SQ\tSN:C11HBa0089M02_LR287\tLN:9244\n-@SQ\tSN:C11HBa0096D22_LR288\tLN:9184\n-@SQ\tSN:C11HBa0107K14_LR289\tLN:9115\n-@SQ\tSN:C11HBa0139J14_LR291\tLN:10002\n-@SQ\tSN:C11HBa0143O06_LR374\tLN:10785\n-@SQ\tSN:C11HBa0161D01_LR292\tLN:9057\n-@SQ\tSN:C11HBa0168B23_LR293\tLN:9826\n-@SQ\tSN:C11HBa0190J03_LR294\tLN:10992\n-@SQ\tSN:C11HBa0249E07_LR279\tLN:10008\n-@SQ\tSN:C11HBa0303G16_LR296\tLN:9430\n-@SQ\tSN:C11HBa0323E19_LR297\tLN:9657\n-@SQ\tSN:C11SLe0053P22_LR298\tLN:9827\n-@SQ\tSN:C11SLm0052K14_LR376\tLN:10013\n-@SQ\tSN:C12HBa115G22_LR301\tLN:10021\n-@SQ\tSN:C12HBa120K4_LR313\tLN:10271\n-@SQ\tSN:C12HBa144B17_LR302\tLN:9247\n-@SQ\tSN:C12HBa149G24_LR381\tLN:9271\n-@SQ\tSN:C12HBa165B12_LR303\tLN:9257\n-@SQ\tSN:C12HBa183M6_LR379\tLN:9473\n-@SQ\tSN:C12HBa221M9_LR377\tLN:10755\n-@SQ\tSN:C12HBa224N6_LR382\tLN:9130\n-@SQ\tSN:C12HBa26C13_LR299\tLN:9139\n-@SQ\tSN:C12HBa326K10_LR306\tLN:10414\n-@SQ\tSN:C12HBa90D9_LR311\tLN:9638\n-@SQ\tSN:C12HBa93P12_LR312\tLN:9510\n-@SQ\tSN:C12SLe124D18_LR385\tLN:10545\n-@SQ\tSN:C12SLeRI72J6_LR378\tLN:9337\n-@SQ\tSN:C12SLm103K8_LR380\tLN:10118\n-@SQ\tSN:C01HBa0003D15_LR7\tLN:10776\n-@SQ\tSN:C01HBa0163B20_LR10\tLN:9321\n-@SQ\tSN:C01HBa0216G16_LR11\tLN:10332\n-@SQ\tSN:C01HBa0256E08_LR13\tLN:9024\n-@SQ\tSN:C01HBa0329A12_LR14\tLN:9536\n-@SQ\tSN:BAC19_LR16\tLN:9760\n-@SQ\tSN:C02HBa0008G02_LR67\tLN:9205\n-@SQ\tSN:C02HBa0011O23_LR68\tLN:9399\n-@SQ\tSN:C02HBa0016A12_LR19\tLN:9822\n-@SQ\tSN:C02HBa0027B01_LR21\tLN:9222\n-@SQ\tSN:C02HBa0030A21_LR22\tLN:9147\n-@SQ\tSN:C02HBa0046M08_LR23\tLN:10763\n-@SQ\tSN:C02HBa0072A04_LR26\tLN:9766\n-@SQ\tSN:C02HBa0075D08_LR28\tLN:10744\n-@SQ\tSN:C02HBa0124N09_LR31\tLN:9335\n-@SQ\tSN:C02HBa0155D20_LR36\tLN:10743\n-@SQ\tSN:C02HBa0155
E05_LR37\tLN:10417\n-@SQ\tSN:C02HBa0164H08_LR38\tLN:10279\n-@SQ\tSN:C02HBa0167J21_LR39\tLN:9925\n-@SQ\tSN:C02HBa0185P07_LR40\tLN:9818\n-@SQ\tSN:C02HBa0190N21_LR41\tLN:10835\n-@SQ\tSN:C02HBa0190P16_LR331\tLN:10808\n-@SQ\tSN:C02HBa0194L19_LR42\tLN:10280\n-@SQ\tSN:C02HBa0204A09_LR332\tLN:10029\n-@SQ\tSN:C02HBa0204D01_LR334\tLN:9746\n-@SQ\tSN:C02HBa0214B22_LR325\tLN:9581\n-@SQ\tSN:C02HBa0215M12_LR319\tLN:9918\n-@SQ\tSN:C02HBa0228I09_LR329\tLN:10933\n-@SQ\tSN:C02HBa0236E02_LR326\tLN:9822\n-@SQ\tSN:C02HBa0284G15_LR47\tLN:9034\n-@SQ\tSN:C02HBa0291P19_LR48\tLN:9826\n-@SQ\tSN:C02HBa0329G05_LR52\tLN:9637\n-@SQ\tSN:C02SLe0010H16_LR53\tLN:10744\n-@SQ\tSN:C02SLe0018B07_LR335\tLN:9222\n-@SQ\tSN:C02SLe0034H10_LR327\tLN:10833\n-@SQ\tSN:C02SLe0127J16_LR59\tLN:10965\n-@SQ\tSN:C02SLe0132D01_LR60\tLN:10524\n-@SQ\tSN:C02SLm0057H03_LR336\tLN:9514\n-@SQ\tSN:C02SLm0057H03_LR64\tLN:9170\n-@SQ\tSN:C02SLm0057H03_LR65\tLN:9532\n-@SQ\tSN:C03HBa0012D06_LR72\tLN:10645\n-@SQ\tSN:C03HBa0030O03_LR74\tLN:10569\n-@SQ\tSN:C03HBa0034B23_LR76\tLN:10005\n-@SQ\tSN:C03HBa0040F22_LR77\tLN:10227\n-@SQ\tSN:C03HBa0054O21_LR78\tLN:9044\n-@SQ\tSN:C03HBa0076J13_LR79\tLN:10097\n-@SQ\tSN:C03HBa0233O20_LR82\tLN:9753\n-@SQ\tSN:C03HBa0295I12_LR83\tLN:10258\n-@SQ\tSN:C03HBa0318C22_LR84\tLN:10004\n-@SQ\tSN:C03HBa0323D22_LR85\tLN:9222\n-@SQ\tSN:C04HBa127N12_LR346\tLN:10533\n-@SQ\tSN:C04HBa132O11_LR104\tLN:10306\n-@SQ\tSN:C04HBa164O3_LR344\tLN:9345\n-@SQ\tSN:C04HBa190C13_LR106\tLN:10719\n-@SQ\tSN:C04HBa198I15_LR107\tLN:10673\n-@SQ\tSN:C04HBa219H8_LR109\tLN:10174\n-@SQ\tSN:C04HBa239P14_LR111\tLN:10483\n-@SQ\tSN:C04HBa255I2_LR112\tLN:10650\n-@SQ\tSN:C04HBa27G19_LR337\tLN:9788\n-@SQ\tSN:C04HBa2G1_LR120\tLN:9322\n-@SQ\tSN:C04HBa331L22_LR115\tLN:10697\n-@SQ\tSN:C04HBa35C16_LR339\tLN:9494\n-@SQ\tSN:C04HBa36C23_LR91\tLN:10103\n-@SQ\tSN:C04HBa50I18_LR341\tLN:10825\n-@SQ\tSN:C04HBa58E11_LR93\tLN:9927\n-@SQ\tSN:C04HBa66O12_LR94\tLN:9355\n-@SQ\tSN:C04HBa68N5_LR343\tLN:9886\n-@SQ\tSN:C04HBa6E18_LR87\tLN:9265\n-@SQ\tSN:C04HBa6O16_LR123
\tLN:10386\n-@SQ\tSN:C04HBa78E4_LR98\tLN:9994\n-@SQ\tSN:C04HBa78J4_LR99\tLN:9165\n-@SQ\tSN:C04HBa80D3_LR100\tLN:9781\n-@SQ\tSN:C04HBa8K13_LR338\tLN:9345\n-@SQ\tSN:C04HBa96I8_LR101\tLN:9693\n-@SQ\tSN:C04SLm14G22_LR116\tLN:10306\n-@SQ\tSN:C04SLm39E17_LR117\tLN:9105\n-@SQ\tSN:C05HBa0003C20_LR126\tLN:9460\n-@SQ\tSN:C05HBa0006N20_LR128\tLN:101'..b'\t*\t0\t0\tCTTATTTCTCATGCAACTTTTTTTTTGAAAAGTTTC\tRUUUUSWWVWWWWWWVWWWWWWWVWWWWWWWWWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLm0143I09_LR365,-9770,36M,0;\n-HWI-EAS337_3:7:1:5:1770\t0\tC02HBa0185P07_LR40\t1386\t37\t36M\t*\t0\t0\tGTGTGAAAAAGTATTTCATTCACATATTGAGTTGTT\tWWWWWWWWWWVWWWWWWWWWWWWWWWWWVVSUUQUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:115:1005\t0\tC11HBa0161D01_LR292\t3394\t37\t36M\t*\t0\t0\tGATTTTACTGGAATGGGGAAGGGTGAGGCATGGGTG\tWWWWWWWWWVVWWWVVVVWWVVVWWWVVWVUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:354:1708\t0\tC09HBa0165P17_LR241\t4985\t37\t36M\t*\t0\t0\tGCATCCGACAGTGACTTAGACGATGAGGAATACGAG\tWWWWWWWWWWWWVWWWWWVWWVWWVWWVWWUUUUUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1639:1500\t0\tC11SLm0052K14_LR376\t2483\t37\t36M\t*\t0\t0\tGTGATTATTATCTAACTCTGCAACAGCATCCAGGGA\tWWWWWWVVWWWWWVVWVVWUVVVVVVVVWVUUUUUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:766:243\t16\tC07HBa0308M01_LR189\t6701\t37\t36M\t*\t0\t0\tAGCAACAATCTCCAATTTATCTTCCATAGATGCCAC\tUSJUURWWVTVVVWWWWWWWVWWVVWWWWWWWWVWW\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:920:144\t0\tC05HBa0138J03_LR135\t8153\t37\t36M\t*\t0\t0\tGTGCGATCACACTGTTTATGTTGTTGTTGATCATTG\tWWVVWWWWWVWPWVWWVWWSWVSWWHWWLVUCPUUH\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:389:268\t16\tC12SLeRI72J6_LR378\t6610\t37\t36M\t*\t0\t0\tAAATTCTTTGAGGGTGGTTGCCCTCTCTAATTGACC\tUUUUUSVVWWWWWWWWWWWWVVWWWWVWWWWWWWWW\tXT:A:U\tNM:i:0
\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:294:1868\t0\tC04HBa58E11_LR93\t7287\t25\t36M\t*\t0\t0\tGAAAAAAAATTGTTTGTCTTGAATTAATGTTTCAAT\tVWVWWWWWWWWVQWWWWOWVVWWVWVVWQWUURULU\tXT:A:U\tNM:i:2\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:0T0G34\n-HWI-EAS337_3:7:1:1147:62\t0\tC02HBa0204D01_LR334\t6554\t37\t36M\t*\t0\t0\tGAATTCCCCTCAGGTTGGAGTTGTGCACTTGGCACT\tWWWWWWWWWWWWVWWWWWWVWWVWVVWVVWUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:787:1759\t16\tC02SLe0018B07_LR335\t8378\t0\t36M\t*\t0\t0\tCAGAAAATCAGAACAGCTACCAATTCTAATAAAACC\tHUUUUUVVTVVWVUWWUWWUVWWWWWWWVWWWWVVW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C02HBa0027B01_LR21,+810,36M,0;\n-HWI-EAS337_3:7:1:425:1939\t16\tC09SLe0076N09_LR363\t1546\t0\t36M\t*\t0\t0\tAAGTTTAGCCACATAGACCCAGACACCACAATTAGC\tUUUUUUWVVVWVVWWWVVVVWWWVWWWWVWWVWWWW\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLm0143I09_LR365,-1546,36M,0;\n-HWI-EAS337_3:7:1:187:1132\t0\tC02HBa0027B01_LR21\t10\t0\t36M\t*\t0\t0\tGTGGGAGAGGCAAGGGGCTTGGCTCATATCCTCTTC\tWVWWWWWWWWWWWWWWWWWWWTVWVWWWVVUUUUUU\tXT:A:R\tNM:i:1\tX0:i:2\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:21T14\tXA:Z:C02SLe0018B07_LR335,-9178,36M,1;\n-HWI-EAS337_3:7:1:1739:1840\t0\tC02HBa0072A04_LR26\t2868\t37\t36M\t*\t0\t0\tGGAGGGGTGAAATCGTTTCTGAAAAATAATGAAATG\tWWVWWWWWWWWWWWWWWVTWWWVVVWWWWWUUUUUU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1505:1876\t0\tC07SLe0111B06_LR194\t8673\t37\t36M\t*\t0\t0\tGAAAGATCAAGTGTTGTCAAGTTCACTAGTTTAGAG\tWWWWWWWWWWWWWWWWWWWWVWWVWWWVVVUUUSUR\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:447:192\t0\tC09SLm0143I09_LR365\t6957\t0\t36M\t*\t0\t0\tGACTATGCCTAGCAGCTGAAATCACCACAACAAGTT\tWWWWWWWWWWWWWWWTWWWVVWWVWWWWWVUUUUUU\tXT:A:R\tNM:i:0\tX0:i:2\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\tXA:Z:C09SLe0076N09_LR363,+6957,36M,0;\n-HWI-EAS337_3:7:1:21:2019\t16\tC09SLm0037I08_LR367\t1298\t37\
t36M\t*\t0\t0\tGGGCTGGAAGACAGGTTATCATCTTTTACCTCATAC\tUUURUUWWWVVQWWWWWWWWWWWWWWWVVWWVVWWV\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:1593:652\t0\tC04HBa8K13_LR338\t2175\t37\t36M\t*\t0\t0\tGTGATGAGTAAAACATCATCATATGAACTTGAAGAG\tWWWVWVWWVWVWWVWWWWWWVVWWVWWVWWUUUSUU\tXT:A:U\tNM:i:1\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:28A7\n-HWI-EAS337_3:7:1:1254:1660\t0\tC12HBa326K10_LR306\t8100\t37\t36M\t*\t0\t0\tGAAGTTTGTAATTCCTTTTAGGATTGTGGTTAACAT\tWWWVVWWWWWWWWWWWVWVWVUWWWTWVQWUUUUMU\tXT:A:U\tNM:i:0\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:36\n-HWI-EAS337_3:7:1:291:629\t4\t*\t0\t0\t*\t*\t0\t0\tGTAGAGGAGGTAGGCTTGGTGGTCCCTCTATGGTAA\tWWWWWWWWWWVVVWVWVVWTWWKOVVTRVSUSSMFR\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/sortedBamFile.bam
b
Binary file SMART/data/sortedBamFile.bam has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/test.gff.gff3
--- a/SMART/data/test.gff.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,218 +0,0 @@\n-C02HBa0185P07_LR40\tSMART\ttranscript\t3889\t3924\t36\t-\t.\tName=HWI-EAS337_3:7:1:415:1217;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:415:1217;identity=100\n-C11SLe0053P22_LR298\tSMART\ttranscript\t2130\t2165\t36\t-\t.\tName=HWI-EAS337_3:7:1:1178:755;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1178:755;identity=100\n-C06HBa0144J05_LR355\tSMART\ttranscript\t1\t36\t36\t+\t.\tName=HWI-EAS337_3:7:1:277:1259;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:277:1259;identity=100\n-C08HBa0165B06_LR218\tSMART\ttranscript\t3619\t3654\t36\t-\t.\tName=HWI-EAS337_3:7:1:447:1231;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:447:1231;identity=100\n-C02HBa0329G05_LR52\tSMART\ttranscript\t4746\t4781\t36\t-\t.\tName=HWI-EAS337_3:7:1:1154:1517;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1154:1517;identity=100\n-C04HBa80D3_LR100\tSMART\ttranscript\t423\t458\t36\t-\t.\tName=HWI-EAS337_3:7:1:164:1869;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:164:1869;identity=100\n-C01HBa0216G16_LR11\tSMART\ttranscript\t648\t683\t36\t-\t.\tName=HWI-EAS337_3:7:1:415:1194;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:415:1194;identity=100\n-C05HBa0145P19_LR136\tSMART\ttranscript\t3686\t3721\t36\t-\t.\tName=HWI-EAS337_3:7:1:645:1892;quality=25;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=2;ID=HWI-EAS337_3:7:1:645:1892;identity=94\n-C08HBa0012O06_LR211\tSMART\ttranscript\t1768\t1803\t36\t-\t.\tName=HWI-EAS337_3:7:1:33:1446;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=HWI-EAS337_3:7:1:33:1446;identity=97\n-C09HBa0194K19_LR362\tSMART\ttranscript\t9168\t9203\t36\t+\t.\tName=HWI-EAS337_3:7:1:1194:1427;quality=37;bestRegion=(self);nbGaps=0;nbOccurre
nces=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1194:1427;identity=100\n-C09SLm0018L06_LR366\tSMART\ttranscript\t5034\t5069\t36\t+\t.\tName=HWI-EAS337_3:7:1:624:1913;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:624:1913;identity=100\n-C09SLe0085A10_LR364\tSMART\ttranscript\t6700\t6735\t36\t-\t.\tName=HWI-EAS337_3:7:1:437:1202;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:437:1202;identity=100\n-C09HBa0113I06_LR360\tSMART\ttranscript\t1764\t1799\t36\t-\t.\tName=HWI-EAS337_3:7:1:1386:1787;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1386:1787;identity=100\n-C11HBa0143O06_LR374\tSMART\ttranscript\t8925\t8960\t36\t-\t.\tName=HWI-EAS337_3:7:1:227:1155;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:227:1155;identity=100\n-C06HBa0066D13_LR353\tSMART\ttranscript\t6619\t6654\t36\t-\t.\tName=HWI-EAS337_3:7:1:472:1025;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:472:1025;identity=100\n-C07SLe0099J13_LR193\tSMART\ttranscript\t3528\t3563\t36\t+\t.\tName=HWI-EAS337_3:7:1:220:1482;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:220:1482;identity=100\n-C07HBa0224G23_LR186\tSMART\ttranscript\t9232\t9267\t36\t-\t.\tName=HWI-EAS337_3:7:1:1699:1966;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWI-EAS337_3:7:1:1699:1966;identity=100\n-C07HBa0224G23_LR186\tSMART\ttranscript\t3761\t3796\t36\t-\t.\tName=HWI-EAS337_3:7:1:547:1084;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWI-EAS337_3:7:1:547:1084;identity=100\n-C02HBa0291P19_LR48\tSMART\ttranscript\t131\t166\t36\t+\t.\tName=HWI-EAS337_3:7:1:464:1097;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:464:1097;identity=100\n-C12SLm103K8_LR380\tSMART\ttranscript\t7346\t7381\t36\t+\t.\tName=HWI-EAS337_3:7:
1:171:1480;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:171:1480;identity=100\n-C11HBa0143O06_LR374\tSMART\ttranscript\t7925\t7960\t36\t-\t.\tName'..b'=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1254:1491;identity=100\n-C12HBa93P12_LR312\tSMART\ttranscript\t4116\t4151\t36\t+\t.\tName=HWI-EAS337_3:7:1:42:1990;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:42:1990;identity=100\n-C09SLe0130H12_LR273\tSMART\ttranscript\t3257\t3292\t36\t-\t.\tName=HWI-EAS337_3:7:1:1319:766;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWI-EAS337_3:7:1:1319:766;identity=100\n-C07HBa0309B15_LR190\tSMART\ttranscript\t4202\t4237\t36\t-\t.\tName=HWI-EAS337_3:7:1:1567:1795;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1567:1795;identity=100\n-C04HBa96I8_LR101\tSMART\ttranscript\t4961\t4996\t36\t+\t.\tName=HWI-EAS337_3:7:1:216:392;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:216:392;identity=100\n-C04HBa50I18_LR341\tSMART\ttranscript\t2928\t2963\t36\t-\t.\tName=HWI-EAS337_3:7:1:38:1803;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:38:1803;identity=100\n-C06HBa0217M17_LR166\tSMART\ttranscript\t4141\t4176\t36\t-\t.\tName=HWI-EAS337_3:7:1:425:1196;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:425:1196;identity=100\n-C02HBa0072A04_LR26\tSMART\ttranscript\t2348\t2383\t36\t-\t.\tName=HWI-EAS337_3:7:1:181:410;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:181:410;identity=100\n-C09SLe0130H12_LR273\tSMART\ttranscript\t4026\t4061\t36\t-\t.\tName=HWI-EAS337_3:7:1:1065:1826;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWI-EAS337_3:7:1:1065:1826;identity=100\n-C02HBa0027B01_LR21\tSMART\ttranscript\t4337\t4372\t36\t-\t.\tName=HWI-EAS337_3
:7:1:79:1444;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=2;nbMismatches=0;ID=HWI-EAS337_3:7:1:79:1444;identity=100\n-C09SLe0085A10_LR364\tSMART\ttranscript\t607\t642\t36\t-\t.\tName=HWI-EAS337_3:7:1:1634:1526;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1634:1526;identity=100\n-C12HBa326K10_LR306\tSMART\ttranscript\t6488\t6523\t36\t-\t.\tName=HWI-EAS337_3:7:1:462:1320;quality=25;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=2;ID=HWI-EAS337_3:7:1:462:1320;identity=94\n-C04HBa50I18_LR341\tSMART\ttranscript\t1\t36\t36\t+\t.\tName=HWI-EAS337_3:7:1:532:1095;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:532:1095;identity=100\n-C07HBa0002M15_LR175\tSMART\ttranscript\t7167\t7202\t36\t-\t.\tName=HWI-EAS337_3:7:1:832:1960;quality=0;bestRegion=(self);nbGaps=0;nbOccurrences=3;nbMismatches=0;ID=HWI-EAS337_3:7:1:832:1960;identity=100\n-C06HBa0120H21_LR161\tSMART\ttranscript\t186\t221\t36\t-\t.\tName=HWI-EAS337_3:7:1:1312:645;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1312:645;identity=100\n-C12HBa326K10_LR306\tSMART\ttranscript\t5231\t5266\t36\t+\t.\tName=HWI-EAS337_3:7:1:1107:226;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:1107:226;identity=100\n-C09SLm0008K04_LR274\tSMART\ttranscript\t10575\t10610\t36\t+\t.\tName=HWI-EAS337_3:7:1:274:1287;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:274:1287;identity=100\n-C06HBa0217M17_LR166\tSMART\ttranscript\t3360\t3395\t36\t-\t.\tName=HWI-EAS337_3:7:1:1704:1373;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=1;ID=HWI-EAS337_3:7:1:1704:1373;identity=97\n-C06HBa0066I09_LR156\tSMART\ttranscript\t5444\t5479\t36\t-\t.\tName=HWI-EAS337_3:7:1:241:903;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:241:903;identity=100\n-C08HBa0239G21_LR221\tSMART\ttransc
ript\t4431\t4466\t36\t-\t.\tName=HWI-EAS337_3:7:1:404:1924;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:404:1924;identity=100\n-C02HBa0190P16_LR331\tSMART\ttranscript\t3279\t3314\t36\t+\t.\tName=HWI-EAS337_3:7:1:23:1455;quality=37;bestRegion=(self);nbGaps=0;nbOccurrences=1;nbMismatches=0;ID=HWI-EAS337_3:7:1:23:1455;identity=100\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/test_clusterize.gff3
--- a/SMART/data/test_clusterize.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,213 +0,0 @@\n-C12HBa326K10_LR306\tS-MART\ttranscript\t3066\t3101\t36\t-\t.\tName=HWI-EAS337_3:7:1:263:1275;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:263:1275;identity=100\n-C12HBa326K10_LR306\tS-MART\ttranscript\t3148\t3183\t36\t-\t.\tName=HWI-EAS337_3:7:1:1262:1508;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1262:1508;identity=100\n-C12HBa326K10_LR306\tS-MART\ttranscript\t4561\t4596\t36\t-\t.\tName=HWI-EAS337_3:7:1:1187:1977;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=6;quality=0;ID=HWI-EAS337_3:7:1:1187:1977;identity=100\n-C12HBa326K10_LR306\tS-MART\ttranscript\t5231\t5266\t36\t+\t.\tName=HWI-EAS337_3:7:1:1107:226;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1107:226;identity=100\n-C12HBa326K10_LR306\tS-MART\ttranscript\t6488\t6523\t36\t-\t.\tName=HWI-EAS337_3:7:1:462:1320;nbMismatches=2;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=25;ID=HWI-EAS337_3:7:1:462:1320;identity=94\n-C07HBa0130B18_LR183\tS-MART\ttranscript\t9167\t9202\t36\t+\t.\tName=HWI-EAS337_3:7:1:65:1436;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:65:1436;identity=100\n-C09HBa0109D11_LR262\tS-MART\ttranscript\t10202\t10237\t36\t-\t.\tName=HWI-EAS337_3:7:1:172:1019;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:172:1019;identity=100\n-C04HBa50I18_LR341\tS-MART\ttranscript\t1\t36\t36\t+\t.\tName=HWI-EAS337_3:7:1:532:1095;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:532:1095;identity=100\n-C04HBa50I18_LR341\tS-MART\ttranscript\t2928\t2963\t36\t-\t.\tName=HWI-EAS337_3:7:1:38:1803;nbMismatches=0;fea
ture=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:38:1803;identity=100\n-C04HBa50I18_LR341\tS-MART\ttranscript\t4171\t4206\t36\t+\t.\tName=HWI-EAS337_3:7:1:37:1418;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:37:1418;identity=100\n-C04HBa6E18_LR87\tS-MART\ttranscript\t3416\t3451\t36\t+\t.\tName=HWI-EAS337_3:7:1:4:1451;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:4:1451;identity=100\n-C04HBa6E18_LR87\tS-MART\ttranscript\t4296\t4331\t36\t-\t.\tName=HWI-EAS337_3:7:1:1526:1772;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1526:1772;identity=100\n-C06HBa0217M17_LR166\tS-MART\ttranscript\t3107\t3142\t36\t+\t.\tName=HWI-EAS337_3:7:1:300:1184;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:300:1184;identity=100\n-C06HBa0217M17_LR166\tS-MART\ttranscript\t3360\t3395\t36\t-\t.\tName=HWI-EAS337_3:7:1:1704:1373;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1704:1373;identity=97\n-C06HBa0217M17_LR166\tS-MART\ttranscript\t4141\t4176\t36\t-\t.\tName=HWI-EAS337_3:7:1:425:1196;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:425:1196;identity=100\n-C06HBa0217M17_LR166\tS-MART\ttranscript\t5919\t5954\t36\t-\t.\tName=HWI-EAS337_3:7:1:498:810;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:498:810;identity=97\n-C06HBa0217M17_LR166\tS-MART\ttranscript\t6075\t6110\t36\t-\t.\tName=HWI-EAS337_3:7:1:364:1210;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:364:1210;identity=100\n-C04HBa219H8_LR
109\tS-MART\ttranscript\t8651\t8686\t36\t+\t.\tName=HWI-EAS337_3:7:1:1160:1471;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences='..b'_LR115\tS-MART\ttranscript\t10241\t10276\t36\t-\t.\tName=HWI-EAS337_3:7:1:1160:1426;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1160:1426;identity=100\n-C02SLe0132D01_LR60\tS-MART\ttranscript\t5644\t5679\t36\t+\t.\tName=HWI-EAS337_3:7:1:644:33;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:644:33;identity=100\n-C02HBa0167J21_LR39\tS-MART\ttranscript\t836\t871\t36\t-\t.\tName=HWI-EAS337_3:7:1:395:1182--HWI-EAS337_3:7:1:736:505;nbElements=2;score=36;quality=37;feature=transcript;ID=HWI-EAS337_3:7:1:395:1182\n-C02HBa0167J21_LR39\tS-MART\ttranscript\t1972\t2007\t36\t-\t.\tName=HWI-EAS337_3:7:1:647:1863;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:647:1863;identity=100\n-C02SLe0018B07_LR335\tS-MART\ttranscript\t8723\t8758\t36\t-\t.\tName=HWI-EAS337_3:7:1:1633:1841;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1633:1841;identity=100\n-C02HBa0155E05_LR37\tS-MART\ttranscript\t4273\t4308\t36\t-\t.\tName=HWI-EAS337_3:7:1:34:1851;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=23;ID=HWI-EAS337_3:7:1:34:1851;identity=97\n-C08SLm0118A18_LR226\tS-MART\ttranscript\t4396\t4431\t36\t+\t.\tName=HWI-EAS337_3:7:1:600:1107;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:600:1107;identity=100\n-C02HBa0027B01_LR21\tS-MART\ttranscript\t1887\t1922\t36\t+\t.\tName=HWI-EAS337_3:7:1:1545:1519;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1545:1519;identity=97\n-C02HBa
0027B01_LR21\tS-MART\ttranscript\t4337\t4372\t36\t-\t.\tName=HWI-EAS337_3:7:1:79:1444;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:79:1444;identity=100\n-C02HBa0027B01_LR21\tS-MART\ttranscript\t5114\t5149\t36\t-\t.\tName=HWI-EAS337_3:7:1:1177:1504;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1177:1504;identity=100\n-C02HBa0027B01_LR21\tS-MART\ttranscript\t5948\t5983\t36\t-\t.\tName=HWI-EAS337_3:7:1:31:1659;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:31:1659;identity=100\n-C07SLe0111B06_LR194\tS-MART\ttranscript\t8883\t8918\t36\t+\t.\tName=HWI-EAS337_3:7:1:1477:1673;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1477:1673;identity=100\n-C02HBa0072A04_LR26\tS-MART\ttranscript\t932\t967\t36\t-\t.\tName=HWI-EAS337_3:7:1:141:1433;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:141:1433;identity=100\n-C02HBa0072A04_LR26\tS-MART\ttranscript\t2303\t2338\t36\t-\t.\tName=HWI-EAS337_3:7:1:1254:1491;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1254:1491;identity=100\n-C02HBa0072A04_LR26\tS-MART\ttranscript\t2348\t2383\t36\t-\t.\tName=HWI-EAS337_3:7:1:181:410;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:181:410;identity=100\n-C02HBa0072A04_LR26\tS-MART\ttranscript\t6848\t6883\t36\t-\t.\tName=HWI-EAS337_3:7:1:373:2009;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:373:2009;identity=97\n-C08HBa0165B06_LR218\tS-MART\ttranscript\t3619\t3654\t36\t-\t.\tName=HWI-EAS337_3:7:1:447:1231;nbMismatches=0;feature=transcript;score=36;be
stRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:447:1231;identity=100\n-C08HBa0165B06_LR218\tS-MART\ttranscript\t8558\t8593\t36\t+\t.\tName=HWI-EAS337_3:7:1:538:1054;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:538:1054;identity=97\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/data/test_clusterize2.gff3
--- a/SMART/data/test_clusterize2.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,214 +0,0 @@\n-C12HBa326K10_LR306\tS-MART\ttranscript\t3066\t3101\t36\t-\t.\tName=HWI-EAS337_3:7:1:263:1275;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:263:1275;identity=100\n-C12HBa326K10_LR306\tS-MART\ttranscript\t3148\t3183\t36\t-\t.\tName=HWI-EAS337_3:7:1:1262:1508;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1262:1508;identity=100\n-C12HBa326K10_LR306\tS-MART\ttranscript\t4561\t4596\t36\t-\t.\tName=HWI-EAS337_3:7:1:1187:1977;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=6;quality=0;ID=HWI-EAS337_3:7:1:1187:1977;identity=100\n-C12HBa326K10_LR306\tS-MART\ttranscript\t5231\t5266\t36\t+\t.\tName=HWI-EAS337_3:7:1:1107:226;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1107:226;identity=100\n-C12HBa326K10_LR306\tS-MART\ttranscript\t6488\t6523\t36\t-\t.\tName=HWI-EAS337_3:7:1:462:1320;nbMismatches=2;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=25;ID=HWI-EAS337_3:7:1:462:1320;identity=94\n-C07HBa0130B18_LR183\tS-MART\ttranscript\t9167\t9202\t36\t+\t.\tName=HWI-EAS337_3:7:1:65:1436;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:65:1436;identity=100\n-C09HBa0109D11_LR262\tS-MART\ttranscript\t10202\t10237\t36\t-\t.\tName=HWI-EAS337_3:7:1:172:1019;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:172:1019;identity=100\n-C04HBa50I18_LR341\tS-MART\ttranscript\t1\t36\t36\t+\t.\tName=HWI-EAS337_3:7:1:532:1095;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:532:1095;identity=100\n-C04HBa50I18_LR341\tS-MART\ttranscript\t2928\t2963\t36\t-\t.\tName=HWI-EAS337_3:7:1:38:1803;nbMismatches=0;fea
ture=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:38:1803;identity=100\n-C04HBa50I18_LR341\tS-MART\ttranscript\t4171\t4206\t36\t+\t.\tName=HWI-EAS337_3:7:1:37:1418;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:37:1418;identity=100\n-C04HBa6E18_LR87\tS-MART\ttranscript\t3416\t3451\t36\t+\t.\tName=HWI-EAS337_3:7:1:4:1451;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:4:1451;identity=100\n-C04HBa6E18_LR87\tS-MART\ttranscript\t4296\t4331\t36\t-\t.\tName=HWI-EAS337_3:7:1:1526:1772;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1526:1772;identity=100\n-C06HBa0217M17_LR166\tS-MART\ttranscript\t3107\t3142\t36\t+\t.\tName=HWI-EAS337_3:7:1:300:1184;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:300:1184;identity=100\n-C06HBa0217M17_LR166\tS-MART\ttranscript\t3360\t3395\t36\t-\t.\tName=HWI-EAS337_3:7:1:1704:1373;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1704:1373;identity=97\n-C06HBa0217M17_LR166\tS-MART\ttranscript\t4141\t4176\t36\t-\t.\tName=HWI-EAS337_3:7:1:425:1196;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:425:1196;identity=100\n-C06HBa0217M17_LR166\tS-MART\ttranscript\t5919\t5954\t36\t-\t.\tName=HWI-EAS337_3:7:1:498:810;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:498:810;identity=97\n-C06HBa0217M17_LR166\tS-MART\ttranscript\t6075\t6110\t36\t-\t.\tName=HWI-EAS337_3:7:1:364:1210;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:364:1210;identity=100\n-C04HBa219H8_LR
109\tS-MART\ttranscript\t8651\t8686\t36\t+\t.\tName=HWI-EAS337_3:7:1:1160:1471;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences='..b'_LR115\tS-MART\ttranscript\t10241\t10276\t36\t-\t.\tName=HWI-EAS337_3:7:1:1160:1426;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1160:1426;identity=100\n-C02SLe0132D01_LR60\tS-MART\ttranscript\t5644\t5679\t36\t+\t.\tName=HWI-EAS337_3:7:1:644:33;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:644:33;identity=100\n-C02HBa0167J21_LR39\tS-MART\ttranscript\t836\t871\t36\t-\t.\tName=HWI-EAS337_3:7:1:395:1182--HWI-EAS337_3:7:1:736:505;nbElements=2;score=36;quality=37;feature=transcript;ID=HWI-EAS337_3:7:1:395:1182\n-C02HBa0167J21_LR39\tS-MART\ttranscript\t1972\t2007\t36\t-\t.\tName=HWI-EAS337_3:7:1:647:1863;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:647:1863;identity=100\n-C02SLe0018B07_LR335\tS-MART\ttranscript\t8723\t8758\t36\t-\t.\tName=HWI-EAS337_3:7:1:1633:1841;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1633:1841;identity=100\n-C02HBa0155E05_LR37\tS-MART\ttranscript\t4273\t4308\t36\t-\t.\tName=HWI-EAS337_3:7:1:34:1851;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=23;ID=HWI-EAS337_3:7:1:34:1851;identity=97\n-C08SLm0118A18_LR226\tS-MART\ttranscript\t4396\t4431\t36\t+\t.\tName=HWI-EAS337_3:7:1:600:1107;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:600:1107;identity=100\n-C02HBa0027B01_LR21\tS-MART\ttranscript\t1887\t1922\t36\t+\t.\tName=HWI-EAS337_3:7:1:1545:1519;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1545:1519;identity=97\n-C02HBa
0027B01_LR21\tS-MART\ttranscript\t4337\t4372\t36\t-\t.\tName=HWI-EAS337_3:7:1:79:1444;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:79:1444;identity=100\n-C02HBa0027B01_LR21\tS-MART\ttranscript\t5114\t5149\t36\t-\t.\tName=HWI-EAS337_3:7:1:1177:1504;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:1177:1504;identity=100\n-C02HBa0027B01_LR21\tS-MART\ttranscript\t5948\t5983\t36\t-\t.\tName=HWI-EAS337_3:7:1:31:1659;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=2;quality=0;ID=HWI-EAS337_3:7:1:31:1659;identity=100\n-C07SLe0111B06_LR194\tS-MART\ttranscript\t8883\t8918\t36\t+\t.\tName=HWI-EAS337_3:7:1:1477:1673;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1477:1673;identity=100\n-C02HBa0072A04_LR26\tS-MART\ttranscript\t932\t967\t36\t-\t.\tName=HWI-EAS337_3:7:1:141:1433;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:141:1433;identity=100\n-C02HBa0072A04_LR26\tS-MART\ttranscript\t2303\t2338\t36\t-\t.\tName=HWI-EAS337_3:7:1:1254:1491;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:1254:1491;identity=100\n-C02HBa0072A04_LR26\tS-MART\ttranscript\t2348\t2383\t36\t-\t.\tName=HWI-EAS337_3:7:1:181:410;nbMismatches=0;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:181:410;identity=100\n-C02HBa0072A04_LR26\tS-MART\ttranscript\t6848\t6883\t36\t-\t.\tName=HWI-EAS337_3:7:1:373:2009;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:373:2009;identity=97\n-C08HBa0165B06_LR218\tS-MART\ttranscript\t3619\t3654\t36\t-\t.\tName=HWI-EAS337_3:7:1:447:1231;nbMismatches=0;feature=transcript;score=36;be
stRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:447:1231;identity=100\n-C08HBa0165B06_LR218\tS-MART\ttranscript\t8558\t8593\t36\t+\t.\tName=HWI-EAS337_3:7:1:538:1054;nbMismatches=1;feature=transcript;score=36;bestRegion=(self);nbGaps=0;nbOccurrences=1;quality=37;ID=HWI-EAS337_3:7:1:538:1054;identity=97\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/CleanTranscriptFile.xml
--- a/SMART/galaxy/CleanTranscriptFile.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,69 +0,0 @@
-<tool id="CleanTranscriptFile" name="Clean Transcript File">
- <description> Clean a transcript file so that it is useable for S-MART.</description>
- <command interpreter="python"> ../Java/Python/CleanTranscriptFile.py -i $formatType.inputFileName 
- #if $formatType.FormatInputFileName == 'gff':
-   -f gff
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #end if
- #if $optionType.type == 'Yes':
- -t $optionType.value
- #end if 
- -o $outputFile 
- </command> 
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName"  type="select" label="Input File Format">
- <option value="gff">gff</option>
- <option value="gtf">gtf</option>
- <option value="gff3">gff3</option>
-  </param>
-  <when value="gff">
-  <param name="inputFileName" format="gff" type="data" label="Input File"/>  
- </when>
- <when value="gtf"> 
-          <param name="inputFileName" format="gtf" type="data" label="Input File"/> 
-  </when>
- <when value="gff3"> 
-          <param name="inputFileName" format="gff3" type="data" label="Input File"/> 
-  </when>
-  </conditional>

-  <conditional name="optionType">
-
- <param name="type" type="select" label="You can choose the tag that you are interested in, like tRNA,rRNA,ncRNA,CDS,exon, etc." help="Name of the types you want to keep in GFF/GTF (list separated by commas)">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
-  <param name="value" type="text" value="tRNA,rRNA,ncRNA,CDS,exon"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- </inputs>
-
-
- <outputs>
- <data name="outputFile" format="gtf">
- <change_format>
- <when input="formatType.FormatInputFileName" value="gff" format="gff" />
- <when input="formatType.FormatInputFileName" value="gff3" format="gff3" />
- </change_format> 
- </data>
-
- </outputs>
-<tests>
-    <test>
-      <param name="FormatInputFileName" value="gtf" />
-      <param name="inputFileName" value="genes.gtf" />
-      <param name="type" value="No" />
-      <output name="outputFile" file="exp_cleantranscriptfile_genes.gtf" />
-    </test>
-  </tests>
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/Clusterize.xml
--- a/SMART/galaxy/Clusterize.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,73 +0,0 @@
-<tool id="MergingDataClusterize" name="Clusterize">
- <description>Clusterizes the reads when their genomic intervals overlap.</description>
- <command interpreter="python">
- ../Java/Python/clusterize.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'csv':
- -f csv
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
- -o $outputFileGff 
- $colinear
- $normalize
- -d $distance
- $log $outputFileLog
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="csv">csv</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="csv">
- <param name="inputFileName" format="csv" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="colinear option" help="This option clusterizes only the same strand reads"/>
- <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize option for only GFF3 file format" help="This option normalize (attention!! Only for GFF3 file!!!!!)"/>
- <param name="log" type="boolean" truevalue="-l" falsevalue="" checked="false" label="log option" help="This option create a log file"/>
- <param name="distance" type="text" value="0" label="distance option" help="Limit the maximum distance between two reads"/>
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3"/>
- <data name="outputFileLog" format="txt">
- <filter>log</filter>
- </data>
- </outputs> 
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/CollapseReads.xml
--- a/SMART/galaxy/CollapseReads.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,59 +0,0 @@
-<tool id="collapseReads" name="collapse reads">
- <description>Merges two reads if they have exactly the same genomic coordinates.</description>
- <command interpreter="python">
- ../Java/Python/CollapseReads.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
- -$strand
- -o $outputFileGff 
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Strand option merges 2 different strands[default:False]."/>
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3"/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/CompareOverlappingSmallQuery.xml
--- a/SMART/galaxy/CompareOverlappingSmallQuery.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,180 +0,0 @@
-<tool id="CompareOverlappingSmallQuery" name="Compare Overlapping Small Query">
- <description>Provide the queries that overlap with a reference, when the query is small.</description>  
- <command interpreter="python">
- ../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 
- #if $formatType.FormatInputFileName1 == 'bed':  
- -f bed
- #elif $formatType.FormatInputFileName1 == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName1 == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName1 == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName1 == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName1 == 'gtf':
- -f gtf
- #end if
- -j $formatType2.inputFileName2
- #if $formatType2.FormatInputFileName2 == 'bed':
- -g bed
- #elif $formatType2.FormatInputFileName2 == 'gff':
- -g gff
- #elif $formatType2.FormatInputFileName2 == 'gff2':
- -g gff2
- #elif $formatType2.FormatInputFileName2 == 'gff3':
- -g gff3
- #elif $formatType2.FormatInputFileName2 == 'sam':
- -g sam
- #elif $formatType2.FormatInputFileName2 == 'gtf':
-     -g gtf
- #end if
- -o $outputFileGff 
- #if $OptionDistance.Dist == 'Yes':
- -d $OptionDistance.distance
- #end if
- #if $OptionMinOverlap.present == 'Yes':
- -m $OptionMinOverlap.minOverlap
- #end if
- #if $OptionPcOverlapQuery.present == 'Yes':
- -p $OptionPcOverlapQuery.minOverlap
- #end if
- #if $OptionPcOverlapRef.present == 'Yes':
- -P $OptionPcOverlapRef.minOverlap
- #end if
- #if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
- -c 
- #elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
- -a
- #end if
- $InvertMatch
- $NotOverlapping
- $OptionInclusionQuery
- $OptionInclusionRef
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName1" type="select" label="Input Query File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
- </when>
- <when value="gff">
- <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
- </when>
- <when value="gff2">
- <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
- </when>
- <when value="gff3">
- <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
- </when>
- <when value="sam">
- <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
- </when>
- <when value="gtf">
- <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
-                         </when>
- </conditional>
-
- <conditional name="formatType2">
- <param name="FormatInputFileName2" type="select" label="Input Reference File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
- </when>
- <when value="gff">
- <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
- </when>
- <when value="gff2">
- <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
- </when>
- <when value="gff3">
- <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
- </when>
- <when value="sam">
- <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
- </when>
- <when value="gtf">
- <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
- </when>
- </conditional>
- <conditional name="OptionDistance">
- <param name="Dist" type="select" label="Maximum Distance between two reads">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="distance" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
- <conditional name="OptionMinOverlap">
- <param name="present" type="select" label="Minimum number of common nucleotides to declare an overlap">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="minOverlap" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
- <conditional name="OptionPcOverlapQuery">
- <param name="present" type="select" label="N% of the query must overlap">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="minOverlap" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
- <conditional name="OptionPcOverlapRef">
- <param name="present" type="select" label="N% of the reference must overlap">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="minOverlap" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
- <param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must nested in a query"/>
- <param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must nested in a query"/>
- <conditional name="OptionCollinearOrAntiSens">
- <param name="OptionCA" type="select" label="Collinear or anti-sens">
- <option value="Collinear">Collinear</option>
- <option value="AntiSens">AntiSens</option>
- <option value="NONE" selected="true">NONE</option>
- </param>
- <when value="Collinear">
- </when>
- <when value="AntiSens">
- </when>
- <when value="NONE">
- </when>
- </conditional>
- <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>
- <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3"/>
- </outputs> 
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/CompareOverlappingSmallRef.xml
--- a/SMART/galaxy/CompareOverlappingSmallRef.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,180 +0,0 @@
-<tool id="CompareOverlappingSmallRef" name="Compare Overlapping Small Reference">
- <description>Provide the queries that overlap with a reference, when the reference is small.</description>  
- <command interpreter="python">
- ../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 
- #if $formatType.FormatInputFileName1 == 'bed':  
- -f bed
- #elif $formatType.FormatInputFileName1 == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName1 == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName1 == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName1 == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName1 == 'gtf':
- -f gtf
- #end if
- -j $formatType2.inputFileName2
- #if $formatType2.FormatInputFileName2 == 'bed':
- -g bed
- #elif $formatType2.FormatInputFileName2 == 'gff':
- -g gff
- #elif $formatType2.FormatInputFileName2 == 'gff2':
- -g gff2
- #elif $formatType2.FormatInputFileName2 == 'gff3':
- -g gff3
- #elif $formatType2.FormatInputFileName2 == 'sam':
- -g sam
- #elif $formatType2.FormatInputFileName2 == 'gtf':
-     -g gtf
- #end if
- -o $outputFileGff 
- #if $OptionDistance.Dist == 'Yes':
- -d $OptionDistance.distance
- #end if
- #if $OptionMinOverlap.present == 'Yes':
- -m $OptionMinOverlap.minOverlap
- #end if
- #if $OptionPcOverlapQuery.present == 'Yes':
- -p $OptionPcOverlapQuery.minOverlap
- #end if
- #if $OptionPcOverlapRef.present == 'Yes':
- -P $OptionPcOverlapRef.minOverlap
- #end if
- #if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
- -c 
- #elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
- -a
- #end if
- $InvertMatch
- $NotOverlapping
- $OptionInclusionQuery
- $OptionInclusionRef
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName1" type="select" label="Input Query File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
- </when>
- <when value="gff">
- <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
- </when>
- <when value="gff2">
- <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
- </when>
- <when value="gff3">
- <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
- </when>
- <when value="sam">
- <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
- </when>
- <when value="gtf">
- <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
-                         </when>
- </conditional>
-
- <conditional name="formatType2">
- <param name="FormatInputFileName2" type="select" label="Input Reference File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
- </when>
- <when value="gff">
- <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
- </when>
- <when value="gff2">
- <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
- </when>
- <when value="gff3">
- <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
- </when>
- <when value="sam">
- <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
- </when>
- <when value="gtf">
- <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
- </when>
- </conditional>
- <conditional name="OptionDistance">
- <param name="Dist" type="select" label="Maximum Distance between two reads">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="distance" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
- <conditional name="OptionMinOverlap">
- <param name="present" type="select" label="Minimum number of common nucleotides to declare an overlap">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="minOverlap" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
- <conditional name="OptionPcOverlapQuery">
- <param name="present" type="select" label="N% of the query must overlap">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="minOverlap" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
- <conditional name="OptionPcOverlapRef">
- <param name="present" type="select" label="N% of the reference must overlap">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="minOverlap" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
- <param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must nested in a query"/>
- <param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must nested in a query"/>
- <conditional name="OptionCollinearOrAntiSens">
- <param name="OptionCA" type="select" label="Collinear or anti-sens">
- <option value="Collinear">Collinear</option>
- <option value="AntiSens">AntiSens</option>
- <option value="NONE" selected="true">NONE</option>
- </param>
- <when value="Collinear">
- </when>
- <when value="AntiSens">
- </when>
- <when value="NONE">
- </when>
- </conditional>
- <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>
- <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3"/>
- </outputs> 
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile.xml
--- a/SMART/galaxy/ConvertTranscriptFile.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,119 +0,0 @@
-<tool id="ConvertTranscriptFile" name="Convert transcript file">
-  <description>Convert a file from a format to another.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFormatType.inputFileName 
-   #if $inputFormatType.FormatInputFileName == 'gff3':
-    -f gff3
-   #elif $inputFormatType.FormatInputFileName == 'bed': 
-   -f bed 
-   #elif $inputFormatType.FormatInputFileName == 'gff2': 
-   -f gff2
-   #elif $inputFormatType.FormatInputFileName == 'bam': 
-   -f blast
-   #elif $inputFormatType.FormatInputFileName == 'sam': 
-   -f sam
-   #elif $inputFormatType.FormatInputFileName == 'gtf': 
-   -f gtf
-          #end if
-   
-  -g $outputFormatType.outFormat
-     #if $optionSequence.choose == 'Yes':
-    -s $optionSequence.value
- #end if 
-
-   
-   -n $name
-   $strand
-   -o $outputFile
-  
-  </command>
-  <inputs>
-   <conditional name="inputFormatType">
-   <param name="FormatInputFileName"  type="select" label="Input File Format">
-   <option value="gff3">GFF3</option> 
-   <option value="bed">BED</option> 
-   <option value="gff2">GFF2</option> 
-   <option value="bam">BAM</option> 
-   <option value="sam">SAM</option> 
-   <option value="gtf">GTF</option> 
-   </param>
-   <when value="gff3">  
-   <param name="inputFileName" format="gff3" type="data" label="Input File"/>
-   </when>
-   <when value="bed">  
-   <param name="inputFileName" format="bed" type="data" label="Input File"/>
-   </when>
-   <when value="gff2">  
-   <param name="inputFileName" format="gff2" type="data" label="Input File"/>
-   </when>
-   <when value="bam">  
-   <param name="inputFileName" format="bam" type="data" label="Input File"/>
-   </when>
-   <when value="sam">  
-   <param name="inputFileName" format="sam" type="data" label="Input File"/>
-   </when>
-   <when value="gtf">  
-   <param name="inputFileName" format="gtf" type="data" label="Input File"/>
-   </when>
- </conditional>    
-  
-  
-    <conditional name="outputFormatType">
-   <param name="outFormat"  type="select" label="Please choose the format that you want to convert to (corresponding to your input file format).">
-   <option value="gff3">GFF3</option> 
-   <option value="bed">BED</option> 
-   <option value="gff2">GFF2</option> 
-   <option value="wig">WIG</option> 
-   <option value="sam">SAM</option> 
-   <option value="csv">CSV</option> 
-   <option value="gtf">GTF</option> 
-   </param> 
-     <when value="gff3">  
-   </when>
-   <when value="bed">  
-   </when>
-     <when value="gff2">  
-   </when>
-   <when value="wig">  
-   </when>
-     <when value="sam">  
-   </when>
-   <when value="csv">  
-   </when>
-   <when value="gtf">  
-   </when>
-  </conditional>  

-  <param name="name" type="text" value="SMART" label="name for the transcripts"/>

-  <conditional name="optionSequence">
-  <param name="choose" type="select" label="give the corresponding Multi-Fasta file (useful for EMBL format)">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
-  <param name="value" type="data" format="mfa" />
- </when>
- <when value="No">
- </when>
- </conditional>
-   
-  <param name="strand" type="boolean" truevalue="-t" falsevalue="" checked="false" label="consider the 2 strands as different (only useful for writing WIG files)"/>
-   
-  </inputs>
-
-  <outputs>
-   <data name="outputFile" format="gff3" label="$inputFormatType.FormatInputFileName to $outputFormatType.outFormat">
- <change_format>
- <when input="outputFormatType.outFormat" value="bed" format="bed" />
- <when input="outputFormatType.outFormat" value="gff2" format="gff2" />
- <when input="outputFormatType.outFormat" value="wig" format="wig" />
- <when input="outputFormatType.outFormat" value="sam" format="sam" />
- <when input="outputFormatType.outFormat" value="csv" format="csv" />
- <when input="outputFormatType.outFormat" value="gtf" format="gtf" />
- </change_format> 
- </data>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_BedToCsv.xml
--- a/SMART/galaxy/ConvertTranscriptFile_BedToCsv.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_BedToCsv" name="Bed -> Csv">
-  <description>Convert Bed File to Csv File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g csv yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="bed"/>
-  </inputs>
-
-  <outputs>
-    <data format="csv" name="outputFile" label="[bed -> csv] Output File"/>
-    <data format="txt" name="logFile" label="[bed -> csv] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_BedToGff2.xml
--- a/SMART/galaxy/ConvertTranscriptFile_BedToGff2.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_BedToGff2" name="Bed -> Gff2">
-  <description>Convert Bed File to Gff2 File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g gff yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="bed"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff" name="outputFile" label="[bed -> gff2] Output File"/>
-    <data format="txt" name="logFile" label="[bed -> gff2] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_BedToGff3.xml
--- a/SMART/galaxy/ConvertTranscriptFile_BedToGff3.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_BedToGff3" name="Bed -> Gff3">
-  <description>Convert Bed File to Gff3 File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g gff3 yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="bed"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[bed -> gff3] Output File"/>
-    <data format="txt" name="logFile" label="[bed -> gff3] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_BedToSam.xml
--- a/SMART/galaxy/ConvertTranscriptFile_BedToSam.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_BedToSam" name="Bed -> Sam">
-  <description>Convert Bed File to Sam File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f bed -o $outputFile -g sam yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="bed"/>
-  </inputs>
-
-  <outputs>
-    <data format="sam" name="outputFile" label="[bed -> sam] Output File"/>
-    <data format="txt" name="logFile" label="[bed -> sam] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_BlastToCsv.xml
--- a/SMART/galaxy/ConvertTranscriptFile_BlastToCsv.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_BlastToCsv" name="Blast (-m 8) -> Csv">
-  <description>Convert Blast (-m 8) File to Csv File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g csv yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="tabular"/>
-  </inputs>
-
-  <outputs>
-    <data format="csv" name="outputFile" label="[blast -> csv] Output File"/>
-    <data format="txt" name="logFile" label="[blast -> csv] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_BlastToGff2.xml
--- a/SMART/galaxy/ConvertTranscriptFile_BlastToGff2.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_BlastToGff2" name="Blast (-m 8) -> Gff2">
-  <description>Convert Blast (-m 8) File to Gff2 File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g gff2 yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="tabular"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff" name="outputFile" label="[blast -> gff2] Output File"/>
-    <data format="txt" name="logFile" label="[blast -> gff2] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_BlastToGff3.xml
--- a/SMART/galaxy/ConvertTranscriptFile_BlastToGff3.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_BlastToGff3" name="Blast (-m 8) -> Gff3">
-  <description>Convert Blast (-m 8) File to Gff3 File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g gff3 yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="tabular"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[blast -> gff3] Output File"/>
-    <data format="txt" name="logFile" label="[blast -> gff3] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_BlastToSam.xml
--- a/SMART/galaxy/ConvertTranscriptFile_BlastToSam.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_BlastToSam" name="Blast (-m 8) -> Sam">
-  <description>Convert Blast (-m 8) File to Sam File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f blast -o $outputFile -g sam yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="tabular"/>
-  </inputs>
-
-  <outputs>
-    <data format="sam" name="outputFile" label="[blast -> sam] Output File"/>
-    <data format="txt" name="logFile" label="[blast -> sam] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_FastqToFasta.xml
--- a/SMART/galaxy/ConvertTranscriptFile_FastqToFasta.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_FastqToFasta" name="Fastq -> Fasta">
-  <description>Convert Fastq File to Fasta File.</description>
-  <command interpreter="python"> ../Java/Python/fastqToFasta.py -i $inputFile -o $outputFile 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="fastq"/>
-  </inputs>
-
-  <outputs>
-    <data format="fasta" name="outputFile" label="[fastq -> fasta] Output File"/>
-    <data format="txt" name="logFile" label="[fastq -> fasta] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_Gff2ToCsv.xml
--- a/SMART/galaxy/ConvertTranscriptFile_Gff2ToCsv.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_Gff2ToCsv" name="Gff2 -> Csv">
-  <description>Convert Gff2 File to Csv File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g csv yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff"/>
-  </inputs>
-
-  <outputs>
-    <data format="csv" name="outputFile" label="[gff2 -> csv] Output File"/>
-    <data format="txt" name="logFile" label="[gff2 -> csv] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_Gff2ToGff3.xml
--- a/SMART/galaxy/ConvertTranscriptFile_Gff2ToGff3.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_Gff2ToGff3" name="Gff2 -> Gff3">
-  <description>Convert Gff2 File to Gff3 File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g gff3 yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[gff2 -> gff3] Output File"/>
-    <data format="txt" name="logFile" label="[gff2 -> gff3] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_Gff2ToSam.xml
--- a/SMART/galaxy/ConvertTranscriptFile_Gff2ToSam.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_Gff2ToSam" name="Gff2 -> Sam">
-  <description>Convert Gff2 File to Sam File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff2 -o $outputFile -g sam yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff"/>
-  </inputs>
-
-  <outputs>
-    <data format="sam" name="outputFile" label="[gff2 -> sam] Output File"/>
-    <data format="txt" name="logFile" label="[gff2 -> sam] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_Gff3ToCsv.xml
--- a/SMART/galaxy/ConvertTranscriptFile_Gff3ToCsv.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_Gff3ToCsv" name="Gff3 -> Csv">
-  <description>Convert Gff3 File to Csv File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g csv yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff3"/>
-  </inputs>
-
-  <outputs>
-    <data format="csv" name="outputFile" label="[gff3 -> csv] Output File"/>
-    <data format="txt" name="logFile" label="[gff3 -> csv] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_Gff3ToGff2.xml
--- a/SMART/galaxy/ConvertTranscriptFile_Gff3ToGff2.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_Gff3ToGff2" name="Gff3 -> Gff2">
-  <description>Convert Gff3 File to Gff2 File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g gff2 yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff3"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff" name="outputFile" label="[gff3 -> gff2] Output File"/>
-    <data format="txt" name="logFile" label="[gff3 -> gff2] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_Gff3ToSam.xml
--- a/SMART/galaxy/ConvertTranscriptFile_Gff3ToSam.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_Gff3ToSam" name="Gff3 -> Sam">
-  <description>Convert Gff3 File to Sam File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g sam yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff3"/>
-  </inputs>
-
-  <outputs>
-    <data format="sam" name="outputFile" label="[gff3 -> sam] Output File"/>
-    <data format="txt" name="logFile" label="[gff3 -> sam] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_Gff3ToWig.xml
--- a/SMART/galaxy/ConvertTranscriptFile_Gff3ToWig.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_Gff3ToWig" name="Gff3 -> Wig">
-  <description>Convert Gff3 File to Wig File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f gff3 -o $outputFile -g wig yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff3"/>
-  </inputs>
-
-  <outputs>
-    <data format="wig" name="outputFile" label="[gff3 -> wig] Output File"/>
-    <data format="txt" name="logFile" label="[gff3 -> wig] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_SamToCsv.xml
--- a/SMART/galaxy/ConvertTranscriptFile_SamToCsv.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_SamToCsv" name="Sam -> Csv">
-  <description>Convert Sam File to Csv File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g csv yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="sam"/>
-  </inputs>
-
-  <outputs>
-    <data format="csv" name="outputFile" label="[sam -> csv] Output File"/>
-    <data format="txt" name="logFile" label="[sam -> csv] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_SamToGff2.xml
--- a/SMART/galaxy/ConvertTranscriptFile_SamToGff2.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_SamToGff2" name="Sam -> Gff2">
-  <description>Convert Sam File to Gff2 File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g gff2 yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="sam"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff" name="outputFile" label="[sam -> gff2] Output File"/>
-    <data format="txt" name="logFile" label="[sam -> gff2] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/ConvertTranscriptFile_SamToGff3.xml
--- a/SMART/galaxy/ConvertTranscriptFile_SamToGff3.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="ConvertTranscriptFile_SamToGff3" name="Sam -> Gff3">
-  <description>Convert Sam File to Gff3 File.</description>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFile -f sam -o $outputFile -g gff3 yes 2>$logFile </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="sam"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[sam -> gff3] Output File"/>
-    <data format="txt" name="logFile" label="[sam -> gff3] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/CountReadGCPercent.xml
--- a/SMART/galaxy/CountReadGCPercent.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,16 +0,0 @@
-<tool id="CountReadGCPercent" name="count read GCpercent">
-    <description>Count GC percent for each read against a genome.</description>
-    <command interpreter="python"> ../Java/Python/CountReadGCPercent.py -i $inputFastaFile -j $inputGffFile -o $outputFile</command>
-  <inputs>
-      <param name="inputFastaFile" type="data" label="Input reference fasta File" format="fasta"/>
-      <param name="inputGffFile" type="data" label="Input File" format="gff3"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[CountReadGCPercent] Output File"/>
-   </outputs>
-
-  <help>
-  </help>
-</tool>
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/DiffExpAnal.xml
--- a/SMART/galaxy/DiffExpAnal.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,19 +0,0 @@
-<tool id="testDiffExpAnal" name="Differential Expression Analysis">
-  <description>Differential expression analysis for sequence count data</description>
-  <command interpreter="sh"> ../DiffExpAnal/testR.sh $inputFile $columnsOfGeneName $columnsOfCondition1 $columnsOfCondition2 $outputFileCSV $outputFilePNG 2>$outputLog </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="tabular"/>
- <param name="columnsOfGeneName" type="text" value="0" label="Please indicate the column numbers of gene names with ',' separator. If There are not gene names, default value is 0."/>
- <param name="columnsOfCondition1" type="text" value="1,2" label="Please indicate the column numbers of condition1 with ',' separator."/>
- <param name="columnsOfCondition2" type="text" value="3,4" label="Please indicate the column numbers of condition2 with ',' separator."/>
-  </inputs>
-
-  <outputs>
-    <data format="tabular" name="outputFileCSV" label="[DiffExpAnal] Output CSV File"/>
- <data format="png" name="outputFilePNG" label="[DiffExpAnal] Output PNG File"/>
-    <data format="tabular" name="outputLog" label="[DiffExpAnal] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/FindOverlaps_optim.xml
--- a/SMART/galaxy/FindOverlaps_optim.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,16 +0,0 @@
-<tool id="findOverlaps" name="findOverlaps">
- <description>Finds the overlapped reference reads.</description>
- <command interpreter="python">
- ../Java/Python/FindOverlaps_optim.py -i $inputRef -j $inputQ -o $outputFileGff 
- </command>
-
-  <inputs>
-    <param name="inputRef" type="data" label="Input Reference File" format="gff3"/>
-    <param name="inputQ" type="data" label="Input Query File" format="gff3"/>
-  </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3"/>
- </outputs> 
-
-</tool>
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/GetDifferentialExpression.xml
--- a/SMART/galaxy/GetDifferentialExpression.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,210 +0,0 @@
-<tool id="GetDifferentialExpression" name="get differential expression">
- <description>Get the differential expression between 2 conditions using Fisher's exact test, on regions defined by a third file.</description>
- <command interpreter="python">
- ../Java/Python/GetDifferentialExpression.py -i $formatType.inputFileName1
- #if $formatType.FormatInputFileName1 == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName1 == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName1 == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName1 == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName1 == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName1 == 'gtf':
- -f gtf
- #end if
-
- -j $formatType2.inputFileName2
- #if $formatType2.FormatInputFileName2 == 'bed':
- -g bed
- #elif $formatType2.FormatInputFileName2 == 'gff':
- -g gff
- #elif $formatType2.FormatInputFileName2 == 'gff2':
- -g gff2
- #elif $formatType2.FormatInputFileName2 == 'gff3':
- -g gff3
- #elif $formatType2.FormatInputFileName2 == 'sam':
- -g sam
- #elif $formatType2.FormatInputFileName2 == 'gtf':
- -g gtf
- #end if
-
- -k $formatTypeRef.inputFileNameRef
- #if $formatTypeRef.FormatInputFileNameRef == 'bed':
- -l bed
- #elif $formatTypeRef.FormatInputFileNameRef == 'gff':
- -l gff
- #elif $formatTypeRef.FormatInputFileNameRef == 'gff2':
- -l gff2
- #elif $formatTypeRef.FormatInputFileNameRef == 'gff3':
- -l gff3
- #elif $formatTypeRef.FormatInputFileNameRef == 'sam':
- -l sam
- #elif $formatTypeRef.FormatInputFileNameRef == 'gtf':
- -l gtf
- #end if
-
- -o $outputFileGff 
-
- $simple
- $adjusted
-
-
- #if $optionSimplePara.simplePara == 'Yes':
- -S $optionSimplePara.paraValue
- #end if
-
- #if $optionFixedSizeFactor.FSF == 'Yes':
- -x $optionFixedSizeFactor.FSFValue
- #end if
-
- #if $optionFDR.FDR == 'Yes':
- -d $optionFDR.FDRValue
- #end if
- $plot $outputFilePNG
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName1" type="select" label="Input File Format 1">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
- </when>
- <when value="gff">
- <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
- </when>
- <when value="gff2">
- <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
- </when>
- <when value="gff3">
- <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
- </when>
- <when value="sam">
- <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
- </when>
- <when value="gtf">
- <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
- </when>
- </conditional>
-
- <conditional name="formatType2">
- <param name="FormatInputFileName2" type="select" label="Input File Format 2">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
- </when>
- <when value="gff">
- <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
- </when>
- <when value="gff2">
- <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
- </when>
- <when value="gff3">
- <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
- </when>
- <when value="sam">
- <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
- </when>
- <when value="gtf">
- <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
- </when>
- </conditional>
-
- <conditional name="formatTypeRef">
- <param name="FormatInputFileNameRef" type="select" label="Input Ref File Format ">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileNameRef" format="bed" type="data" label="Input Ref File"/>
- </when>
- <when value="gff">
- <param name="inputFileNameRef" format="gff" type="data" label="Input Ref File"/>
- </when>
- <when value="gff2">
- <param name="inputFileNameRef" format="gff2" type="data" label="Input Ref File"/>
- </when>
- <when value="gff3">
- <param name="inputFileNameRef" format="gff3" type="data" label="Input Ref File"/>
- </when>
- <when value="sam">
- <param name="inputFileNameRef" format="sam" type="data" label="Input Ref File"/>
- </when>
- <when value="gtf">
- <param name="inputFileNameRef" format="gtf" type="data" label="Input Ref File"/>
- </when>
- </conditional>
-
- <param name="simple" type="boolean" truevalue="-s" falsevalue="" checked="false" label="normalize using the number of reads in each condition"/>
- <param name="adjusted" type="boolean" truevalue="-a" falsevalue="" checked="false" label="normalize using the number of reads of 'mean' regions"/>
-
- <conditional name="optionSimplePara">
- <param name="simplePara" type="select" label="provide the number of reads" >
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="paraValue" type="text" value="None" label="provide the number of reads" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionFixedSizeFactor">
- <param name="FSF" type="select" label="give the magnification factor for the normalization using fixed size sliding windows in reference regions (leave empty for no such normalization)">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="FSFValue" type="integer" value="0" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionFDR">
- <param name="FDR" type="select" label="use FDR">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="FDRValue" type="float" value="0.0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <param name="plot" type="boolean" truevalue="-p" falsevalue="" checked="false" label="plot option" help="plot cloud plot"/>
-
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3" label="[GetDifferentialExpression]out file"/>
- <data name="outputFilePNG" format="PNG" label="[GetDifferentialExpression]PNG file">
- <filter>plot</filter>
- </data>
- </outputs> 
-
- <help>
- example: python GetDifferentialExpression.py -i input1 -f gff3 -j input2 -g gff3 -k ref -l gff3 -o output.gff3
- </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/GetFlanking.xml
--- a/SMART/galaxy/GetFlanking.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,179 +0,0 @@
-<tool id="GetFlanking" name="get flanking">
- <description>Get the flanking regions of a set of reference.</description>
- <command interpreter="python">
- ../Java/Python/GetFlanking.py -i $formatType.inputFileName1
- #if $formatType.FormatInputFileName1 == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName1 == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName1 == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName1 == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName1 == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName1 == 'gtf':
- -f gtf
- #end if
-
- -j $formatType2.inputFileName2
- #if $formatType2.FormatInputFileName2 == 'bed':
- -g bed
- #elif $formatType2.FormatInputFileName2 == 'gff':
- -g gff
- #elif $formatType2.FormatInputFileName2 == 'gff2':
- -g gff2
- #elif $formatType2.FormatInputFileName2 == 'gff3':
- -g gff3
- #elif $formatType2.FormatInputFileName2 == 'sam':
- -g sam
- #elif $formatType2.FormatInputFileName2 == 'gtf':
- -g gtf
- #end if
-
-   #if $OptionUpDownStream.OptionUD == 'UpStream':
- -5 
- #elif $OptionUpDownStream.OptionUD == 'DownStream':
- -3
- #end if
-
-
-   #if $OptionColinearOrAntiSens.OptionCA == 'Colinear':
- -c 
- #elif $OptionColinearOrAntiSens.OptionCA == 'AntiSens':
- -a
- #end if
-
- #if $OptionMax.maximum == "Yes":
- -D $OptionMax.max
- #end if
- #if $OptionMin.minimum == "Yes":
- -d $OptionMin.min
- #end if
-  
-   -o $outputFile  
-
-
-
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName1" type="select" label="query File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
- </when>
- <when value="gff">
- <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
- </when>
- <when value="gff2">
- <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
- </when>
- <when value="gff3">
- <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
- </when>
- <when value="sam">
- <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
- </when>
- <when value="gtf">
- <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
- </when>
- </conditional>
-
- <conditional name="formatType2">
- <param name="FormatInputFileName2" type="select" label="Refence File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
- </when>
- <when value="gff">
- <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
- </when>
- <when value="gff2">
- <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
- </when>
- <when value="gff3">
- <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
- </when>
- <when value="sam">
- <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
- </when>
- <when value="gtf">
- <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
- </when>
- </conditional>
-
- <conditional name="OptionUpDownStream">
- <param name="OptionUD" type="select" label="UpStream or DownStream">
- <option value="UpStream">UpStream</option>
- <option value="DownStream">DownStream</option>
- <option value="NONE" selected="true">NONE</option>
- </param>
- <when value="UpStream">
- </when>
- <when value="DownStream">
- </when>
- <when value="NONE">
- </when>
- </conditional>
-
- <conditional name="OptionColinearOrAntiSens">
- <param name="OptionCA" type="select" label="Colinear or anti-sens">
- <option value="Colinear">Colinear</option>
- <option value="AntiSens">AntiSens</option>
- <option value="NONE" selected="true">NONE</option>
- </param>
- <when value="Colinear">
- </when>
- <when value="AntiSens">
- </when>
- <when value="NONE">
- </when>
- </conditional>
-
- <conditional name="OptionMax">
- <param name="maximum" type="select" label="maximum distance between 2 elements">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="max" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionMin">
- <param name="minimum" type="select" label="minimum distance between 2 elements">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="min" type="integer" value="0" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- </inputs>
-
-
-   <outputs>
-    <data format="gff3" name="outputFile" label="[GetFlanking] Output File"/>
-   </outputs>
-
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/SelectByTag.xml
--- a/SMART/galaxy/SelectByTag.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,122 +0,0 @@
-<tool id="SelectByTag" name="select by tag">
- <description>Keeps the genomic coordinates such that a value of a given tag.</description>
- <command interpreter="python">
- ../Java/Python/SelectByTag.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
-
- -g $Tag
- #if $OptionValue.Value == "Yes":
- -a $OptionValue.valeur
- #end if
- #if $OptionMax.maximum == "Yes":
- -M $OptionMax.max
- #end if
- #if $OptionMin.minimum == "Yes":
- -m $OptionMin.min
- #end if
-
- #if $OptionDefault.default == "Yes":
- -d $OptionDefault.defaultValue
- #end if
-
- -o $outputFileGff 
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="Tag" type="text" value="None" label="tag option" help="A given tag, you must choose a tag."/>
-
- <conditional name="OptionValue">
- <param name="Value" type="select" label="value of tag">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="valeur" type="integer" value="1" help="Be Careful! The value must be upper than 0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionMax">
- <param name="maximum" type="select" label="maximum value of tag">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="max" type="integer" value="1" help="Be Careful! The value must be upper than 0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionMin">
- <param name="minimum" type="select" label="minimum value of tag">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="min" type="integer" value="1" help="Be Careful! The value must be upper than 0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionDefault">
- <param name="default" type="select" label="gives this value if tag is not present">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="defaultValue" type="float" value="0" />
- </when>
- <when value="No">
- </when>
- </conditional>
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3" label="[SelectByTag] Output File"/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/WrappGetLetterDistribution.py
--- a/SMART/galaxy/WrappGetLetterDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,97 +0,0 @@
-#! /usr/bin/env python
-
-import os
-import sys
-import getopt
-from commons.core.checker.CheckerException import CheckerException
-
-SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
-
-class WrappGetLetterDistribution(object):
-    
-    def __init__(self):
-        self._inputFileName = ""
-        self._inputFileFormat = ""
-        self._outputFileName = "tmpOutputFile"
-        self._csv = False
-
-    def help( self ):
-        print 
-        print "usage: %s [ options ]" % ( sys.argv[0] )
-        print "options:"
-        print "     -h: this help"
-        print "     -i: input file"
-        print "     -f: 'fasta' or 'fastq'"
-        print "     -c: CSV output file"
-        print "     -a: first PNG output file"
-        print "     -b: second PNG output file"
-        print
-        print "Exemple:"
-        print
-        print "1:\n\tpython WrappGetLetterDistribution.py -i inputFile.fasta -f fasta -c outputFile1.csv -a outputFile2.png -b outputFile3.png"
-        print
-        print "2:\n\tpython WrappGetLetterDistribution.py -i inputFile.fastq -f fastq -c outputFile1.csv -a outputFile2.png -b outputFile3.png"
-        print
-        print
-
-
-    def setAttributesFromCommandLine(self):
-        try:
-            opts, args = getopt.getopt( sys.argv[1:], "hi:f:a:b:c:" )
-        except getopt.GetoptError, err:
-            print str(err); sys.exit(1)
-        for o, a in opts:
-            if o == "-h":
-                self.help()
-                sys.exit(0)
-            if o == "-i":
-                self._inputFileName = a
-            elif o == "-f":
-                self._inputFileFormat = a
-            elif o == "-c":             
-                self._outputFileNameCSV = a  
-                self._csv = True
-            elif o == "-a":
-                self._outputFileNamePNG = a
-            elif o == "-b":
-                self._outputFileNamePerNtPNG = a 
-
-    def checkAttributes(self):
-        lMsg = []
-        if self._inputFileName == "" and not os.path.exists(self._inputFileName):
-            lMsg.append("ERROR: This input file doesn't exist!")
-        if self._inputFileFormat == "":
-            lMsg.append("ERROR: No input file format specified in option!")
-        if self._outputFileNamePNG == "":
-            lMsg.append("ERROR: No output file.png specified in option!")            
-        if self._outputFileNamePerNtPNG == "":
-            lMsg.append("ERROR: No output filePerNt.png specified in option!")
-        if self._outputFileNameCSV == "" and self._csv == True :
-            lMsg.append("ERROR: No output file.csv specified in option!")   
-            
-        print ">>> lMsg " + str(lMsg)
-        if lMsg != []:
-            exp = CheckerException()
-            exp.setMessages(lMsg)
-            raise (exp)
-
-    def _cleanWorkingDir(self, cDir):
-        os.system("rm %s/tmpData* %s/tmpScript*" % (cDir, cDir))
-        
-    def wrapp(self):
-        self.checkAttributes()
- cDir = os.getcwd()
-
-        if self._csv == True:
-            os.system("python %s/Java/Python/getLetterDistribution.py -i %s -f %s -o %s/%s -c" % (SMART_PATH, self._inputFileName, self._inputFileFormat, cDir, self._outputFileName))
-            os.system("mv %s/%s.csv %s" % (cDir, self._outputFileName, self._outputFileNameCSV))
-            os.system("mv %s/%s.png %s" % (cDir, self._outputFileName, self._outputFileNamePNG))
-            os.system("mv %s/%sPerNt.png %s" % (cDir, self._outputFileName, self._outputFileNamePerNtPNG))
-
-        self._cleanWorkingDir(cDir)
-
-if __name__ == '__main__':
-    launcher = WrappGetLetterDistribution()
-    launcher.setAttributesFromCommandLine()
-    launcher.wrapp()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/WrappGetLetterDistribution.xml
--- a/SMART/galaxy/WrappGetLetterDistribution.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,40 +0,0 @@
-<tool id="getLetterDistribution1" name="Get Letter Distribution">
-    <description>Calculate distribution for each nucleotide per position for all short reads (S-MART)</description>
-    <command interpreter="python">
-     WrappGetLetterDistribution.py -i $inputFileName
- #if $formatType.FormatInputFileName == 'fasta':
- -f fasta
- #else :
- -f fastq
- #end if
- -c $ouputFileNameCSV -a $ouputFileNamePNG1 -b $ouputFileNamePNG2
-    </command>
-    <inputs>
-             <conditional name="formatType">
-       <param name="FormatInputFileName" type="select" label="Input File Format">
-          <option value="fasta">fasta</option>
-        <option value="fastq" selected="true">fastq</option>
-       </param>
-       <when value="fasta">
-              <param name="inputFileName" format="fasta" type="data" label="Fasta Input File"/>
-       </when>
-       <when value="fastq">
-              <param name="inputFileName" format="fastq" type="data" label="Fastq Input File"/>
-       </when>
-             </conditional>
-    </inputs>
-        
-    <outputs>
-                <data name="ouputFileNameCSV" format="tabular" label="[getLetterDistribution] CSV File"/>
-                <data name="ouputFileNamePNG1" format="png" label="[getLetterDistribution] PNG File 1"/>
-                <data name="ouputFileNamePNG2" format="png" label="[getLetterDistribution] PNG File 2"/>
-    </outputs>
-    <tests>
-     <test>
-            <param name="FormatInputFileName" value="fastq" />
-            <param name="inputFileName" value="short_fastq.fastq" />
-            <output name="outputFileNameCSV" file="exp_getletterdistribution_short_fastq.csv" />     
-        </test>
-    </tests>
-</tool>
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/changeGffFeatures.xml
--- a/SMART/galaxy/changeGffFeatures.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,16 +0,0 @@
-<tool id="changeGffFeatures" name="change gff Features">
- <description>Changes one feature name by an other name (the feature name can be found on the 3rd column).</description>
- <command interpreter="bash">
- ../Java/Python/changeGffFeatures.sh $inputFile $inputFeature $outputFeature >$outputFile
- </command>
-      <inputs>
-       <param name="inputFile" type="data" label="Input File" format="gff"/>
-       <param name="inputFeature" type="text" value="exon" label="A given feature, you must choose a feature name(on the 3rd column)."/>
-       <param name="outputFeature" type="text" value="exon" label="You must choose an other feature name(on the 3rd column)."/>
-      </inputs>
-
-      <outputs>
-             <data name="outputFile" format="gff" label="[changeGffFeatures] Output File"/>
-      </outputs> 
-</tool>
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/changeTagName.xml
--- a/SMART/galaxy/changeTagName.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,54 +0,0 @@
-<tool id="changeTagName" name="change tag name">
- <description>Changes the name of tag of a list of transcripts.</description>
- <command interpreter="python">
- ../Java/Python/changeTagName.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #end if
-
- -t $Tag
- -n $name
-
- -o $outputFileGff 
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="Tag" type="text" value="None" label="tag option" help="A given tag, you must choose a tag."/>
- <param name="name" type="text" value="None" label="name option" help="new name for the tag, you must choose a new name."/>
-
-
-
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3" label="[changeTagName] Output File"/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/cleanGff.xml
--- a/SMART/galaxy/cleanGff.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,18 +0,0 @@
-<tool id="cleanGff" name="clean Gff">
-   <description>Cleans a GFF file as given by NCBI and outpus a Gff3 file.</description>
-   <command interpreter="python"> ../Java/Python/cleanGff.py -i $inputFile 
-   -t $type 
-   -o $outputFile
-   </command>
-
-       <inputs>
-       <param name="inputFile" type="data" label="Input File" format="gff"/>
-       <param name="type" type="text" value="tRNA,rRNA,ncRNA,CDS" label="tag option, compulsory option" help="lists of comma separated types that you want to keep.EX: ncRNA,tRNA,rRNA,CDS"/>
-       </inputs>
-
-       <outputs>
-           <data format="gff3" name="outputFile" label="[cleanGff] Output File"/>
-       </outputs>
-
-</tool>
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/clusterize.xml
--- a/SMART/galaxy/clusterize.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,67 +0,0 @@
-<tool id="MergingDataClusterize" name="Clusterize">
- <description>Clusterizes the reads when their genomic intervals overlap.</description>
- <command interpreter="python">
- ../Java/Python/clusterize.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'csv':
- -f csv
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #end if
- -o $outputFileGff 
- $colinear
- $normalize
- -d $distance
- $log $outputFileLog
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="csv">csv</option>
- <option value="sam">sam</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="csv">
- <param name="inputFileName" format="csv" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="colinear" type="boolean" truevalue="-c" falsevalue="" checked="false" label="colinear option" help="This option clusterizes only the same strand reads"/>
- <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize option for only GFF3 file format" help="This option normalize (attention!! Only for GFF3 file!!!!!)"/>
- <param name="log" type="boolean" truevalue="-l" falsevalue="" checked="false" label="log option" help="This option create a log file"/>
- <param name="distance" type="integer" value="0" label="distance option" help="Limit the maximum distance between two reads"/>
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3" label="[clusterize]output file"/>
- <data name="outputFileLog" format="txt" label="[clusterize]output file">
- <filter>log</filter>
- </data>
- </outputs> 
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/clusterizeBySlidingWindows.xml
--- a/SMART/galaxy/clusterizeBySlidingWindows.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,132 +0,0 @@
-<tool id="clusterizeBySlidingWindows" name="clusterize By SlidingWindows">
- <description>Produces a GFF3 file that clusters a list of transcripts using a sliding window. Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.</description>
- <command interpreter="python">
- ../Java/Python/clusterizeBySlidingWindows.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
- -s $size
- -e $overlap
- -o $outputFileGff 
- $normalize
- $strands
-
- #if $OptionTag.tag == "Yes":
- -g $OptionTag.value
- #end if
-
- #if $OptionsOperation.operation == "Yes":
- -r $OptionsOperation.value
- #end if
-
- #if $OptionWriteTag.writeTag == "Yes":
- -w $OptionWriteTag.value
- #end if
-
- $strand
- $plot $plotPng
- $excel $excelOutput
-
-
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
-
- <param name="size" type="text" value="50000" label="Size option" help="Size of the regions."/>
- <param name="overlap" type="text" value="50" label="Overlap option" help="Overlap between two consecutive regions."/>
- <param name="normalize" type="boolean" truevalue="-m" falsevalue="" checked="false" label="Normalize option for only GFF3 file format" help="This option normalizes (Warning!! Only for GFF3 file!)"/>
- <param name="strands" type="boolean" truevalue="-2" falsevalue="" checked="false" label="strands option" help="Consider the two strands separately."/>
-
- <conditional name="OptionTag">
- <param name="tag" type="select" label="use a given tag as input (instead of summing number of features)">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="value" type="text" value="None" label="tag option" help="write a tag name you want to observe."/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
-
- <conditional name="OptionsOperation">
- <param name="operation" type="select" label="combine tag value with given operation [choice (sum, avg, med, min, max)]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="value" type="text" value="None" label="operation option" help="You can ONLY choose one of fowlling operation : sum, avg, med, min, max."/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
-
- <conditional name="OptionWriteTag">
- <param name="writeTag" type="select" label="write a new tag in output file">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="value" type="text" value="nbElements" label="write tag option" help="print the result in the given tag (default usually is 'nbElements')"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <param name="strand" type="boolean" truevalue="-2" falsevalue="" checked="false" label="strand option" help="This option considers the two strands separately."/>
- <param name="plot" type="boolean" truevalue="-p" falsevalue="" checked="false" label="plot option" help="This option creates a png file."/>
- <param name="excel" type="boolean" truevalue="-x" falsevalue="" checked="false" label="excel option" help="This option creates a csv file."/>
-
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3"/>
- <data name="excelOutput" format="csv">
- <filter>excel</filter>
- </data>
- <data name="plotPng" format="png">
- <filter>plot</filter>
- </data>
- </outputs> 
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/compareOverlapping.xml
--- a/SMART/galaxy/compareOverlapping.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,288 +0,0 @@\n-<tool id="CompareOverlapping" name="Compare Overlapping">\n-\t<description>Print all the transcripts from a first file which overlap with the transcripts from a second file.</description>\n-\t<command interpreter="python">\n-\t\t../Java/Python/CompareOverlapping.py -i $formatType.inputFileName1\n-\t\t#if $formatType.FormatInputFileName1 == \'bed\':\n-\t\t\t-f bed\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff\':\n-\t\t\t-f gff\t\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff2\':\n-\t\t\t-f gff2\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff3\':\n-\t\t\t-f gff3\n-\t\t#elif $formatType.FormatInputFileName1 == \'sam\':\n-\t\t\t-f sam\n-\t\t#elif $formatType.FormatInputFileName1 == \'gtf\':\n-\t\t\t-f gtf\n-\t\t#end if\n-\t\t\t\n-\t\t-j $formatType2.inputFileName2\n-\t\t#if $formatType2.FormatInputFileName2 == \'bed\':\n-\t\t\t-g bed\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gff\':\n-\t\t\t-g gff\t\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gff2\':\n-\t\t\t-g gff2\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gff3\':\n-\t\t\t-g gff3\n-\t\t#elif $formatType2.FormatInputFileName2 == \'sam\':\n-\t\t\t-g sam\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gtf\':\n-\t\t    -g gtf\n-\t\t#end if\n-\n-\t\t-o $outputFileGff \n-\n-\t\t#if $optionNFirstFile1.NFirstForFile1 == \'Yes\':\n-\t\t\t-S $optionNFirstFile1.firstNtFile1\n-\t\t#end if\n-\t\t#if $optionNFirstFile2.NFirstForFile2 == \'Yes\':\n-\t\t\t-s $optionNFirstFile2.firstNtFile2\n-\t\t#end if\n-\t\t#if $optionNLastFile1.NLastForFile1 == \'Yes\':\n-\t\t\t-U $optionNLastFile1.lastNtFile1\n-\t\t#end if\n-\t\t#if $optionNLastFile2.NLastForFile2 == \'Yes\':\n-\t\t\t-u $optionNLastFile2.lastNtFile2\n-\t\t#end if\n-\t\n-\t\t#if $optionExtentionCinqFile1.extentionFile1 == \'Yes\':\n-\t\t\t-E $optionExtentionCinqFile1.extention51\n-\t\t#end if\n-\t\t#if $optionExtentionCinqFile2.extentionFile2 == \'Yes\':\n-\t\t\t-e 
$optionExtentionCinqFile2.extention52\n-\t\t#end if\n-\n-\t\t#if $optionExtentionTroisFile1.extentionFile1 == \'Yes\':\n-\t\t\t-N $optionExtentionTroisFile1.extention31\n-\t\t#end if\n-\t\t#if $optionExtentionTroisFile2.extentionFile2 == \'Yes\':\n-\t\t\t-n $optionExtentionTroisFile2.extention32\n-\t\t#end if\t\n-\n-\t\t#if $OptionColinearOrAntiSens.OptionCA == \'Colinear\':\n-\t\t\t-c \n-\t\t#elif $OptionColinearOrAntiSens.OptionCA == \'AntiSens\':\n-\t\t\t-a\n-\t\t#end if\t\n-\n-\t\t#if $OptionDistance.Dist == \'Yes\':\n-\t\t\t-d $OptionDistance.distance\n-\t\t#end if\n-\n-\t\t#if $OptionMinOverlap.MO == \'Yes\':\n-\t\t\t-m $OptionMinOverlap.minOverlap\n-\t\t#end if\n-\n-\t\t$InvertMatch\n-\t\t$ReportIntron\n-\t\t$NotOverlapping\n-\t\t\n-\t</command>\n-\n-\t<inputs>\n-\t\t<conditional name="formatType">\n-\t\t\t<param name="FormatInputFileName1" type="select" label="Input File Format 1">\n-\t\t\t\t<option value="bed">bed</option>\n-\t\t\t\t<option value="gff">gff</option>\n-\t\t\t\t<option value="gff2">gff2</option>\n-\t\t\t\t<option value="gff3">gff3</option>\n-\t\t\t\t<option value="sam">sam</option>\n-\t\t\t\t<option value="gtf">gtf</option>\n-\t\t\t</param>\n-\t\t\t<when value="bed">\n-\t\t\t\t<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff">\n-\t\t\t\t<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff2">\n-\t\t\t\t<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff3">\n-\t\t\t\t<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="sam">\n-\t\t\t\t<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gtf">\n-\t\t\t\t<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>\n-\t\t\t\t\t\t\t\t                        
</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="formatType2">\n-\t\t\t<param name="FormatInputFileName2" type="select" label="Input File Format 2">\n-\t\t\t\t<option value="bed">bed</option>\n-\t\t\t\t<option value="gff">gff</option>\n-\t\t\t\t<option value="gff2">gff2</option>\n-\t\t\t\t<option value="gff3">gff3</option>\n-\t\t\t\t<option value="sam">sam</option>\n-\t\t\t\t<option value="gtf">gtf</option>\n-\t\t\t</param>\n-\t\t\t<when value="bed">\n-\t\t\t\t<param name="inputFileName2" format="bed" type="data" label="Inp'..b'e="integer" value="1" label="n last nucleotides for input file 1" help="only consider the n last nucleotides of the transcripts in file 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\t\t<conditional name="optionNLastFile2">\n-\t\t\t<param name="NLastForFile2" type="select" label="NLast for file 2">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="lastNtFile2" type="integer" value="1" label="n last nucleotides for input file 2" help="only consider the n last nucleotides of the transcripts in file 2"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\n-\n-\t\t<conditional name="optionExtentionCinqFile1">\n-\t\t\t<param name="extentionFile1" type="select" label="Extension towards 5 for file 1">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention51" type="integer" value="1" label="in file 1" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\n-\t\t<conditional name="optionExtentionCinqFile2">\n-\t\t\t<param name="extentionFile2" type="select" label="Extension towards 5 for file 2">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" 
selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention52" type="integer" value="1" label="in file 2"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionExtentionTroisFile1">\n-\t\t\t<param name="extentionFile1" type="select" label="Extension towards 3 for file 1">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention31" type="integer" value="1" label="in file 1" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionExtentionTroisFile2">\n-\t\t\t<param name="extentionFile2" type="select" label="Extension towards 3 for file 2">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="extention32" type="integer" value="1" label="in file 2" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionColinearOrAntiSens">\n-\t\t\t<param name="OptionCA" type="select" label="Colinear or anti-sens">\n-\t\t\t\t<option value="Colinear">Colinear</option>\n-\t\t\t\t<option value="AntiSens">AntiSens</option>\n-\t\t\t\t<option value="NONE" selected="true">NONE</option>\n-\t\t\t</param>\n-\t\t\t<when value="Colinear">\n-\t\t\t</when>\n-\t\t\t<when value="AntiSens">\n-\t\t\t</when>\n-\t\t\t<when value="NONE">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionDistance">\n-\t\t\t<param name="Dist" type="select" label="Maximum Distance between two reads">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="distance" type="integer" value="0"/>\n-\t\t\t</when>\n-\t\t\t<when 
value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionMinOverlap">\n-\t\t\t<param name="MO" type="select" label="Minimum number of overlapping between two reads">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="minOverlap" type="integer" value="1"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\t\t<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match"/>\n-\t\t<param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Report intron"/>\n-\t\t<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="When there is no overlapping, the number of Overlapping will be set to 0 by defalt."/>\n-\t\t\n-\t</inputs>\n-\n-\t<outputs>\n-\t\t<data name="outputFileGff" format="gff3"/>\n-\t</outputs> \n-\t\n-</tool>\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/computeCoverage.xml
--- a/SMART/galaxy/computeCoverage.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,107 +0,0 @@
-<tool id="ComputeCoverage" name="Compute coverage">
-    <description>Compute the coverage of a set with respect to another set.</description>
-    <command interpreter="python">
-        ../Java/Python/ComputeCoverage.py -i $formatType.inputFileName1
- #if $formatType.FormatInputFileName1 == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName1 == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName1 == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName1 == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName1 == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName1 == 'gtf':
- -f gtf
- #end if
-
- -j $formatType2.inputFileName2
- #if $formatType2.FormatInputFileName2 == 'bed':
- -g bed
- #elif $formatType2.FormatInputFileName2 == 'gff':
- -g gff
- #elif $formatType2.FormatInputFileName2 == 'gff2':
- -g gff2
- #elif $formatType2.FormatInputFileName2 == 'gff3':
- -g gff3
- #elif $formatType2.FormatInputFileName2 == 'sam':
- -g sam
- #elif $formatType2.FormatInputFileName2 == 'gtf':
- -g gtf
- #end if
-
-                $ReportIntron
-                -o $outputFileGff
-
-    </command>
-
-    <inputs>
-        <conditional name="formatType">
- <param name="FormatInputFileName1" type="select" label="Input File Format 1">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
- </when>
- <when value="gff">
- <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
- </when>
- <when value="gff2">
- <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
- </when>
- <when value="gff3">
- <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
- </when>
- <when value="sam">
- <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
- </when>
- <when value="gtf">
- <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
- </when>
- </conditional>
-
- <conditional name="formatType2">
- <param name="FormatInputFileName2" type="select" label="Input File Format 2">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
- </when>
- <when value="gff">
- <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
- </when>
- <when value="gff2">
- <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
- </when>
- <when value="gff3">
- <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
- </when>
- <when value="sam">
- <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
- </when>
- <when value="gtf">
- <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
- </when>
-                </conditional>
-
-                <param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Include introns."/>
-
-        </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3" label="[computeCoverage] OUTPUT file"/>
- </outputs> 
-
-</tool>
-
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/coordinatesToSequence.xml
--- a/SMART/galaxy/coordinatesToSequence.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,62 +0,0 @@
-<tool id="coordinatesToSequence" name="coordinates to sequence">
- <description>Coordinates to Sequences: Extract the sequences from a list of coordinates.</description>
- <command interpreter="python">
- ../Java/Python/coordinatesToSequence.py -i $formatType.inputFileName1
- #if $formatType.FormatInputFileName1 == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName1 == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName1 == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName1 == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName1 == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName1 == 'gtf':
- -f gtf
- #end if
-
- -s $sequence
- -o $outputFileFasta
-
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName1" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName1" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName1" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName1" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName1" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName1" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName1" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
-      <param name="sequence" type="data" label="Reference fasta File" format="fasta"/>
-
- </inputs>
-
- <outputs>
- <data name="outputFileFasta" format="fasta" label="coordinates to sequences output"/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/findTss.xml
--- a/SMART/galaxy/findTss.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,59 +0,0 @@
-<tool id="findTss" name="findTss">
- <description>Find the transcription start site of a list of transcripts.</description>
- <command interpreter="python">
- ../Java/Python/findTss.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #end if
-
-
- -o $outputFileGff 
- $colinear
- $normalize
- -d $distance
- $excel $excelOutput
-
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="colinear" type="boolean" truevalue="-e" falsevalue="" checked="false" label="colinear option" help="This option clusterizes only the same strand reads"/>
- <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize option for only GFF3 file format" help="This option normalize (Warning!! Only for GFF3 file!!!!!)"/>
- <param name="distance" type="text" value="10" label="distance option" help="Limit the maximum distance between two reads"/>
- <param name="excel" type="boolean" truevalue="-c" falsevalue="" checked="false" label="excel option" help="This option creates a csv file."/>
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3" label="[findTss] Output File"/>
- <data name="excelOutput" format="csv" label="[findTss] CSV File">
- <filter>excel</filter>
- </data>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getDifference.xml
--- a/SMART/galaxy/getDifference.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,127 +0,0 @@
-<tool id="getDifference" name="get Difference">
- <description>Gets all the regions of the genome, except the one given or get all the elements from the first set which does not ovelap with the second set (at the nucleotide level).</description>
- <command interpreter="python">
- ../Java/Python/getDifference.py -i $formatType.inputFileName1
- #if $formatType.FormatInputFileName1 == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName1 == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName1 == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName1 == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName1 == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName1 == 'gtf':
- -f gtf
- #end if
-
- -j $formatType2.inputFileName2
- #if $formatType2.FormatInputFileName2 == 'bed':
- -g bed
- #elif $formatType2.FormatInputFileName2 == 'gff':
- -g gff
- #elif $formatType2.FormatInputFileName2 == 'gff2':
- -g gff2
- #elif $formatType2.FormatInputFileName2 == 'gff3':
- -g gff3
- #elif $formatType2.FormatInputFileName2 == 'sam':
- -g sam
- #elif $formatType2.FormatInputFileName2 == 'gtf':
- -g gtf
- #end if
-
-
- $split
-
- #if $OptionSequence.option == "Yes":
- -s $OptionSequence.sequence
- #end if
-
- -o $outputFileGff
-
-
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName1" type="select" label="Input File Format 1">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName1" format="bed" type="data" label="Input File "/>
- </when>
- <when value="gff">
- <param name="inputFileName1" format="gff" type="data" label="Input File "/>
- </when>
- <when value="gff2">
- <param name="inputFileName1" format="gff2" type="data" label="Input File "/>
- </when>
- <when value="gff3">
- <param name="inputFileName1" format="gff3" type="data" label="Input File "/>
- </when>
- <when value="sam">
- <param name="inputFileName1" format="sam" type="data" label="Input File "/>
- </when>
- <when value="gtf">
- <param name="inputFileName1" format="gtf" type="data" label="Input File "/>
- </when>
- </conditional>
-
- <conditional name="formatType2">
- <param name="FormatInputFileName2" type="select" label="Input File Format 2">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName2" format="bed" type="data" label="reference file"/>
- </when>
- <when value="gff">
- <param name="inputFileName2" format="gff" type="data" label="reference file"/>
- </when>
- <when value="gff2">
- <param name="inputFileName2" format="gff2" type="data" label="reference file"/>
- </when>
- <when value="gff3">
- <param name="inputFileName2" format="gff3" type="data" label="reference file"/>
- </when>
- <when value="sam">
- <param name="inputFileName2" format="sam" type="data" label="reference file"/>
- </when>
- <when value="gtf">
- <param name="inputFileName2" format="gtf" type="data" label="reference file"/>
- </when>
- </conditional>
-
- <param name="split" type="boolean" truevalue="-p" falsevalue="" checked="false" label="split option" help="When comparing to a set of genomic coordinates, do not join."/>
-
-
- <conditional name="OptionSequence">
- <param name="option" type="select" label="Compare with a reference fasta file.">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="sequence" type="data" label="Fasta File" format="fasta"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- </inputs>
-
-
- <outputs>
- <data name="outputFileGff" format="gff3" label="[getDifference]output File."/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getDistance.xml
--- a/SMART/galaxy/getDistance.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,275 +0,0 @@\n-<tool id="GetDistance" name="get distance">\n-\t<description>Give the distances between every data from the first input set and the data from the second input set</description>\n-\t<command interpreter="python">\n-\t\t../Java/Python/getDistance.py -i $formatType.inputFileName1\n-\t\t#if $formatType.FormatInputFileName1 == \'bed\':\n-\t\t\t-f bed\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff\':\n-\t\t\t-f gff\t\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff2\':\n-\t\t\t-f gff2\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff3\':\n-\t\t\t-f gff3\n-\t\t#elif $formatType.FormatInputFileName1 == \'sam\':\n-\t\t\t-f sam\n-\t\t#elif $formatType.FormatInputFileName1 == \'gtf\':\n-\t\t\t-f gtf\n-\t\t#end if\n-\t\t\t\n-\t\t-j $formatType2.inputFileName2\n-\t\t#if $formatType2.FormatInputFileName2 == \'bed\':\n-\t\t\t-g bed\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gff\':\n-\t\t\t-g gff\t\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gff2\':\n-\t\t\t-g gff2\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gff3\':\n-\t\t\t-g gff3\n-\t\t#elif $formatType2.FormatInputFileName2 == \'sam\':\n-\t\t\t-g sam\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gtf\':\n-\t\t\t-g gtf\n-\t\t#end if\n-\n-\n-\t\t$absolute $proportion\n-\n-\t\t#if $OptionColinearOrAntiSens.OptionCA == "Colinear":\n-\t\t\t-c \n-\t\t#elif $OptionColinearOrAntiSens.OptionCA == \'AntiSens\':\n-\t\t\t-a\n-\t\t#end if\n-\n-\t\t#if $OptionFirstNucl5.FirstNu5 == "Yes":\n-\t\t\t-s $OptionFirstNucl5.first5File1\n-\t\t\t-S $OptionFirstNucl5.first5File2\n-\t\t#end if\t\t\n-\n-\t\t#if $OptionFirstNucl3.FirstNu3 == "Yes":\n-\t\t\t-e $OptionFirstNucl3.first3File1\n-\t\t\t-E $OptionFirstNucl3.first3File2\n-\t\t#end if\n-\n-\t\t#if $OptionMinDistance.MinD == "Yes":\n-\t\t\t-m $OptionMinDistance.minDistance\n-\t\t#end if\n-\n-\t\t#if $OptionMaxDistance.MaxD == "Yes":\n-\t\t\t-M $OptionMaxDistance.maxDistance\n-\t\t#end if\n-\n-\t\t$fivePrime $threePrime 
$spearMan\n-\n-\t\t#if $OptionBuckets.OBuckets == "Yes":\n-\t\t\t-u $OptionBuckets.buckets\n-\t\t#end if\n-\n-\t\t#if $OptionMinXaxis.MinX == "Yes":\n-\t\t\t-x $OptionMinXaxis.minXaxis\n-\t\t#end if\n-\n-\t\t#if $OptionMaxXaxis.MaxX == "Yes":\n-\t\t\t-X $OptionMaxXaxis.maxXaxis\n-\t\t#end if\n-\n-\t\t#if $OptionTitle.OTitle == "Yes":\n-\t\t\t-t $OptionTitle.title\n-\t\t#end if\n-\t\t\n-\t\t-o $outputFilePng\n-\t\t$outputDistance $outputFileDistance\n-\n-\t</command>\n-\n-\t<inputs>\n-\t\t<conditional name="formatType">\n-\t\t\t<param name="FormatInputFileName1" type="select" label="Input File Format 1">\n-\t\t\t\t<option value="bed">bed</option>\n-\t\t\t\t<option value="gff">gff</option>\n-\t\t\t\t<option value="gff2">gff2</option>\n-\t\t\t\t<option value="gff3">gff3</option>\n-\t\t\t\t<option value="sam">sam</option>\n-\t\t\t\t<option value="gtf">gtf</option>\n-\t\t\t</param>\n-\t\t\t<when value="bed">\n-\t\t\t\t<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff">\n-\t\t\t\t<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff2">\n-\t\t\t\t<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff3">\n-\t\t\t\t<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="sam">\n-\t\t\t\t<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gtf">\n-\t\t\t\t<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="formatType2">\n-\t\t\t<param name="FormatInputFileName2" type="select" label="Input File Format 2">\n-\t\t\t\t<option value="bed">bed</option>\n-\t\t\t\t<option value="gff">gff</option>\n-\t\t\t\t<option value="gff2">gff2</option>\n-\t\t\t\t<option 
value="gff3">gff3</option>\n-\t\t\t\t<option value="sam">sam</option>\n-\t\t\t\t<option value="gtf">gtf</option>\n-\t\t\t</param>\n-\t\t\t<when value="bed">\n-\t\t\t\t<param name="inputFileName2" format="bed" type="data" label="Input File 2"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff">\n-\t\t\t\t<param name="inputFileName2" format="gff" type="data" label="Input File 2"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff2">\n-\t\t\t\t<param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>\n-\t\t\t</when>'..b'ption>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="first5File1" type="integer" value="1" label="in file 1" help="Be Careful! The value must be upper than 0"/>\n-\t\t\t\t<param name="first5File2" type="integer" value="1" label="in file 2" help="Be Careful! The value must be upper than 0"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionFirstNucl3">\n-\t\t\t<param name="FirstNu3" type="select" label="only consider the n first 3\' nucleotides for input files">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="first3File1" type="integer" value="1" label="in file 1" help="Be Careful! The value must be upper than 0"/>\n-\t\t\t\t<param name="first3File2" type="integer" value="1" label="in file 2" help="Be Careful! 
The value must be upper than 0"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionMinDistance">\n-\t\t\t<param name="MinD" type="select" label="minimum distance considered between two transcripts">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="minDistance" type="integer" value="1"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionMaxDistance">\n-\t\t\t<param name="MaxD" type="select" label="maximum distance considered between two transcripts">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="maxDistance" type="integer" value="1000"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<param name="fivePrime" type="boolean" truevalue="-5" falsevalue="" checked="false" label="five prime option" help="Consider the elements from input file 1 which are upstream of elements of input file 2"/>\n-\t\t<param name="threePrime" type="boolean" truevalue="-3" falsevalue="" checked="false" label="three prime option" help="Consider the elements from input file1 which are downstream of elements of input file 2"/>\n-\t\t<param name="spearMan" type="boolean" truevalue="-r" falsevalue="" checked="false" label="spearman option" help="Compute Spearman rho."/>\n-\n-\n-\t\t<conditional name="OptionBuckets">\n-\t\t\t<param name="OBuckets" type="select" label="Plots histogram instead of line plot with given interval size.">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="buckets" type="integer" value="1" label="Interval size"/>\n-\t\t\t</when>\n-\t\t\t<when 
value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionMinXaxis">\n-\t\t\t<param name="MinX" type="select" label="Minimum value on the x-axis to plot.">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="minXaxis" type="integer" value="1"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionMaxXaxis">\n-\t\t\t<param name="MaxX" type="select" label="Maximum value on the x-axis to plot.">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="maxXaxis" type="integer" value="1"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="OptionTitle">\n-\t\t\t<param name="OTitle" type="select" label="Title for the graph.">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="title" type="text" value=""/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t</inputs>\n-\n-\n-\t<outputs>\n-\t\t<data name="outputFilePng" format="png"/>\n-\t\t<data name="outputFileDistance" format="gff3">\n-\t\t\t<filter>outputDistance</filter>\n-\t\t</data>\n-\t</outputs> \n-\n-</tool>\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getDistribution.xml
--- a/SMART/galaxy/getDistribution.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,248 +0,0 @@
-<tool id="getDistribution" name="get distribution">
- <description>Get Distribution: Get the distribution of the genomic coordinates on a genome.</description>
- <command interpreter="python">
- ../Java/Python/GetDistribution.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'csv':
- -f csv
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
-
- -r $refFile
-
- #if $optionNbBin.Nb == 'Yes':
- -b $optionNbBin.nbBins
- #end if
-
- #if $optionStart.start == 'Yes':
- -s $optionStart.startValue
- #end if
-
- #if $optionEnd.end == 'Yes':
- -e $optionEnd.endValue
- #end if
-
- #if $optionHeight.height == 'Yes':
- -H $optionHeight.heightValue
- #end if
-
- #if $optionWidth.width == 'Yes':
- -W $optionWidth.widthValue
- #end if
-
- #if $optionYMin.YMin == 'Yes':
- -y $optionYMin.YMinValue
- #end if
-
- #if $optionYMax.YMax == 'Yes':
- -Y $optionYMax.YMaxValue
- #end if
-
- #if $optionChrom.chrom == 'Yes':
- -c $optionChrom.chromValue
- #end if
-
- #if $optionColor.color == 'Yes':
- -l $optionColor.colorValue
- #end if
-
- $bothStrands
- $average
- $normalize
- $csv $outputCSV
- $gff $outputGFF
- -m
- -o $outputFile
-
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="csv">csv</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="csv">
- <param name="inputFileName" format="csv" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="refFile" format="fasta" type="data" label="reference genome file"/>
-
- <conditional name="optionNbBin">
- <param name="Nb" type="select" label="number of bins">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="nbBins" type="integer" value="1000" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionStart">
- <param name="start" type="select" label="start from a given region">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="startValue" type="integer" value="0" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionEnd">
- <param name="end" type="select" label="end from a given region">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="endValue" type="integer" value="0" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionHeight">
- <param name="height" type="select" label="height of the graphics">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="heightValue" type="integer" value="300" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionWidth">
- <param name="width" type="select" label="width of the graphics">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="widthValue" type="integer" value="1000" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionYMin">
- <param name="YMin" type="select" label="minimum value on the y-axis to plot">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="YMinValue" type="integer" value="1000" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionYMax">
- <param name="YMax" type="select" label="maximum value on the y-axis to plot">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="YMaxValue" type="integer" value="1000" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionChrom">
- <param name="chrom" type="select" label="plot only one given chromosome">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="chromValue" type="text" value="chromName" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionColor">
- <param name="color" type="select" label="color of the lines (separated by commas and no space)">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="colorValue" type="text" value="red,blue" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
-
- <param name="bothStrands" type="boolean" truevalue="-2" falsevalue="" checked="false" label="plot one curve per strand"/>
- <param name="average" type="boolean" truevalue="-a" falsevalue="" checked="false" label="plot plot average (instead of sum)"/>
-
- <conditional name="optionNames">
- <param name="names" type="select" label="name for the tags (separated by commas and no space)">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="namesValue" type="text" value="nbElements" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <param name="normalize" type="boolean" truevalue="-z" falsevalue="" checked="false" label="normalize data (when panels are different)"/>
- <param name="csv" type="boolean" truevalue="-x" falsevalue="" checked="false" label="write a .csv file."/>
- <param name="gff" type="boolean" truevalue="-g" falsevalue="" checked="false" label="write a .gff file."/>
- </inputs>
-
- <outputs>
- <data name="outputFile" format="png" label="[getDistribution] out png file"/>
- <data name="outputCSV" format="csv" label="[getDistribution] output csv file">
- <filter>csv</filter>
- </data>
-
- <data name="outputGFF" format="gff" label="[getDistribution] output gff file">
- <filter>gff</filter>
- </data>
- </outputs> 
-
-    <help>
-        This script gives a .tar out file, if you want to take look at the results, you have to download it.
-    </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getExons.xml
--- a/SMART/galaxy/getExons.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,80 +0,0 @@
-<tool id="getExons" name="get exons">
-    <description>Get the exons of a set of transcripts.</description>
-    <command interpreter="python">
- ../Java/Python/getExons.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
-
- #if $optionSelect.Value == "Yes":
- -s $optionSelect.selectValue
- #end if
-
- -o $outputFileGff 
- </command>
-
-    <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <conditional name="optionSelect">
- <param name="Value" type="select" label="select some of the exons (like '1,2,5..-3,-1')">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="selectValue" type="text" value="None" label="select option" help="like '1,2,5..-3,-1'"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-    </inputs>
-        
-    <outputs>
-        <data format="gff3" name="outputFileGff" label="[getExons -> gff3] Output File"/>       
-    </outputs> 
-<tests>
-    <test>
-      <param name="FormatInputFileName" value="gtf" />
-      <param name="inputFileName" value="genes.gtf" />
-      <param name="Value" value="No"/>
-      <output name="outputFileGff" file="exp_getExons.gff3" />
-    </test>
-</tests>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getIntrons.xml
--- a/SMART/galaxy/getIntrons.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,64 +0,0 @@
-<tool id="getIntrons" name="get introns">
-    <description>Get the introns of a set of transcripts.</description>
-    <command interpreter="python">
- ../Java/Python/getIntrons.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
- -o $outputFileGff 
- </command>
-
-    <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
-    </inputs>
-        
-    <outputs>
-        <data format="gff3" name="outputFileGff" label="[getIntrons -> gff3] Output File"/>       
-    </outputs> 
-<tests>
-    <test>
-      <param name="FormatInputFileName" value="gtf" />
-      <param name="inputFileName" value="genes.gtf" />
-      <output name="outputFileGff" file="exp_getIntrons.gff3" />
-    </test>
-  </tests>
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getNb.xml
--- a/SMART/galaxy/getNb.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,78 +0,0 @@
-<tool id="getNumber" name="get number">
- <description>Get the distribution of exons per transcripts, or mapping per read, or transcript per cluster.</description>
- <command interpreter="python">
- ../Java/Python/getNb.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
- -o $outputFilePNG
- -q $query
- $barPlot
- #if $optionXMAX.XMAX == 'Yes':
- -x $optionXMAX.xMaxValue
- #end if
-
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="query" type="text" value="None" label="compulsory option, choice (exon, transcript, cluster)" />
- <param name="barPlot" type="boolean" truevalue="-b" falsevalue="" checked="false" label="use barplot representation"/>
-
- <conditional name="optionXMAX">
- <param name="XMAX" type="select" label="maximum value on the x-axis to plot ">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="xMaxValue" type="integer" value="0" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- </inputs>
-
- <outputs>
- <data name="outputFilePNG" format="png" label="[getNB]out file"/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getReadDistribution.xml
--- a/SMART/galaxy/getReadDistribution.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,67 +0,0 @@
-<tool id="getReadDistribution" name="get read distribution">
- <description>Get Read Distribution v1.0.1: Plot the number of identical reads and give the most represented.</description>
- <command interpreter="python">
- ../Java/Python/WrappGetReadDistribution.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'fasta':
- -f fasta
- #elif $formatType.FormatInputFileName == 'fastq':
- -f fastq
- #end if
-
- #if $optionnumber.number == 'Yes':
- -n $optionnumber.bestNumber
- #end if
- #if $optionpercent.percent == 'Yes':
- -p $optionpercent.percentage
- #end if
- -o $outputFile
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Sequence input File Format ">
- <option value="fasta">fasta</option>
- <option value="fastq">fastq</option>
- </param>
- <when value="fasta">
- <param name="inputFileName" format="fasta" type="data" label="Sequence input File"/>
- </when>
- <when value="fastq">
- <param name="inputFileName" format="fastq" type="data" label="Sequence input File"/>
- </when>
- </conditional>
-
- <conditional name="optionnumber">
- <param name="number" type="select" label="keep the best n">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="bestNumber" type="integer" value="0"  />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionpercent">
- <param name="percent" type="select" label="keep the best n percentage">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="percentage" type="integer" value="0" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- </inputs>
-
- <outputs>
- <data name="outputFile" format="tar" label="[getReadDistribution] tar out file" help="You can not see the results directly from galaxy, but you can download this tar output file."/>
- </outputs> 
-
-    <help>
-        This script gives a .tar out file, if you want to take look at the results, you have to download it.
-    </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getSequence.xml
--- a/SMART/galaxy/getSequence.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,21 +0,0 @@
-<tool id="getSequence" name="get sequence">
-  <description>Get a single sequence in a FASTA file.</description>
-  <command interpreter="python"> ../Java/Python/getSequence.py -i $inputFile 
- -n $name
-   -o $outputFile  
-  
-  </command>
-  
-  
-  <inputs>
-    <param name="inputFile" type="data" label="Input fasta File" format="fasta"/>
-   <param name="name" type="text" value="None" label="name of the sequence [compulsory option]"/>
-  </inputs>
-
-  <outputs>
-    <data format="fasta" name="outputFile" label="[getSequence] Output File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getSizes.xml
--- a/SMART/galaxy/getSizes.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,135 +0,0 @@
-<tool id="GetSizes" name="get sizes">
- <description>Get the sizes of a set of genomic coordinates.</description>
- <command interpreter="python">
- ../Java/Python/getSizes.py -i $formatType.inputFileName $formatType.FormatInputFileName
-
- #if $OptionQuery.OptionQ == 'NONE':
- -q size
- #else:
- $OptionQuery.OptionQ
- #end if
-
- -o $outputFile
-
- #if $OptionXMax.xMax == "Yes":
- -x $OptionXMax.maxValue
- #end if
- #if $OptionX.xLab == "Yes":
-         -a $OptionX.xLabValue
- #end if
-                #if $OptionY.yLab == "Yes":
-         -b $OptionY.yLabValue
- #end if
- $barPlot
- $excel $excelOutput
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="-f bed">bed</option>
- <option value="-f gff">gff</option>
- <option value="-f gff2">gff2</option>
- <option value="-f gff3">gff3</option>
- <option value="-f sam">sam</option>
- <option value="-f gtf">gtf</option>
- <option value="-f fasta">fasta</option>
- <option value="-f fastq">fastq</option>
- </param>
- <when value="-f bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="-f gff">
- <param name="inputFileName" format="gff" type="data" label="Input gff File"/>
- </when>
- <when value="-f gff2">
- <param name="inputFileName" format="gff" type="data" label="Input gff2 File"/>
- </when>
- <when value="-f gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input gff3 File"/>
- </when>
- <when value="-f sam">
- <param name="inputFileName" format="sam" type="data" label="Input gff2 File"/>
- </when>
- <when value="-f gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input gff3 File"/>
- </when>
- <when value="-f fasta">
- <param name="inputFileName" format="fasta" type="data" label="Input fasta File"/>
- </when>
- <when value="-f fastq">
- <param name="inputFileName" format="fastq" type="data" label="Input fastq File"/>
- </when>
- </conditional>
-
- <conditional name="OptionQuery">
- <param name="OptionQ" type="select" label="mesure type">
- <option value="-q size">size</option>
- <option value="-q intron size">intron size</option>
- <option value="-q exon size">exon size</option>
- <option value="-q 1st exon size">1st exon size</option>
- <option value="NONE" selected="true">NONE</option>
- </param>
- <when value="-q size">
- </when>
- <when value="-q intron size">
- </when>
- <when value="-q exon size">
- </when>
- <when value="-q 1st exon size">
- </when>
- <when value="NONE">
-
- </when>
- </conditional>
-
- <conditional name="OptionXMax">
- <param name="xMax" type="select" label="maximum value on the x-axis to plot [format: int]">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="maxValue" type="integer" value="1000"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionX">
-  <param name="xLab" type="select" label="X label title">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="xLabValue" type="text" value="Size" label="Notice: The title should not have spaces. EX. Size_of_transcript"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionY">
- <param name="yLab" type="select" label="Y label title">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="yLabValue" type="text" value="#_reads" label="Notice: The title should not have spaces. EX. Number_of_reads"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
-
-
- <param name="barPlot" type="boolean" truevalue="-B" falsevalue="" checked="false" label="use barplot representation"/>
-
- <param name="excel" type="boolean" truevalue="-c" falsevalue="" checked="false" label="excel option" help="This option creates a csv file."/>
- </inputs>
-
- <outputs>
- <data name="outputFile" format="png" label="[Get sizes] output file"/>
- <data name="excelOutput" format="csv">
- <filter>excel</filter>
- </data>
- </outputs>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getWigData.xml
--- a/SMART/galaxy/getWigData.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,17 +0,0 @@
-<tool id="getWigData" name="get wig data">
-    <description>Compute the average data for some genomic coordinates using WIG files</description>
-    <command interpreter="python">
- ../Java/Python/getWigData.py -i $inputGff3File -f gff3 -w $inputWigFile -t $tagName -$strand -o $outputFile
- </command>
-
-    <inputs>
-     <param name="inputGff3File" type="data" label="Input Gff3 File (compulsory option)" format="gff3"/>
-    <param name="inputWigFile" type="data" label="Input Wig File (compulsory option)" format="wig"/>
- <param name="tagName" type="text" value="None" label="tag option (compulsory option)" help="choose a tag name to write the wig information to output file."/>
- <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="consider both strands separately."/>    
-    </inputs>
-        
-    <outputs>
-        <data format="gff3" name="outputFile" label="[getWigData -> gff3] Output File"/>       
-    </outputs> 
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getWigDistance.xml
--- a/SMART/galaxy/getWigDistance.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,17 +0,0 @@
-<tool id="getWigDistance" name="get wig distance">
-    <description>Compute the average data around some genomic coordinates using WIG files (thus covering a large proportion of the genome).</description>
-    <command interpreter="python">
- ../Java/Python/getWigDistance.py -i $inputGff3File -f gff3 -w $inputWigFile -a 0.0 -d $distance $strand -o $outputFile
- </command>
-
-    <inputs>
-     <param name="inputGff3File" type="data" label="Input Gff3 File (compulsory option)" format="gff3"/>
-    <param name="inputWigFile" type="data" label="Input Wig File (compulsory option)" format="wig"/>
- <param name="distance" type="integer" value="1000" label="distance option (compulsory option)" help="Distance around position.Be Careful! The value must be upper than 0"/>
- <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="consider both strands separately."/>    
-    </inputs>
-        
-    <outputs>
-        <data name="outputFile" format="png" label="[getWigDistance] PNG output File"/>    
-    </outputs> 
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/getWigProfile.xml
--- a/SMART/galaxy/getWigProfile.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,70 +0,0 @@
-<tool id="getWigProfile" name="get wig profile">
- <description>Compute the average profile of some genomic coordinates using WIG files (thus covering a large proportion of the genome).</description>
- <command interpreter="python">
- ../Java/Python/getWigProfile.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #end if
- -w $inputWigFile
- -p $nbPoints
- -d $distance
- $strands
- -o $outputFilePNG
- #if $optionSMO.SMO == 'Yes':
- -m $optionSMO.smoothen
- #end if
-
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="inputWigFile" type="data" label="Input Wig File" format="wig"/>
- <param name="nbPoints" type="integer" value="1000" label="number of points on the x-axis"/>
- <param name="distance" type="integer" value="0" label="distance around genomic coordinates"/>
- <param name="strands" type="boolean" truevalue="-s" falsevalue="" checked="false" label="consider both strands separately"/>
-
- <conditional name="optionSMO">
- <param name="SMO" type="select" label="smoothen the curve">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="smoothen" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- </inputs>
-
- <outputs>
- <data name="outputFilePNG" format="png" label="[getWigProfile]out file"/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/mapperAnalyzer.xml
--- a/SMART/galaxy/mapperAnalyzer.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,186 +0,0 @@
-<tool id="mapperAnalyzer" name="mapper analyzer">
- <description>Read the output of an aligner, print statistics and possibly translate into BED or GBrowse formats. </description>
- <command interpreter="python">
- ../Java/Python/mapperAnalyzer.py -i $formatType.inputFileName1
- #if $formatType.FormatInputFileName1 == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName1 == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName1 == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName1 == 'bam':
- -f bam
- #elif $formatType.FormatInputFileName1 == 'seqmap':
- -f seqmap
- #end if
-
- -q $formatType2.inputFileName2
- #if $formatType2.FormatInputFileName2 == 'fasta':
- -k fasta
- #elif $formatType2.FormatInputFileName2 == 'fastq':
- -k fastq
- #end if
-
-
- #if $optionnumber.number == 'Yes':
- -n $optionnumber.numberVal
- #end if
- #if $optionsize.size == 'Yes':
- -s $optionsize.sizeVal
- #end if
- #if $optionidentity.identity == 'Yes':
- -d $optionidentity.identityVal
- #end if
- #if $optionmismatch.mismatch == 'Yes':
- -m $optionmismatch.mismatchVal
- #end if
- #if $optiongap.gap == 'Yes':
- -p $optiongap.gapVal
- #end if
- #if $optiontitle.title == 'Yes':
- -t $optiontitle.titleVal
- #end if
- #if $optionappend.append == 'Yes':
- -a $optionappend.appendfile
- #end if
-
- $merge
- $remove
- $remain
- -o $outputFileGFF
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName1" type="select" label="Input File mapping Format">
- <option value="bed">bed</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="bam">bam</option>
- <option value="seqmap" selected="true">seqmap</option>
- </param>
- <when value="bed">
- <param name="inputFileName1" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName1" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName1" format="sam" type="data" label="Input File"/>
- </when>
- <when value="bam">
- <param name="inputFileName1" format="bam" type="data" label="Input File"/>
- </when>
- <when value="seqmap">
- <param name="inputFileName1" format="seqmap" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <conditional name="formatType2">
- <param name="FormatInputFileName2" type="select" label="Reference sequence File Format">
- <option value="fasta" selected="true">fasta</option>
- <option value="fastq">fastq</option>
- </param>
- <when value="fasta">
- <param name="inputFileName2" format="fasta" type="data" label="Reference sequence File Format"/>
- </when>
- <when value="fastq">
- <param name="inputFileName2" format="fastq" type="data" label="Reference sequence File Format"/>
- </when>
- </conditional>
-
-
- <conditional name="optionnumber">
- <param name="number" type="select" label="max. number of occurrences of a sequence">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="numberVal" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionsize">
- <param name="size" type="select" label="minimum pourcentage of size ">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="sizeVal" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionidentity">
- <param name="identity" type="select" label="minimum pourcentage of identity ">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="identityVal" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionmismatch">
- <param name="mismatch" type="select" label="maximum number of mismatches">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="mismatchVal" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optiongap">
- <param name="gap" type="select" label="maximum number of gaps">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="gapVal" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optiontitle">
- <param name="title" type="select" label="title of the plots ">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="titleVal" type="text" value="title of the UCSC track" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionappend">
- <param name="append" type="select" label="append to GFF3 file">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="appendfile" type="data" format="gff3" label="append a file"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <param name="merge" type="boolean" truevalue="-e" falsevalue="" checked="false" label="merge exons when introns are short "/>
- <param name="remove" type="boolean" truevalue="-x" falsevalue="" checked="false" label="remove transcripts when exons are short"/>
- <param name="remain" type="boolean" truevalue="-r" falsevalue="" checked="false" label="print the unmatched sequences "/>
- </inputs>
-
- <outputs>
- <data name="outputFileGFF" format="gff3" label="[mapperAnalyzer] out file"/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/mappingToCoordinates.xml
--- a/SMART/galaxy/mappingToCoordinates.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,57 +0,0 @@
-<tool id="mappingToCoordinates" name="mapping to coordinates">
- <description>Converts a mapping type file(given by a mapping tool) to a GFF3 type file.</description>
- <command interpreter="python">
- ../Java/Python/mappingToCoordinates.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'blast -8'
- -f blast
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #end if
-
- -o $outputFileGff 
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="sam">sam</option>
- <option value="blast -8">blast</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="blast -8">
- <param name="inputFileName" format="blast" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- </conditional>
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3"/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/mergeSlidingWindowsClusters.xml
--- a/SMART/galaxy/mergeSlidingWindowsClusters.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,103 +0,0 @@
-<tool id="mergeSlidingWindowsClusters" name="merge sliding windows clusters">
- <description>Merges two files containing the results of a sliding windows clustering.</description>
- <command interpreter="python">
- ../Java/Python/mergeSlidingWindowsClusters.py -i $formatType.inputFileName1
- #if $formatType.FormatInputFileName1 == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName1 == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName1 == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName1 == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName1 == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName1 == 'gtf':
- -f gtf
- #end if
-
- -j $formatType2.inputFileName2
- #if $formatType2.FormatInputFileName2 == 'bed':
- -g bed
- #elif $formatType2.FormatInputFileName2 == 'gff':
- -g gff
- #elif $formatType2.FormatInputFileName2 == 'gff2':
- -g gff2
- #elif $formatType2.FormatInputFileName2 == 'gff3':
- -g gff3
- #elif $formatType2.FormatInputFileName2 == 'sam':
- -g sam
- #elif $formatType2.FormatInputFileName2 == 'gtf':
- -g gtf
- #end if
-
- -o $outputFileGff 
-
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName1" type="select" label="Input File Format 1">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
- </when>
- <when value="gff">
- <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
- </when>
- <when value="gff2">
- <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
- </when>
- <when value="gff3">
- <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
- </when>
- <when value="sam">
- <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
- </when>
- <when value="gtf">
- <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
- </when>
- </conditional>
-
- <conditional name="formatType2">
- <param name="FormatInputFileName2" type="select" label="Input File Format 2">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
- </when>
- <when value="gff">
- <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
- </when>
- <when value="gff2">
- <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
- </when>
- <when value="gff3">
- <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
- </when>
- <when value="sam">
- <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
- </when>
- <when value="gtf">
- <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
- </when>
- </conditional>
-
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3"/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/mergeTranscriptLists.xml
--- a/SMART/galaxy/mergeTranscriptLists.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,148 +0,0 @@
-<tool id="mergeTranscriptLists" name="merge transcript lists">
- <description>Merge the elements of two lists of genomic coordinates.</description>
- <command interpreter="python">
- ../Java/Python/mergeTranscriptLists.py -i $formatType.inputFileName1
- #if $formatType.FormatInputFileName1 == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName1 == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName1 == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName1 == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName1 == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName1 == 'gtf':
- -f gtf
- #end if
-
- -j $formatType2.inputFileName2
- #if $formatType2.FormatInputFileName2 == 'bed':
- -g bed
- #elif $formatType2.FormatInputFileName2 == 'gff':
- -g gff
- #elif $formatType2.FormatInputFileName2 == 'gff2':
- -g gff2
- #elif $formatType2.FormatInputFileName2 == 'gff3':
- -g gff3
- #elif $formatType2.FormatInputFileName2 == 'sam':
- -g sam
- #elif $formatType2.FormatInputFileName2 == 'gtf':
- -g gtf
- #end if
-
- $all
- $normalize
-
- #if $OptionDistance.dis == 'Yes':
- -d $OptionDistance.disVal
- #end if
-
- #if $OptionColinearOrAntiSens.OptionCA == 'Colinear':
- -c 
- #elif $OptionColinearOrAntiSens.OptionCA == 'AntiSens':
- -a
- #end if
-
- -o $outputFileGff 
-
-
-
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName1" type="select" label="Input File Format 1">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
- </when>
- <when value="gff">
- <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
- </when>
- <when value="gff2">
- <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
- </when>
- <when value="gff3">
- <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
- </when>
- <when value="sam">
- <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
- </when>
- <when value="gtf">
- <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
- </when>
- </conditional>
-
- <conditional name="formatType2">
- <param name="FormatInputFileName2" type="select" label="Input File Format 2">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
- </when>
- <when value="gff">
- <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
- </when>
- <when value="gff2">
- <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
- </when>
- <when value="gff3">
- <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
- </when>
- <when value="sam">
- <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
- </when>
- <when value="gtf">
- <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
- </when>
- </conditional>
-
-
- <param name="all" type="boolean" truevalue="-k" falsevalue="" checked="false" label="print all the transcripts, not only those overlapping"/>
- <param name="normalize" type="boolean" truevalue="-n" falsevalue="" checked="false" label="normalize the number of reads per cluster by the number of mappings per read "/>
-
- <conditional name="OptionDistance">
- <param name="dis" type="select" label="provide the number of reads" >
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="disVal" type="integer" value="0" label="max. distance between two transcripts" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionColinearOrAntiSens">
- <param name="OptionCA" type="select" label="Colinear or anti-sens">
- <option value="Colinear">Colinear</option>
- <option value="AntiSens">AntiSens</option>
- <option value="NONE" selected="true">NONE</option>
- </param>
- <when value="Colinear">
- </when>
- <when value="AntiSens">
- </when>
- <when value="NONE">
- </when>
- </conditional>
-
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3" label="[mergeTranscriptLists]out file"/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/modifyFasta.xml
--- a/SMART/galaxy/modifyFasta.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,49 +0,0 @@
-<tool id="modifyFasta" name="modify fasta">
-  <description>Extend or shring a list of sequences.</description>
-  <command interpreter="python"> ../Java/Python/modifyFasta.py -i $inputFile 
-   #if $OptionStart.start == "Yes":
- -s $OptionStart.startValue
-   #end if
-  
-   #if $OptionEnd.end == "Yes":
- -e $OptionEnd.endValue
-   #end if
-   -o $outputFile  
-  
-  </command>
-  
-  
-  <inputs>
-    <param name="inputFile" type="data" label="Input fasta File" format="fasta"/>
- <conditional name="OptionStart">
- <param name="start" type="select" label="keep first nucleotides">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="startValue" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionEnd">
- <param name="end" type="select" label="keep last nucleotides">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="endValue" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>   
-  </inputs>
-
-  <outputs>
-    <data format="fasta" name="outputFile" label="[modifyFasta] Output File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/modifyGenomicCoordinates.xml
--- a/SMART/galaxy/modifyGenomicCoordinates.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,126 +0,0 @@
-<tool id="modifyGenomicCoordinates" name="modify genomic coordinates">
-  <description>Extend or shrink a list of genomic coordinates.</description>
-  <command interpreter="python"> ../Java/Python/modifyGenomicCoordinates.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
-
-   #if $OptionStart.start == "Yes":
- -s $OptionStart.startValue
-   #end if
-  
-   #if $OptionEnd.end == "Yes":
- -e $OptionEnd.endValue
-   #end if
-  
-   #if $OptionFivePrim.five == "Yes":
- -5 $OptionFivePrim.fivePValue
-   #end if
-  
-   #if $OptionTroisP.TroisP == "Yes":
- -3 $OptionTroisP.ThreePValue
-   #end if
-  
-   -o $outputFile  
-  </command>
-  
-  
-  <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-  
- <conditional name="OptionStart">
- <param name="start" type="select" label="restrict to the start of the transcript">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="startValue" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionEnd">
- <param name="end" type="select" label="restrict to the end of the transcript">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="endValue" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>    
-
-
- <conditional name="OptionFivePrim">
- <param name="five" type="select" label="extend to the 5' direction">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="fivePValue" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>  
-
- <conditional name="OptionTroisP">
- <param name="TroisP" type="select" label="extend to the 3' direction">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="ThreePValue" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
-  
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[modifyGenomicCoordinates] Output File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/modifySequenceList.xml
--- a/SMART/galaxy/modifySequenceList.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,46 +0,0 @@
-<tool id="modifySequenceList" name="modify sequence list">
-  <description>Extend or shring a list of sequences. </description>
-  <command interpreter="python"> ../Java/Python/modifySequenceList.py -i $inputFile -f fasta
- #if $OptionStart.Start == "Yes":
- -s $OptionStart.StartVal
- #end if
- #if $OptionEnd.End == "Yes":
- -e $OptionEnd.EndVal
- #end if
-   -o $outputFile  
-  </command>
-  
-  
-  <inputs>
- <param name="inputFile" type="data" format="fasta" label="input file"/>
-
- <conditional name="OptionStart">
- <param name="Start" type="select" label="keep first nucleotides">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="StartVal" type="integer" value="0" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionEnd">
- <param name="End" type="select" label="keep last nucleotides">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="EndVal" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-  </inputs>
-
-  <outputs>
-     <data format="fasta" name="outputFile" label="[modifySequenceList] Output File"/>
-  </outputs>
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/plot.xml
--- a/SMART/galaxy/plot.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,71 +0,0 @@
-<tool id="plot" name="Plot">
- <description>Plot some information from a list of transcripts.</description>
- <command interpreter="python">
- ../Java/Python/plot.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #end if
-
- -x $xLabel
-
-                -y $yLabel
-
-         -X $XVal
-                -Y $YVal
-
-         #if $optionLog.log == 'Yes' :
-     -l $optionLog.logOnAxisLabel
-                #end if
-                
-                -s $shape
- -o $outputFile
-
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- </param>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="xLabel" type="text" value="value1" label="x label option" help="Choose one of the tags of 9th column in GFF file to be plotted as X-axis. Warning: You can only choose the tag value is digital."/>
-                <param name="yLabel" type="text" value="value2" label="y label option" help="Choose one of the tags of 9th column in GFF file to be plotted as Y-axis. You can only choose the tag value is digital."/>
-                <param name="XVal" type="float" value="0.0" label="value for x when tag is not present "/>
-
- <param name="YVal" type="float" value="0.0" label="value for y when tag is not present"/>
-
-
-                <conditional name="optionLog">
- <param name="log" type="select" label="calculate log option" help="use log on x- or y-axis (write 'x', 'y' or 'xy')">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="logOnAxisLabel" type="text" value="y" label="use log on x- or y-axis (write 'x', 'y' or 'xy')"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-                <param name="shape" type="text" value="barplot" label="shape of the plot [format: choice (barplot, line, points, heatPoints)]"/>
- </inputs>
-
- <outputs>
- <data name="outputFile" format="png" label="[plot] Output file"/>
- </outputs>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/plotCoverage.xml
--- a/SMART/galaxy/plotCoverage.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,265 +0,0 @@\n-<tool id="plotCoverage" name="plot coverage">\n-\t<description>Plot the coverage of the first data with respect to the second one.</description>\n-\t<command interpreter="python">\n-\t\t../Java/Python/WrappPlotCoverage.py -i $formatType.inputFileName1\n-\t\t#if $formatType.FormatInputFileName1 == \'bed\':\n-\t\t\t-f bed\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff\':\n-\t\t\t-f gff\t\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff2\':\n-\t\t\t-f gff2\n-\t\t#elif $formatType.FormatInputFileName1 == \'gff3\':\n-\t\t\t-f gff3\n-\t\t#elif $formatType.FormatInputFileName1 == \'sam\':\n-\t\t\t-f sam\n-\t\t#elif $formatType.FormatInputFileName1 == \'gtf\':\n-\t\t\t-f gtf\n-\t\t#end if\n-\t\t\t\n-\t\t-j $formatType2.inputFileName2\n-\t\t#if $formatType2.FormatInputFileName2 == \'bed\':\n-\t\t\t-g bed\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gff\':\n-\t\t\t-g gff\t\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gff2\':\n-\t\t\t-g gff2\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gff3\':\n-\t\t\t-g gff3\n-\t\t#elif $formatType2.FormatInputFileName2 == \'sam\':\n-\t\t\t-g sam\n-\t\t#elif $formatType2.FormatInputFileName2 == \'gtf\':\n-\t\t\t-g gtf\n-\t\t#end if\n-\n-\n-\t\t#if $optionRef.Ref == \'Yes\':\n-\t\t\t-q $optionRef.inputSequenceFile\n-\t\t#end if\n-\n-\t\t#if $optionwidth.width == \'Yes\':\n-\t\t\t-w $optionwidth.widthVal\n-\t\t#end if\n-\t\t#if $optionheight.height == \'Yes\':\n-\t\t\t-e $optionheight.heightVal\n-\t\t#end if\n-\t\t#if $optionXlab.Xlab == \'Yes\':\n-\t\t\t-x $optionXlab.XlabVal\n-\t\t#end if\n-\t\t#if $optionYlab.Ylab == \'Yes\':\n-\t\t\t-y $optionYlab.YlabVal\n-\t\t#end if\n-\t\t#if $optiontitle.title == \'Yes\':\n-\t\t\t-t $optiontitle.titleVal\n-\t\t#end if\t\n-\t\n-\t\t#if $optionplusColor.plusColor == \'Yes\':\n-\t\t\t-p $optionplusColor.plusColorVal\n-\t\t#end if\n-\t\t#if $optionminusColor.minusColor == \'Yes\':\n-\t\t\t-m $optionminusColor.minusColorVal\n-\t\t#end if\n-\n-\t\t#if 
$optionsumColor.sumColor == \'Yes\':\n-\t\t\t-s $optionsumColor.sumColorVal\n-\t\t#end if\n-\t\t#if $optionlineColor.lineColor == \'Yes\':\n-\t\t\t-l $optionlineColor.lineColorVal\n-\t\t#end if\t\n-\n-\t\t$merge\n-\t\t-o $outputFile\n-\t</command>\n-\n-\t<inputs>\n-\t\t<conditional name="formatType">\n-\t\t\t<param name="FormatInputFileName1" type="select" label="Input File Format 1">\n-\t\t\t\t<option value="bed">bed</option>\n-\t\t\t\t<option value="gff">gff</option>\n-\t\t\t\t<option value="gff2">gff2</option>\n-\t\t\t\t<option value="gff3">gff3</option>\n-\t\t\t\t<option value="sam">sam</option>\n-\t\t\t\t<option value="gtf">gtf</option>\n-\t\t\t</param>\n-\t\t\t<when value="bed">\n-\t\t\t\t<param name="inputFileName1" format="bed" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff">\n-\t\t\t\t<param name="inputFileName1" format="gff" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff2">\n-\t\t\t\t<param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff3">\n-\t\t\t\t<param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="sam">\n-\t\t\t\t<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t\t<when value="gtf">\n-\t\t\t\t<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="formatType2">\n-\t\t\t<param name="FormatInputFileName2" type="select" label="Input File Format 2">\n-\t\t\t\t<option value="bed">bed</option>\n-\t\t\t\t<option value="gff">gff</option>\n-\t\t\t\t<option value="gff2">gff2</option>\n-\t\t\t\t<option value="gff3">gff3</option>\n-\t\t\t\t<option value="gff2">sam</option>\n-\t\t\t\t<option value="gff3">gtf</option>\n-\t\t\t</param>\n-\t\t\t<when value="bed">\n-\t\t\t\t<param name="inputFileName2" format="bed" type="data" label="Input File 
2"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff">\n-\t\t\t\t<param name="inputFileName2" format="gff" type="data" label="Input File 2"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff2">\n-\t\t\t\t<param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>\n-\t\t\t</when>\n-\t\t\t<when value="gff3">\n-\t\t\t\t<param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>\n-\t\t\t</when>\n-\t\t\t<when value="sam">\n-\t\t\t\t<param name="inputFileName2" format="sam" type="data" label="'..b'n value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="inputSequenceFile" format="fasta" type="data" value="None"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t\n-\n-\n-\t\t<conditional name="optionwidth">\n-\t\t\t<param name="width" type="select" label="width of the plots (in px)">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="widthVal" type="integer" value="1500"  />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\t\t\n-\t\t<conditional name="optionheight">\n-\t\t\t<param name="height" type="select" label="height of the plots (in px)">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="heightVal" type="integer" value="1000" />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optiontitle">\n-\t\t\t<param name="title" type="select" label="title of the plots ">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="titleVal" type="text" value=" " />\n-\t\t\t</when>\n-\t\t\t<when 
value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\t\t\n-\t\t<conditional name="optionXlab">\n-\t\t\t<param name="Xlab" type="select" label="label on the x-axis">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="XlabVal" type="text" value=" "/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionYlab">\n-\t\t\t<param name="Ylab" type="select" label="label on the y-axis">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="YlabVal" type="text" value=" " />\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionplusColor">\n-\t\t\t<param name="plusColor" type="select" label="color for the elements on the plus strand">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="plusColorVal" type="text" value="red"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionminusColor">\n-\t\t\t<param name="minusColor" type="select" label="color for the elements on the minus strand">\n-\t\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="minusColorVal" type="text" value="blue"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionsumColor">\n-\t\t\t<param name="sumColor" type="select" label="color for 2 strands coverage line">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param 
name="sumColorVal" type="text" value="black"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\n-\t\t<conditional name="optionlineColor">\n-\t\t\t<param name="lineColor" type="select" label="color for the lines">\n-\t\t\t\t<option value="Yes">Yes</option>\n-\t\t\t\t<option value="No" selected="true">No</option>\n-\t\t\t</param>\n-\t\t\t<when value="Yes">\n-\t\t\t\t<param name="lineColorVal" type="text" value="black"/>\n-\t\t\t</when>\n-\t\t\t<when value="No">\n-\t\t\t</when>\n-\t\t</conditional>\n-\t\t\n-\t\t<param name="merge" type="boolean" truevalue="-1" falsevalue="" checked="false" label="merge the 2 plots in 1"/>\n-\t</inputs>\n-\n-\t<outputs>\n-\t\t<data name="outputFile" format="tar" label="[plotCoverage] tar out file" help="You can not see the results directly from galaxy, but you can download this tar output file."/>\n-\t</outputs> \n-\t\n-    <help>\n-        This script gives a .tar out file, if you want to take look at the results, you have to download it.\n-    </help>\t\t\n-</tool>\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/plotGenomeCoverage.xml
--- a/SMART/galaxy/plotGenomeCoverage.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,62 +0,0 @@
-<tool id="plotGenomeCoverage" name="plot genome coverage">
-  <description>Get the coverage of a genome. </description>
-  <command interpreter="python"> ../Java/Python/plotGenomeCoverage.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
-
-  -r $reference
-   -o $outputFile  
-  </command>
-  
-  
-  <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="reference" type="data" label="reference Fasta File" format="fasta"/> 
-  </inputs>
-
-  <outputs>
-    <data format="png" name="outputFile" label="[plotGenomeCoverage] Output File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/plotRepartition.xml
--- a/SMART/galaxy/plotRepartition.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,59 +0,0 @@
-<tool id="plotRepartition" name="plot repartition">
- <description>Plot the repartition of different data on a whole genome. (This tool uses only 1 input file, the different values are stored in the tags. )</description>
- <command interpreter="python">
- ../Java/Python/WrappPlotRepartition.py -i $inputFileName
- -n $names
- $normalize
- #if $optionColor.Color == 'Yes':
- -c $optionColor.colValue
- #end if
- -f $format
-
- #if $optionLog.log == 'Yes':
- -l $optionLog.logVal
- #end if
-
- -o $outputFilePNG
- </command>
-
- <inputs>
- <param name="inputFileName" type="data" label="Input Gff3 File" format="gff3"/>
- <param name="names" type="text" value="None" label="name for the tags (separated by commas and no space) [compulsory option]"/>
- <param name="normalize" type="boolean" truevalue="-r" falsevalue="" checked="false" label="normalize data (when panels are different)"/>
- <param name="format" type="text" value="png" label="format of the output file[default: png]"/>
-
- <conditional name="optionColor">
- <param name="Color" type="select" label="scolor of the lines (separated by commas and no space) ">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="colValue" type="text" value="None"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionLog">
- <param name="log" type="select" label="use log on x- or y-axis (write 'x', 'y' or 'xy')">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="logVal" type="text" value=" "/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- </inputs>
-
- <outputs>
- <data name="outputFilePNG" format="tar" label="[plotRepartition]out file"/>
- </outputs> 
-
- <help>
-        This script gives a .tar out file, if you want to take look at the results, you have to download it.
-    </help>
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/plotTranscriptList.xml
--- a/SMART/galaxy/plotTranscriptList.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,141 +0,0 @@
-<tool id="plotTranscriptList" name="plot transcript list">
- <description>Plot some information from a list of transcripts. </description>
- <command interpreter="python">
- ../Java/Python/plotTranscriptList.py  -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
-
- -x $xVal
- -y $yVal
- #if $optionz.z == 'Yes':
- -z $optionz.zVal
- #end if
-
- -X $XVal
- -Y $YVal
- -Z $ZVal
-
- #if $optionxLab.xLab == 'Yes':
- -n $optionxLab.labVal
- #end if
- #if $optionyLab.yLab == 'Yes':
- -m $optionyLab.labVal
- #end if
-
- #if $optionyLog.log == 'Yes':
- -l $optionyLog.logVal
- #end if
-
- -s $shape
- -b $bucket
-
- -o $outputFilePNG
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="xVal" type="text" value="None" label="tag for the x value [compulsory option]"/>
- <param name="yVal" type="text" value="None" label="tag for the y value [compulsory option]"/>
-
- <conditional name="optionz">
- <param name="z" type="select" label="tag for the z value ">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="zVal" type="text" value="None"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <param name="XVal" type="float" value="0.0" label="value for x when tag is not present "/>
-
- <param name="YVal" type="float" value="0.0" label="value for y when tag is not present"/>
-
- <param name="ZVal" type="float" value="0.0" label="value for z when tag is not present"/>
-
- <conditional name="optionxLab">
- <param name="xLab" type="select" label="label on the x-axis ">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="labVal" type="text" value=" "/>
- </when>
- <when value="No">
- </when>
- </conditional>
- <conditional name="optionyLab">
- <param name="yLab" type="select" label="label on the y-axis ">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="labVal" type="text" value=" "/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="optionyLog">
- <param name="log" type="select" label="use log on x- or y-axis (write 'x', 'y' or 'xy')">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="logVal" type="text" value=" "/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <param name="shape" type="text" value="barplot" label="shape of the plot [format: choice (barplot, line, points, heatPoints)]"/>
- <param name="bucket" type="float" value="1.0" label="bucket size (for the line plot)"/>
-
- </inputs>
-
- <outputs>
- <data name="outputFilePNG" format="png" label="[plotTranscriptList]out file"/>
- </outputs> 
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/qualToFastq.xml
--- a/SMART/galaxy/qualToFastq.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="qualToFastq" name="qual -> Fastq">
-  <description>Convert a file in FASTA/Qual format to FastQ format.</description>
-  <command interpreter="python"> ../Java/Python/qualToFastq.py -f $inputFastaFile -q $inputQualFile -o $outputFile </command>
-  <inputs>
-    <param name="inputFastaFile" type="data" label="Input fasta File" format="fasta"/>
-    <param name="inputQualFile" type="data" label="Input qual File" format="txt"/>
-  </inputs>
-
-  <outputs>
-    <data format="fastq" name="outputFile" label="[qual -> Fastq] Output File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/removeExonLines.sh
--- a/SMART/galaxy/removeExonLines.sh Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-#!/bin/bash
-sed '/exon/d' $1
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/removeExonLines.xml
--- a/SMART/galaxy/removeExonLines.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,15 +0,0 @@
-<tool id="removeExonLines" name="remove exon lines">
-  <description>Removes the lines containing Exon.</description>
-  <command interpreter="sh"> ../Java/Python/removeExonLines.sh $inputFile > $outputFile  </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff3"/>
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[removeExonLine] Output File"/>
-  </outputs>
-
-  <help>
- command example: sh removeExonLines.sh input.gff3
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/restrictFromSize.xml
--- a/SMART/galaxy/restrictFromSize.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,93 +0,0 @@
-<tool id="restrictFromSize" name="restrict from size">
- <description>Select the elements of a list of sequences or transcripts with a given size.</description>
- <command interpreter="python">
- ../Java/Python/restrictFromSize.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
-
- #if $OptionMax.maximum == "Yes":
- -M $OptionMax.max
- #end if
- #if $OptionMin.minimum == "Yes":
- -m $OptionMin.min
- #end if
-
- -o $outputFileGff 
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <conditional name="OptionMax">
- <param name="maximum" type="select" label="maximum number of np">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="max" type="integer" value="1" help="Be Careful! The value must be upper than 0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionMin">
- <param name="minimum" type="select" label="minimum number of np">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="min" type="integer" value="1" help="Be Careful! The value must be upper than 0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3" label="[restrictFromSize] Output File"/>
- </outputs> 
-
- <help>
- command example: restrictFromSize.py -i cis_e10_cluster20InSeed2515_nbEUp10.gff3 -f gff -o cis_e10_cluster20InSeed2515_nbEUp10_lgUp50 -m 50
- </help>
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/restrictSequenceList.xml
--- a/SMART/galaxy/restrictSequenceList.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,16 +0,0 @@
-<tool id="restrictSequenceList" name="restrict sequence list">
-  <description>Keep the elements of a list of sequences whose name is mentionned in a given file.</description>
-  <command interpreter="python"> ../Java/Python/restrictSequenceList.py -i $inputFile -f fasta -n $name -o $outputFile </command>
-  
-  <inputs>
- <param name="inputFile" type="data" label="Input fasta File" format="fasta"/>
- <param name="name" type="data" label="The txt file contains the names of the transcripts." format="txt"/> 
-  </inputs>
-
-  <outputs>
-    <data format="fasta" name="outputFile" label="[restrictSequenceList] Output File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/restrictTranscriptList.xml
--- a/SMART/galaxy/restrictTranscriptList.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,120 +0,0 @@
-<tool id="restrictTranscriptList" name="restrict transcript list">
-  <description>Keep the coordinates which are located in a given position.</description>
-  <command interpreter="python"> ../Java/Python/restrictTranscriptList.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #elif $formatType.FormatInputFileName == 'sam':
- -f sam
- #elif $formatType.FormatInputFileName == 'gtf':
- -f gtf
- #end if
-
-   #if $OptionChrom.Chrom == "Yes":
- -c $OptionChrom.ChromName
-   #end if
-  
-   #if $OptionStart.start == "Yes":
- -s $OptionStart.startValue
-   #end if
-  
-   #if $OptionEnd.end == "Yes":
- -e $OptionEnd.endValue
-   #end if
-  
-   -o $outputFile  
-  
-  </command>
-  
-  
-  <inputs>
-    <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- <option value="sam">sam</option>
- <option value="gtf">gtf</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- <when value="sam">
- <param name="inputFileName" format="sam" type="data" label="Input File"/>
- </when>
- <when value="gtf">
- <param name="inputFileName" format="gtf" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <conditional name="OptionChrom">
- <param name="Chrom" type="select" label="chromosome name">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="ChromName" type="text" value="None"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-  
- <conditional name="OptionStart">
- <param name="start" type="select" label="restrict to the start of the transcript">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="startValue" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionEnd">
- <param name="end" type="select" label="restrict to the end of the transcript">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="endValue" type="integer" value="0"/>
- </when>
- <when value="No">
- </when>
- </conditional>    
-  </inputs>
-
-  <outputs>
-    <data format="gff3" name="outputFile" label="[restrictTranscriptList] Output File"/>
-  </outputs>
-
-  <help>
-  </help>
-<tests>
-    <test>
-      <param name="FormatInputFileName" value="gtf" />
-      <param name="inputFileName" value="genes.gtf" />
- <param name="Chrom" value="Yes"/>
- <param name="ChromName" value="I"/>
- <param name="start" value="No" />
-<param name="end" value="No" />
-      <output name="outputFile" file="exp_restrictTranscriptList.gff3" />
-    </test>
-  </tests>
-
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/test/CollapseReads.xml
--- a/SMART/galaxy/test/CollapseReads.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,49 +0,0 @@
-<tool id="collapseReads" name="collapseReads">
- <description>Merges two reads if they have exactly the same genomic coordinates.</description>
- <command interpreter="python">
- ../Java/Python/CollapseReads.py -i $formatType.inputFileName
- #if $formatType.FormatInputFileName == 'bed':
- -f bed
- #elif $formatType.FormatInputFileName == 'gff':
- -f gff
- #elif $formatType.FormatInputFileName == 'gff2':
- -f gff2
- #elif $formatType.FormatInputFileName == 'gff3':
- -f gff3
- #end if
-
- -$strand
- -o $outputFileGff 
- --galaxy
- </command>
-
- <inputs>
- <conditional name="formatType">
- <param name="FormatInputFileName" type="select" label="Input File Format">
- <option value="bed">bed</option>
- <option value="gff">gff</option>
- <option value="gff2">gff2</option>
- <option value="gff3">gff3</option>
- </param>
- <when value="bed">
- <param name="inputFileName" format="bed" type="data" label="Input File"/>
- </when>
- <when value="gff">
- <param name="inputFileName" format="gff" type="data" label="Input File"/>
- </when>
- <when value="gff2">
- <param name="inputFileName" format="gff2" type="data" label="Input File"/>
- </when>
- <when value="gff3">
- <param name="inputFileName" format="gff3" type="data" label="Input File"/>
- </when>
- </conditional>
-
- <param name="strand" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Strand option merges 2 different strands[default:False]."/>
- </inputs>
-
- <outputs>
- <data name="outputFileGff" format="gff3"/>
- </outputs> 
-
-</tool>
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/test/Test_F_WrappGetLetterDistribution.py
--- a/SMART/galaxy/test/Test_F_WrappGetLetterDistribution.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,91 +0,0 @@
-import unittest
-import os
-from commons.core.utils.FileUtils import FileUtils
-from SMART.galaxy.WrappGetLetterDistribution import WrappGetLetterDistribution
-
-SMART_PATH = "%s/SMART" % os.environ["REPET_PATH"]
-SMART_DATA = SMART_PATH + "/data"
-
-class Test_F_WrappGetLetterDistribution(unittest.TestCase):
-
-
-    def setUp(self):
-        self._dirTest = "%s/galaxy/test" % SMART_PATH
-        self._iwrappFastq = WrappGetLetterDistribution()
-        self._iwrappFasta = WrappGetLetterDistribution()
-        self._expOutputCSV = "expOutputTomate.csv" 
-       
-    def test_wrappFasta(self):
-        self._iwrappFasta._inputFileName = "%s/SR1.fasta" % SMART_DATA
-        self._iwrappFasta._outputFileNamePrefix = "%s/galaxy/test/TomateFasta_res" % SMART_PATH
-        self._iwrappFasta._outputFileNamePNG = "%s/galaxy/test/TomateFasta_res.png" % SMART_PATH
-        self._iwrappFasta._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFasta_resPerNt.png" % SMART_PATH
-        self._iwrappFasta._outputFileNameCSV = "%s/galaxy/test/TomateFasta_res.csv" % SMART_PATH
-        self._iwrappFasta._inputFileFormat = "fasta"
-        self._iwrappFasta._csv = True
-        if not(FileUtils.isEmpty(self._iwrappFasta._inputFileName)):
-            self._iwrappFasta.wrapp()
-            self.assertTrue(os.path.exists(self._iwrappFasta._outputFileNamePNG))
-            self.assertTrue (os.path.exists(self._iwrappFasta._outputFileNamePerNtPNG))    
-            self.assertTrue (os.path.exists(self._iwrappFasta._outputFileNameCSV))
-            self.assertTrue(FileUtils.are2FilesIdentical(self._iwrappFasta._outputFileNameCSV,self._expOutputCSV))
-        else:
-            print "Problem : the input fasta file %s is empty!" % self._inputFileFasta
-         
-            
-#    def test_wrappFasta_withoutCSV_Opt(self):
-#        self._iwrappFasta._inputFileName = "%s/SR1.fasta" % SMART_DATA
-#        self._iwrappFasta._outputFileNamePrefix = "%s/galaxy/test/TomateFasta_res" % SMART_PATH
-#        self._iwrappFasta._outputFileNamePNG = "%s/galaxy/test/TomateFasta_res.png" % SMART_PATH
-#        self._iwrappFasta._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFasta_resPerNt.png" % SMART_PATH
-#        self._iwrappFasta._outputFileNameCSV = "%s/galaxy/test/TomateFasta_res.csv" % SMART_PATH
-#        self._iwrappFasta._inputFileFormat = "fasta"
-#        self._iwrappFasta._csv = False
-#        if not(FileUtils.isEmpty(self._iwrappFasta._inputFileName)):
-#            self._iwrappFasta.wrapp()
-#            self.assertTrue(os.path.exists(self._iwrappFasta._outputFileNamePNG))
-#            self.assertTrue (os.path.exists(self._iwrappFasta._outputFileNamePerNtPNG)) 
-#        else:            
-#            print "Problem : the input fasta file %s is empty!" % self._inputFileFasta
-#        os.system("rm %s/galaxy/test/*_res*.png" %SMART_PATH)
-#        os.system("rm %s/galaxy/test/*_res.csv" %SMART_PATH) 
-#        
-#                
-#    def test_wrappFastq(self):
-#        self._iwrappFastq._inputFileName = "%s/SR1.fastq" % SMART_DATA
-#        self._iwrappFastq._outputFileNamePrefix = "%s/galaxy/test/TomateFastq_res" % SMART_PATH
-#        self._iwrappFastq._outputFileNamePNG = "%s/galaxy/test/TomateFastq_res.png" % SMART_PATH
-#        self._iwrappFastq._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFastq_resPerNt.png" % SMART_PATH
-#        self._iwrappFastq._outputFileNameCSV = "%s/galaxy/test/TomateFastq_res.csv" % SMART_PATH
-#        self._iwrappFastq._inputFileFormat = "fastq"
-#        self._iwrappFastq._csv = True
-#        if not(FileUtils.isEmpty(self._iwrappFastq._inputFileName)):
-#            self._iwrappFastq.wrapp()
-#            self.assertTrue(os.path.exists(self._iwrappFastq._outputFileNamePNG))
-#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNamePerNtPNG))    
-#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNameCSV))
-#            self.assertTrue(FileUtils.are2FilesIdentical(self._iwrappFastq._outputFileNameCSV,self._expOutputCSV))
-#        else:
-#            print "Problem : the input fastq file %s is empty!" % self._inputFileFastq   
-#          
-#        
-#    def test_wrappFastq_withoutCSV_Opt(self):
-#        self._iwrappFastq._inputFileName = "%s/SR1.fastq" % SMART_DATA
-#        self._iwrappFastq._outputFileNamePrefix = "%s/galaxy/test/TomateFastq_res" % SMART_PATH
-#        self._iwrappFastq._outputFileNamePNG = "%s/galaxy/test/TomateFastq_res.png" % SMART_PATH
-#        self._iwrappFastq._outputFileNamePerNtPNG = "%s/galaxy/test/TomateFastq_resPerNt.png" % SMART_PATH
-#        self._iwrappFastq._outputFileNameCSV = "%s/galaxy/test/TomateFastq_res.csv" % SMART_PATH
-#        self._iwrappFastq._inputFileFormat = "fastq"
-#        self._iwrappFastq._csv = False
-#        if not(FileUtils.isEmpty(self._iwrappFastq._inputFileName)):
-#            self._iwrappFastq.wrapp()
-#            self.assertTrue(os.path.exists(self._iwrappFastq._outputFileNamePNG))
-#            self.assertTrue (os.path.exists(self._iwrappFastq._outputFileNamePerNtPNG)) 
-#        else:            
-#            print "Problem : the input fastq file %s is empty!" % self._inputFileFastq
-#        os.system("rm %s/galaxy/test/*_res*.png" %SMART_PATH)
-#        os.system("rm %s/galaxy/test/*_res.csv" %SMART_PATH)
-       
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/testArgum.xml
--- a/SMART/galaxy/testArgum.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,24 +0,0 @@
-<tool id="test_argument" name="test_argu" version="1.0.0">
-  <description>To test the arguments from shell.</description>
-  <command> 
-../testArgu.sh $test_out 
-#for $i in $replicate_groups
-#for $j in $i.replicates
-$j.bam_alignment:#slurp
-#end for
-#end for
-    >> $Log_File </command>
-  <inputs>
- <param format="gff3" name="anno_input_selected" type="data" label="Genome annotation in GFF3 file" help="A tab delimited format for storing sequence features and annotations"/>
-   <repeat name="replicate_groups" title="Replicate group" min="2">
-     <repeat name="replicates" title="Replicate">
-      <param format="fastq" name="bam_alignment" type="data" label="BAM alignment file" help="BAM alignment file. Can be generated from SAM files using the SAM Tools."/>
-     </repeat>
-   </repeat>
-  </inputs>
-
-  <outputs>
-    <data format="txt" name="test_out" label="DESeq result"/>
- <data format="txt" name="Log_File" label="DESeq result"/>
-  </outputs>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/testR.xml
--- a/SMART/galaxy/testR.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,19 +0,0 @@
-<tool id="testDiffExpAnal" name="Differential Expression Analysis">
-  <description>Differential expression analysis for sequence count data (DESeq)</description>
-  <command interpreter="sh"> ../DiffExpAnal/testR.sh $inputFile $columnsOfGeneName $columnsOfCondition1 $columnsOfCondition2 $outputFileCSV $outputFilePNG 2>$outputLog </command>
-  <inputs>
-    <param name="inputFile" type="data" label="Input File" format="tabular"/>
- <param name="columnsOfGeneName" type="text" value="0" label="Please indicate the column numbers of gene names with ',' separator. If There are not gene names, default value is 0."/>
- <param name="columnsOfCondition1" type="text" value="1,2" label="Please indicate the column numbers of condition1 with ',' separator."/>
- <param name="columnsOfCondition2" type="text" value="3,4" label="Please indicate the column numbers of condition2 with ',' separator."/>
-  </inputs>
-
-  <outputs>
-    <data format="tabular" name="outputFileCSV" label="[DiffExpAnal] Output CSV File"/>
- <data format="png" name="outputFilePNG" label="[DiffExpAnal] Output PNG File"/>
-    <data format="tabular" name="outputLog" label="[DiffExpAnal] Log File"/>
-  </outputs>
-
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/trimAdaptor.xml
--- a/SMART/galaxy/trimAdaptor.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,47 +0,0 @@
-<tool id="trimAdaptor" name="trim adaptors">
-  <description>Remove the 3' adaptor of a list of reads.</description>
-  <command interpreter="python"> ../Java/Python/trimAdaptor.py -i $inputFile -f fastq
-   -a $adaptor
-   #if $OptionError.Error == "Yes":
- -e $OptionError.ErrorVal
- #end if
-   $noAdaptor $noAdaptorFile
-   -o $outputFile  
-  </command>
-  
-  
-  <inputs>
-    <param name="inputFile" type="data" label="Input fastq File" format="fastq"/>
- <param name="adaptor" type="text" value="None" label="adaptor [compulsory option]"/> 
- <conditional name="OptionError">
- <param name="Error" type="select" label="number of errors in percent">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="ErrorVal" type="integer" value="0" />
- </when>
- <when value="No">
- </when>
- </conditional>
- <param name="noAdaptor" type="boolean" truevalue="-n" falsevalue="" checked="false" label="log option" help="file name where to print sequences with no adaptor"/>
-  </inputs>
-
-  <outputs>
-    <data format="fastq" name="outputFile" label="[trimAdaptor] Output File"/>
- <data name="noAdaptorFile" format="fastq" label="[trimAdaptor] Log File">
- <filter>noAdaptor</filter>
- </data>
-  </outputs>
-  <tests>
-    <test>
-      <param name="inputFile" value="short_fastq.fastq" />
-      <param name="adaptor" value="AAAA" />
-      <param name ="Error" value="No"/>
-      <param name ="noAdaptor" value="False"/>
-      <output name="outputFile" file="exp_trimadaptator_short_fastq.fastq" />     
-    </test>
-  </tests>
-  <help>
-  </help>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada SMART/galaxy/trimSequences.xml
--- a/SMART/galaxy/trimSequences.xml Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,94 +0,0 @@
-<tool id="trimSequences" name="trim sequences">
-  <description>Remove the 5' and/or 3' adaptors of a list of reads.</description>
-  <command interpreter="python"> ../Java/Python/trimSequences.py -i $inputFile -f fastq
-   #if $OptionFPADP.FPADP == "Yes":
- -5 $OptionFPADP.fivePAdaptor
- #end if   
-  #if $OptionTPADP.TPADP == "Yes":
- -3 $OptionTPADP.threePAdaptor
- #end if
-   #if $OptionError.Error == "Yes":
- -e $OptionError.ErrorVal
- #end if
-
- $indels
-   $noAdaptor5p $noAdaptorFile5p
-   $noAdaptor3p $noAdaptorFile3p
-   -o $outputFile  
-  
-  </command>
-  
-  
-  <inputs>
-    <param name="inputFile" type="data" label="Input fastq File" format="fastq"/>
-
- <conditional name="OptionFPADP">
- <param name="FPADP" type="select" label="5'adaptor">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="fivePAdaptor" type="text" value="None" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionTPADP">
- <param name="TPADP" type="select" label="3'adaptor">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="threePAdaptor" type="text" value="None" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <conditional name="OptionError">
- <param name="Error" type="select" label="number of errors in percent">
- <option value="Yes">Yes</option>
- <option value="No" selected="true">No</option>
- </param>
- <when value="Yes">
- <param name="ErrorVal" type="integer" value="0" />
- </when>
- <when value="No">
- </when>
- </conditional>
-
- <param name="indels" type="boolean" truevalue="-d" falsevalue="" checked="false" label="indels option" help="also accept indels"/>
- <param name="noAdaptor5p" type="boolean" truevalue="-n" falsevalue="" checked="false" label="noAdaptor 5' option" help="file name where to print sequences with no 5' adaptor "/>
- <param name="noAdaptor3p" type="boolean" truevalue="-m" falsevalue="" checked="false" label="noAdaptor 3' option" help="file name where to print sequences with no 3' adaptor "/>
-
-
-
-  </inputs>
-
-  <outputs>
-    <data format="fastq" name="outputFile" label="[trimSequences] Output File"/>
- <data name="noAdaptorFile5p" format="fastq" label="[trimSequences] noAdaptor5p File">
- <filter>noAdaptor5p</filter>
- </data>
- <data name="noAdaptorFile3p" format="fastq" label="[trimSequences] noAdaptor3p File">
- <filter>noAdaptor3p</filter>
- </data>
-  </outputs>
-
-  <help>
-  </help>
-  <tests>
- <test>
-  <param name="inputFile" value="short_fastq.fastq" />
-  <param name="FPADP" value="Yes"/>
-       <param name="fivePAdaptor" value="AAAA" />
- <param name="TPADP" value="No"/>
-       <param name ="Error" value="No"/>
- <param name="indels" value="False"/>
-       <param name ="noAdaptor5p" value="False"/>
- <param name= "noAdaptor3p" value="False"/>
-       <output name="outputFile" file="exp_trimsequences_short_fastq.fastq" />
- </test>
-  </tests>
-</tool>
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/__init__.pyc
b
Binary file commons/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/LoggerFactory.py
--- a/commons/core/LoggerFactory.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,139 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-## @mainpage Documentation of the REPET API
-#
-# Welcome to the API documentation!
-# This API is a set of packages and classes for pipeline(s) development.
-#
-# @par The "logger" package
-# 
-# Logging is managed via LoggerFactory. This class creates instances of logging.logging python class. It's strongly encouraged to use this factory each time you need to log something.
-#
-# @par The "checker" package
-#
-# This package is a set of classes designed to facilitate development of different kind of checks: filesystem  checks, environment checks, configuration file checks ...
-#
-# Classes should subclass checker::IChecker or if a logger is needed: checker::AbstractChecker.
-#
-# Methods should raise checker::CheckerException.
-#
-# Use checker::ConfigChecker and checker::ConfigException for configuration files checks.
-#
-# checker::CheckerUtils is a set of small static methods shared by other classes of checker package.
-#
-# @par The "coord" package
-#
-# This package is a set of classes dedicated to coordinates manipulations.
-# 
-# A coord::Range instance records a region on a given sequence (start, end and sequence name).
-#
-# A coord::Map instance is a coord::Range instance and record a named region on a given sequence (start, end, sequence name and name).
-#
-# A coord::Set instance is a coord::Map instance and record a named region on a given sequence with an identifier (start, end, sequence name, name and id).
-#
-# A coord::Align instance handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity).
-#
-# A coord::Path instance is a coord::Align instance and handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity) with an identifier.
-#
-# A coord::Match instance is a coord::Path instance and handle a chain of match(es) between two sequences, query and subject, with an identifier and the length of the input sequences.
-#
-# coord::Align, coord::Map, coord::Path and coord::Set come with utils classes: coord::AlignUtils, coord::MapUtils, coord::PathUtils and coord::SetUtils.        
-#
-# @par The "seq" package
-#
-# This package a set of classes dedicated to sequences manipulations.
-#
-# A seq::Bioseq instance records a sequence with its header. seq::Bioseq comes with an utils class: seq::BioseqUtils.
-#
-# A seq::BioseqDB instance handle a collection of a Bioseq (header-sequence).
-#
-# A seq::AlignedBioseqDB instance is a multiple sequence alignment representation.
-#
-# A seq::FastaUtils is a set of static methods for fasta file manipulation.
-#
-# @par The "sql" package
-#
-# This package is dedicated to persistance of coord package objects.   
-# All classes come with dedicated interfaces. Use these interfaces for class manipulation.
-# Class names patterns are ITable*Adaptator and Table*Adaptator.
-#
-# sql::ITablePathAdaptator, sql::TablePathAdaptator /
-# sql::ITableSetAdaptator, sql::TableSetAdaptator /
-# sql::ITableSeqAdaptator, sql::TableSeqAdaptator /
-# sql::ITableMapAdaptator, sql::TableMapAdaptator /
-# sql::ITableMatchAdaptator, sql::TableMatchAdaptator.
-#   
-
-import logging
-import sys
-
-DEFAULT_LEVEL = 1
-DEFAULT_FORMAT = "%(asctime)s - %(module)s - %(levelname)s - %(message)s"
-DATE_FORMAT = "%Y-%m-%d %H:%M:%S"
-
-## Use this class to create a instance of logging class.
-#
-class LoggerFactory(object):
-    
-    def createLogger(name, verbosity = DEFAULT_LEVEL, format = DEFAULT_FORMAT, out = sys.stdout):
-        log = logging.getLogger(name)
-        
-        hasStreamHandler = False
-        for handler in log.handlers: 
-            if handler.__class__ == logging.StreamHandler:
-                hasStreamHandler = True
-                break
-        if not hasStreamHandler:
-            formatter = logging.Formatter(format, DATE_FORMAT) 
-            handler = logging.StreamHandler(out)
-            handler.setFormatter(formatter)
-            log.addHandler(handler)
-        
-        LoggerFactory.setLevel(log, verbosity)
-        return log
-    
-    createLogger = staticmethod(createLogger)
-
-    def setLevel(log, verbosity):
-        log.disabled = False
-        if verbosity >= 4:
-            log.setLevel(logging.DEBUG)
-        elif verbosity == 3:
-            log.setLevel(logging.INFO)
-        elif verbosity == 2:
-            log.setLevel(logging.WARNING)
-        elif verbosity == 1:
-            log.setLevel(logging.ERROR)
-        elif verbosity == 0:
-            log.disabled = True
-            
-    setLevel = staticmethod(setLevel)
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/AbstractChecker.py
--- a/commons/core/checker/AbstractChecker.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,61 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-from commons.core.checker.IChecker import IChecker
-from commons.core.LoggerFactory import LoggerFactory
-
-
-## Enable a Logger in your Checker.
-#
-#  Subclasses of  AbstractChecker have a already a logger enabled (referenced by self._log attribute). Subclasses also already implements IChecker.
-#  All you have to do is to call __init__() method in your own constructor.
-class AbstractChecker( IChecker ):
-    
-    ## Constructor 
-    #
-    # @param logFileName name of log file where logger outputs
-    #
-    def __init__(self, logFileName):
-        self._log = LoggerFactory.createLogger(logFileName)
-        
-        
-    ## Set (change) default logger
-    #
-    # @param logger a new logger
-    # 
-    def setLogger(self, logger):
-        self._log = logger
-        
-        
-    ## Return the logger instance
-    #
-    def getLogger(self):
-        return self._log
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/CheckerException.py
--- a/commons/core/checker/CheckerException.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,52 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-## Exception raised during check 
-#
-# This class wraps Exception class
-#
-class CheckerException( Exception ):
-    
-    ## Constructor
-    #
-    # @param msg  message embedded in Exception class   
-    def __init__(self,msg=""):
-        self.messages = []
-        self.msg = msg
-        Exception.__init__(self, msg)
-        
-        
-    def setMessages(self,lMessages):
-        self.messages = lMessages
-        
-        
-    def getMessages(self):
-        return self.messages
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/CheckerUtils.py
--- a/commons/core/checker/CheckerUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,316 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import os\n-import sys\n-import re\n-import glob\n-import ConfigParser\n-from ConfigParser import NoOptionError\n-from ConfigParser import NoSectionError\n-from commons.core.checker.CheckerException import CheckerException\n-\n-\n-## A set of static methods used to perform checks.\n-#\n-#\n-class CheckerUtils( object ):\n-    \n-    ## Check if blastName param is in ["blastn", "blastp", "blastx", "tblastn", "tblastx"]\n-    # \n-    # @param blastName name to check\n-    # @return True if name is in list False otherwise\n-    #\n-    def isBlastNameNotInBlastValues( blastName ):\n-        blastValuesSet = set( ["blastn", "blastp", "blastx", "tblastn", "tblastx"] )\n-        blastNameSet = set( [ blastName ] )\n-        return not blastNameSet.issubset( blastValuesSet )\n-    \n-    isBlastNameNotInBlastValues = staticmethod( isBlastNameNotInBlastValues )\n-    \n-    \n-    ## Check if param is NOT "TRUE" and NOT false "FALSE"\n-    #\n-    # @param param str to check\n-    # @return True if param is not eq to "TRUE" AND not eq to "FALSE", false otherwise \n-    #\n-    def isNotTRUEisNotFALSE( param ):\n-        return param != "TRUE" and param != "FALSE"\n-    \n-    isNotTRUEisNotFALSE = staticmethod( isNotTRUEisNotFALSE )\n-    \n-    \n-    ## Check if resource (file or dir) do NOT exists\n-    #  \n-    # @param resource file or dir to check\n-    # @return True if resource exists False otherwise\n-    #\n-    def isRessourceNotExits( resource ):\n-        return not os.path.exists( resource )\n-    \n-    isRessourceNotExits = staticmethod( isRessourceNotExits )\n-    \n-    \n-    ## Check a specific E-value format: de-dd \n-    #\n-    # @param param E-value to check\n-    # @return True if format is de-dd False otherwise\n-    #\n-    def isNotAeValueWithOneDigit2DecimalsAtLeast( param ):\n-        # 
\\d\\d stands for 2 digits and more ???\n-        return not re.match( "\\de\\-\\d\\d", param )\n-    \n-    isNotAeValueWithOneDigit2DecimalsAtLeast = staticmethod( isNotAeValueWithOneDigit2DecimalsAtLeast )\n-    \n-    \n-    ## Check a number format\n-    #\n-    # @param param value to check\n-    # @return True if param is a number (d+) False otherwise\n-    #\n-    def isNotANumber( param ):\n-        return not re.match( "\\d+", param )\n-    \n-    isNotANumber = staticmethod( isNotANumber )\n-    \n-\n-    ## Check if an executable is in the user\'s PATH\n-    #\n-    # @param exeName name of t'..b'me)\n-        \n-    checkSectionInConfigFile = staticmethod( checkSectionInConfigFile )\n-    \n-    \n-    ## Check if an option is in a specified section in the configuration file\n-    #\n-    # @param config filehandle of configuration file\n-    # @param sectionName string of section name\n-    # @param optionName string of option name to check\n-    # @exception NoOptionError: if option not found raise a NoOptionError\n-    #\n-    def checkOptionInSectionInConfigFile( config, sectionName, optionName ):\n-        config.get( sectionName, optionName )\n-    \n-    checkOptionInSectionInConfigFile = staticmethod( checkOptionInSectionInConfigFile )\n-    \n-    \n-    ## Check version number coherency between configFile and CHANGELOG\n-    #\n-    # @param config ConfigParser Instance of configuration file\n-    # @param changeLogFileHandle CHANGELOG file handle\n-    # @exception NoOptionError: if option not found raise a NoOptionError\n-    #\n-    def checkConfigVersion( changeLogFileHandle, config ):\n-        line = changeLogFileHandle.readline()\n-        while not line.startswith("REPET release "):\n-            line = changeLogFileHandle.readline()\n-        numVersionChangeLog = line.split()[2]\n-        \n-        numVersionConfig = config.get("repet_env", "repet_version")\n-        \n-        if not numVersionChangeLog == numVersionConfig:\n-     
       message = "*** Error: wrong config file version. Expected version num is " + numVersionChangeLog + " but actual in config file is " + numVersionConfig\n-            raise CheckerException(message)\n-    \n-    checkConfigVersion = staticmethod( checkConfigVersion )\n-    \n-    \n-    ## Get version number from CHANGELOG\n-    #\n-    # @param changeLogFile CHANGELOG file name\n-    #\n-    def getVersionFromChangelogFile(changeLogFileName):\n-        with open(changeLogFileName) as changeLogFileHandle:\n-            line = changeLogFileHandle.readline()\n-            while not line.startswith("REPET release "):\n-                line = changeLogFileHandle.readline()\n-            numVersionChangeLog = line.split()[2]\n-            return numVersionChangeLog\n-        \n-            \n-    getVersionFromChangelogFile = staticmethod( getVersionFromChangelogFile )\n-    \n-    \n-    ## Check if headers of an input file contain only alpha numeric characters and "_ : . -"\n-    #\n-    # @param fileHandler file handle\n-    # @exception CheckerException if bad header raise a CheckerException\n-    #\n-    def checkHeaders( fileHandler ):\n-        lHeaders = CheckerUtils._getHeaderFromFastaFile(fileHandler)\n-        p = re.compile(\'[^a-zA-Z0-9_:\\.\\-]\', re.IGNORECASE)\n-        lWrongHeaders = []\n-        for header in lHeaders:\n-            errList=p.findall(header)\n-            if len( errList ) > 0 :\n-                lWrongHeaders.append(header)\n-        if lWrongHeaders != []:\n-            exception = CheckerException()\n-            exception.setMessages(lWrongHeaders)\n-            raise exception\n-        \n-    checkHeaders = staticmethod( checkHeaders )  \n-    \n-    \n-    def _getHeaderFromFastaFile( inFile ):\n-        lHeaders = []\n-        while True:\n-            line = inFile.readline()\n-            if line == "":\n-                break\n-            if line[0] == ">":\n-                lHeaders.append( line[1:-1] )\n-        
return lHeaders\n-    \n-    _getHeaderFromFastaFile = staticmethod( _getHeaderFromFastaFile ) \n-\n-\n-    ## Return True if an option is in a specified section in the configuration file, False otherwise\n-    #\n-    # @param config handler of configuration file\n-    # @param sectionName string of section name\n-    # @param optionName string of option name to check\n-    #\n-    def isOptionInSectionInConfig( configHandler, section, option ):\n-        try:\n-            CheckerUtils.checkOptionInSectionInConfigFile( configHandler, section, option ) \n-        except NoOptionError:\n-            return False\n-        return True\n-    \n-    isOptionInSectionInConfig = staticmethod( isOptionInSectionInConfig )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/ConfigChecker.py
--- a/commons/core/checker/ConfigChecker.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,226 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import re\n-import sys\n-from commons.core.utils.RepetConfigParser import RepetConfigParser\n-from commons.core.checker.ConfigValue import ConfigValue\n-from commons.core.checker.IChecker import IChecker\n-from commons.core.checker.RepetException import RepetException\n-from commons.core.utils.FileUtils import FileUtils\n-\n-\n-class Rule(object):\n-    \n-    def __init__(self, mandatory= False, isPattern=False, type="", set=(), help =""):\n-        self.mandatory = mandatory\n-        self.isPattern = isPattern\n-        self.type = type\n-        self.set = set\n-        self.help = help\n-        \n-class ConfigRules(object):\n-    \n-    def __init__(self, configName = "", configDescription = ""):\n-        self.configName = configName\n-        self.configDescription = configDescription\n-        self.dRules4Sections={}\n-        \n-    def _addRule(self, section, option="DEFAULT", mandatory=False, isPattern=False, type="", set=(), help =""):\n-        if not self.dRules4Sections.has_key(section):\n-            self.dRules4Sections[section] = {}\n-        self.dRules4Sections[section][option]=Rule(mandatory, isPattern, type.lower(), set) \n-        \n-    def addRuleSection(self, section, mandatory=False, isPattern=False, help = ""):\n-        self._addRule(section = section, option = "DEFAULT", mandatory = mandatory, isPattern =  isPattern, help = "")\n-   \n-    def addRuleOption(self, section, option, mandatory=False, isPattern=False, type="", set=(), help = ""):\n-        self._addRule(section = section, option = option, mandatory = mandatory, isPattern =  isPattern, type = type, set=set , help = "")\n-            \n-    def isSectionMandatory(self, section):\n-        if self.dRules4Sections.has_key(section):\n-            if self.dRules4Sections[section].has_key("DEFAULT"):\n-                return 
self.dRules4Sections[section]["DEFAULT"].mandatory\n-        return False\n-        \n-    def isOptionMandatory(self, section, option):\n-        if self.dRules4Sections.has_key(section):\n-            if self.dRules4Sections[section].has_key(option):\n-                return self.dRules4Sections[section][option].mandatory\n-        return False\n-    \n-    def getRule(self, section, option):\n-        if self.dRules4Sections.has_key(section):\n-            if self.dRules4Sections[section].has_key(option):\n-                return self.dRules4Sections[section][option]\n- '..b'on(sectionName, optionName):\n-                        missingOption += "\\n - [%s]: %s" % (sectionName, optionName)\n-        if missingOption != "":\n-            raise RepetException ("Error in configuration file %s, following options are missing: %s\\n" % (self._configFileName, missingOption))\n-    \n-    def getSectionNamesAccordingPatternRules (self, sectionWordOrPattern, isPattern):          \n-        lSectionsFoundAccordingPatternRules=[]\n-        if isPattern == False:\n-            if self._iRawConfig.has_section(sectionWordOrPattern):\n-                lSectionsFoundAccordingPatternRules.append(sectionWordOrPattern)\n-        else:\n-            for sectionName in self._iRawConfig.sections():\n-                if re.search(sectionWordOrPattern, sectionName, re.IGNORECASE):\n-                    lSectionsFoundAccordingPatternRules.append(sectionName)\n-        return lSectionsFoundAccordingPatternRules\n-    \n-    def getOptionsNamesAccordingPatternRules(self, sectionName, optionWordOrPattern, isPattern):\n-        lOptionsFoundAccordingPatternRules=[]\n-        if isPattern == False:\n-            if self._iRawConfig.has_option(sectionName, optionWordOrPattern):\n-                lOptionsFoundAccordingPatternRules.append(optionWordOrPattern)\n-        else :\n-            for optionName in self._iRawConfig.options(sectionName):\n-                if 
re.search(optionWordOrPattern, optionName, re.IGNORECASE)!= None:\n-                    lOptionsFoundAccordingPatternRules.append(optionName)\n-        return lOptionsFoundAccordingPatternRules\n-    \n-    def extendConfigRulesWithPatternRules(self):\n-        for sectionName in self._iConfigRules.dRules4Sections.keys():\n-            dRules4OptionsOfThisSection = self._iConfigRules.dRules4Sections[sectionName] \n-            lRawSections=[]\n-            if dRules4OptionsOfThisSection.has_key("DEFAULT"):\n-                mandatorySection = dRules4OptionsOfThisSection["DEFAULT"].mandatory\n-                isPatternSection = dRules4OptionsOfThisSection["DEFAULT"].isPattern\n-                lRawSections=self.getSectionNamesAccordingPatternRules(sectionName, isPatternSection)\n-                for rawSectionName in lRawSections:\n-                    self._iExtendedConfigRules.addRuleSection(rawSectionName, "DEFAULT", mandatorySection )\n-                if mandatorySection and (len(lRawSections)==0):\n-                    self._iExtendedConfigRules.addRuleSection(sectionName, "DEFAULT", mandatorySection )\n-            else:\n-                lRawSections.append(sectionName) \n-            for optionName in dRules4OptionsOfThisSection.keys():\n-                setOption = dRules4OptionsOfThisSection[optionName].set\n-                isPatternOption = dRules4OptionsOfThisSection[optionName].isPattern\n-                mandatoryOption = dRules4OptionsOfThisSection[optionName].mandatory\n-                typeOption = dRules4OptionsOfThisSection[optionName].type\n-                if optionName != "DEFAULT":\n-                    for rawSectionName in lRawSections:\n-                        lRawOptions=self.getOptionsNamesAccordingPatternRules(rawSectionName, optionName, isPatternOption)\n-                        for rawOptionName in lRawOptions:\n-                            self._iExtendedConfigRules.addRuleOption(rawSectionName, rawOptionName, mandatoryOption, 
False, typeOption, setOption)\n-                        if mandatoryOption and (len(lRawOptions)==0):\n-                            self._iExtendedConfigRules.addRuleOption(rawSectionName, optionName, mandatoryOption, False, typeOption, setOption)\n-                                                          \n-    def getConfig(self):\n-        self.checkIfExistsConfigFile()\n-        iConfig = self.readConfigFile()\n-        self.setRawConfig(iConfig)\n-        self.extendConfigRulesWithPatternRules()\n-        self.checkMandatorySections()\n-        self.checkMandatoryOptions()\n-        self.setConfig(iConfig)\n-        return self._iRawConfig\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/ConfigException.py
--- a/commons/core/checker/ConfigException.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,53 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-from commons.core.checker.RepetException import RepetException
-
-##  A exception raised by check() method of class ConfigChecker
-#
-# This class allow storage of multiple messages (see messages attribute).
-# Example: use one instance of ConfigException class for one section in configuration file.
-# All messages relatives to this section are stored in messages attribute.
-class ConfigException( RepetException ):
-    
-    ## Constructor
-    #
-    # @param msg message embedded in Exception class   
-    #
-    def __init__(self, msg, messages = []):
-        RepetException.__init__(self, msg)
-        self.messages = messages
-        
-    def getMessages(self):
-        return self.messages
-        
-    def setMessages(self, messages):
-        self.messages = messages
-        
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/ConfigValue.py
--- a/commons/core/checker/ConfigValue.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,70 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-class ConfigValue(object):
-    
-    def __init__(self):
-        self.dOptionsValues4Sections={}
-        
-    def has_section(self,sectionName):
-        return self.dOptionsValues4Sections.has_key(sectionName)
-    
-    def has_option(self, sectionName, optionName):
-        isOptionExist = False
-        if self.has_section(sectionName):
-            isOptionExist = self.dOptionsValues4Sections[sectionName].has_key(optionName)
-        return isOptionExist
-        
-    def sections(self):    
-        lSectionsKeys = self.dOptionsValues4Sections.keys()
-        return lSectionsKeys
-    
-    def options(self, sectionName):
-        lOptionsKeys = [] 
-        if self.has_section(sectionName):
-            lOptionsKeys = self.dOptionsValues4Sections[sectionName].keys()
-        return lOptionsKeys
-    
-    def get(self, sectionName, optionName):   
-        if self.has_option(sectionName, optionName):
-            return self.dOptionsValues4Sections[sectionName][optionName]
-        return None
-    
-    def set(self, sectionName, optionName, optionValue):   
-        if not (self.has_section(sectionName)):
-            self.dOptionsValues4Sections[sectionName] = {}
-        self.dOptionsValues4Sections[sectionName][optionName] = optionValue
-        
-    def setdOptionsValues4Sections(self, dOptionsValues4Sections):
-        self.dOptionsValues4Sections = dOptionsValues4Sections
-        
-    def __eq__(self, o):
-        return self.dOptionsValues4Sections == o.dOptionsValues4Sections
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/IChecker.py
--- a/commons/core/checker/IChecker.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,45 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-## Interface for a checker
-#
-# This class emulates an interface for a checker.
-#
-# All checkers are subclasses of IChecker. 
-#
-class IChecker( object ):
-    
-    ## perform check, raise a CheckerException if error occurred
-    #
-    # @param arg a collecting parameter: put here all you need to perform check
-    # 
-    def check(self, arg=""):
-        pass
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/OldConfigChecker.py
--- a/commons/core/checker/OldConfigChecker.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,101 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-import ConfigParser
-from ConfigParser import NoOptionError
-from commons.core.checker.IChecker import IChecker
-from commons.core.checker.ConfigException import ConfigException
-
-
-## A checker for a configuration file
-#
-#
-# A configuration file is formatted as follow:
-#
-# [section1]
-#
-# option_name1: option_value1
-# 
-# option_name2: option_value2
-#
-# option_name3: option_value3
-#
-# [section2]
-# 
-# ...
-#
-# 
-# This class performs 3 checkes on a configuration file: 
-#
-# (i) check if file exists
-#
-# (ii) check if section exists
-#
-# (iii) check if option exists
-#
-class ConfigChecker( IChecker ):
-    
-    ## Constructor A checker for configuration file.
-    #
-    # @param  sectionName name of section to check in configuration file
-    # @param  optionsDict dictionary with option(s) to check as keys and empty strings ("") as values
-    def __init__ (self, sectionName, optionsDict):
-        self._sectionName = sectionName
-        self._optionsDict = optionsDict
-        
-        
-    ## Perform 3 checks : file exists, sections exists, option exists
-    # 
-    # @param configFile configuration file to check
-    # @exception ConfigException with a list of messages
-    def check (self, configFile):
-        config = ConfigParser.ConfigParser()
-        msg = []
-        try:
-            config.readfp( open(configFile) )
-        except IOError, e:
-            msg.append("CONFIG FILE not found - " + e.message)
-            raise ConfigException("", msg) 
-
-        if not (config.has_section(self._sectionName)):
-            msg.append("[" + self._sectionName + "]" + " section not found - ")
-            raise ConfigException("", msg)
-         
-        isExceptionOccured = False        
-        for key in self._optionsDict.keys():
-            try:
-                self._optionsDict[key] = config.get(self._sectionName, key) 
-            except NoOptionError, e:
-                msg.append("[" + self._sectionName + "]" + " - " + e.message)
-                isExceptionOccured = True
-        
-        if (isExceptionOccured):
-            raise ConfigException("", msg)
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/RepetException.py
--- a/commons/core/checker/RepetException.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,51 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-class RepetException(Exception):
-    
-    def __init__(self, msg):
-        Exception.__init__(self)
-        self._message = msg
-    
-    def __str__(self):
-        return self._message
-               
-    def getMessage(self):
-        return self._message
-    
-    def setMessage(self, msg):
-        self._message = msg
-
-
-class RepetDataException(RepetException):
-    
-    def __init__(self, msg):
-        RepetException.__init__(self, msg)
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/RepetException.pyc
b
Binary file commons/core/checker/RepetException.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/__init__.pyc
b
Binary file commons/core/checker/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/test/TestSuite_Checker.py
--- a/commons/core/checker/test/TestSuite_Checker.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-import sys
-import unittest
-import Test_CheckerUtils
-import Test_ConfigChecker
-
-
-def main():
-    
-    TestSuite_Checker = unittest.TestSuite()
-    
-    TestSuite_Checker.addTest( unittest.makeSuite( Test_CheckerUtils.Test_CheckerUtils, "test" ) )     
-    TestSuite_Checker.addTest( unittest.makeSuite( Test_ConfigChecker.Test_ConfigChecker, "test" ) )    
-    
-    runner = unittest.TextTestRunner( sys.stderr, 2, 2 )
-    runner.run( TestSuite_Checker )
-    
-if __name__ == "__main__":                 
-    main()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/test/Test_CheckerUtils.py
--- a/commons/core/checker/test/Test_CheckerUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,535 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import os\n-import ConfigParser\n-from commons.core.checker.CheckerUtils import CheckerUtils\n-from commons.core.checker.CheckerException import CheckerException\n-from ConfigParser import NoOptionError\n-from ConfigParser import NoSectionError\n-\n-class Test_CheckerUtils( unittest.TestCase ):\n-    \n-    def setUp(self):\n-        self.queueFileName = "queueName.txt"\n-        self.configFileName = "dummyConfig.cfg"\n-    \n-    def tearDown(self):\n-        if os.path.exists(self.queueFileName):\n-            os.remove(self.queueFileName)\n-        if os.path.exists(self.configFileName):\n-            os.remove(self.configFileName)\n-    \n-    def test_isBlastNameInBlastValues( self ):\n-        correctValueList = [ "blastn", "blastp", "blastx", "tblastn", "tblastx" ]\n-        for value in correctValueList:\n-            self.assertFalse( CheckerUtils.isBlastNameNotInBlastValues( value ) )\n-            \n-        incorrectValueList = [ "badbalst", "wublast" ]\n-        for value in incorrectValueList:\n-            self.assertTrue( CheckerUtils.isBlastNameNotInBlastValues( value ) )\n-            \n-    def test_isNotTRUEisNotFALSE( self ):\n-        correctValueList = [ "TRUE", "FALSE" ]\n-        for value in correctValueList:\n-            self.assertFalse( CheckerUtils.isNotTRUEisNotFALSE( value ) )\n-            \n-        incorrectValueList = [ "True", "False" ]\n-        for value in incorrectValueList:\n-            self.assertTrue( CheckerUtils.isNotTRUEisNotFALSE( value ) )\n-            \n-    def test_isRessourceNotExists( self ):\n-        fileName = "dummyFile.txt"\n-        self.assertTrue( CheckerUtils.isRessourceNotExits( fileName ) )\n-        os.system( "touch %s" % ( fileName ) )\n-        self.assertFalse( CheckerUtils.isRessourceNotExits( fileName ) )\n-        
os.remove( fileName )\n-        \n-    def test_isNotAeValueWithOneDigit2DecimalsAtLeast( self ):\n-        correctEValueList = [ "5e-32", "7e-45", "1e-2122", "9e-32" ]\n-        for value in correctEValueList:\n-            self.assertFalse( CheckerUtils.isNotAeValueWithOneDigit2DecimalsAtLeast( value ) )\n-            \n-        incorrecEValueStr = [ "10e-32", "2e-3", "2e-2", "1", "cxhhe" ]\n-        for value in incorrecEValueStr:\n-            self.assertTrue( CheckerUtils.isNotAeValueWithOneDigit2DecimalsAtLeast( value ) )\n-            \n-    def test_isNotADigit( self ):\n'..b'CACCTTCAAA\\n")\n-        fastaFileHandler.write(">DmelC:hr4_Blas-ter_Piler_1.0_Map_9\\n")\n-        fastaFileHandler.write("AGTTTAAAAACCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGC\\n")\n-        fastaFileHandler.write("ACTATGCAGCCACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTG\\n")\n-        fastaFileHandler.write("AGAGCGTAAGAAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGA\\n")\n-        fastaFileHandler.write("GAACGCGTATAAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCT\\n")\n-        fastaFileHandler.write("GATCGAAGAAACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAA\\n")\n-        fastaFileHandler.close()\n-    \n-    def _writeFastaFile_with_pipe(self, fastaFileName):    \n-        fastaFileHandler = open(fastaFileName, "w")\n-        fastaFileHandler.write(">DmelChr4_Blaster_Piler_0.0_Map_3\\n")\n-        fastaFileHandler.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\\n")\n-        fastaFileHandler.write("TTCGCCGTGGCTCTAGAGGTGGCTCCAGGCTCTCTCGAATTTTTGTTAGAGAGCGAGAGA\\n")\n-        fastaFileHandler.write("GCTGAGAGCGCTACAGCGAACAGCTCTTTTCTACACATAAAGTGATAGCAGACAACTGTA\\n")\n-        fastaFileHandler.write("TGTGTGCACACGTGTGCTCATGCATTGTAAATTTGACAAAATATGCCCTTCACCTTCAAA\\n")\n-        fastaFileHandler.write(">DmelC|hr4_Blas-ter_Piler_1.0_Map_9\\n")\n-        fastaFileHandler.write("AGTTTAAAAACCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGC\\n")\n-        
fastaFileHandler.write("ACTATGCAGCCACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTG\\n")\n-        fastaFileHandler.write("AGAGCGTAAGAAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGA\\n")\n-        fastaFileHandler.write("GAACGCGTATAAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCT\\n")\n-        fastaFileHandler.write("GATCGAAGAAACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAA\\n")\n-        fastaFileHandler.close()\n-    \n-    def _writeFastaFile_with_equal(self, fastaFileName):    \n-        fastaFileHandler = open(fastaFileName, "w")\n-        fastaFileHandler.write(">DmelChr4_Blaster_Piler_0.0_Map_3\\n")\n-        fastaFileHandler.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\\n")\n-        fastaFileHandler.write("TTCGCCGTGGCTCTAGAGGTGGCTCCAGGCTCTCTCGAATTTTTGTTAGAGAGCGAGAGA\\n")\n-        fastaFileHandler.write("GCTGAGAGCGCTACAGCGAACAGCTCTTTTCTACACATAAAGTGATAGCAGACAACTGTA\\n")\n-        fastaFileHandler.write("TGTGTGCACACGTGTGCTCATGCATTGTAAATTTGACAAAATATGCCCTTCACCTTCAAA\\n")\n-        fastaFileHandler.write(">DmelC:hr4_Blas=ter_Piler_1.0_Map_9\\n")\n-        fastaFileHandler.write("AGTTTAAAAACCAAAGACACTAGAATAACAAGATGCGTAACGGCCATACATTGGTTTGGC\\n")\n-        fastaFileHandler.write("ACTATGCAGCCACTTTTTTGGTGACGGCCAAAATTACTCTCTTTCCGCTCACTCCCGCTG\\n")\n-        fastaFileHandler.write("AGAGCGTAAGAAATCTAAAAATATAATTTGCTTGCTTGTGTGAGTAAAAACAAGAGACGA\\n")\n-        fastaFileHandler.write("GAACGCGTATAAGTGTGCGTGTTGTGCTAGAAGACGATTTTCGGGACCGAAATCAATTCT\\n")\n-        fastaFileHandler.write("GATCGAAGAAACGAATTTACATGGTACATATTAGGGTAGTTTTTGCCAATTTCCTAGCAA\\n")\n-        fastaFileHandler.close()\n-\n-    def _writeChangeLogFile(self, changeLogFileName ):\n-        changeLogFileHandler = open(changeLogFileName, "w")\n-        changeLogFileHandler.write("ChangeLog of REPET\\n")\n-        changeLogFileHandler.write("\\n")\n-        changeLogFileHandler.write("\\n")\n-        changeLogFileHandler.write("\\n")\n-        changeLogFileHandler.write("REPET release 
1.3.6\\n")\n-        changeLogFileHandler.write("(release date XX/XX/2010)\\n")\n-        changeLogFileHandler.write("\\n")\n-        changeLogFileHandler.close()\n-\n-    def _writeConfigFile(self, lineVersion):\n-        configFileHandler = open(self.configFileName, "w")\n-        configFileHandler.write("[repet_env]\\n")\n-        configFileHandler.write(lineVersion)\n-        configFileHandler.write("repet_host: <your_MySQL_host>\\n")\n-        configFileHandler.close()\n-        \n-        \n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_CheckerUtils ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/test/Test_ConfigChecker.py
--- a/commons/core/checker/test/Test_ConfigChecker.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,569 +0,0 @@\n-from commons.core.checker.ConfigChecker import ConfigChecker \n-from commons.core.checker.ConfigChecker import ConfigRules\n-from commons.core.checker.RepetException import RepetException\n-import os\n-import unittest\n-\n-class Test_ConfigChecker(unittest.TestCase):\n-    \n-    def setUp(self):\n-        self._configFileName = "testConfigChecker.cfg"\n-        self._iMock = MockConfig()\n-     \n-    def test_checkIfExistsConfigFile_file_exist(self):\n-        f=open(self._configFileName, "w")\n-        f.close()\n-        \n-        doesFileExists = True\n-        iConfigRules = ConfigRules()\n-        try:\n-            iConfigChecker = ConfigChecker(self._configFileName, iConfigRules)\n-            iConfigChecker.checkIfExistsConfigFile()\n-        except RepetException:\n-            doesFileExists = False\n-        os.remove(self._configFileName)        \n-        self.assertTrue(doesFileExists)\n-        \n-    def test_checkIfExistsConfigFile_file_not_exist(self):\n-        iConfigRules = ConfigRules()\n-        expMsg ="CONFIG FILE not found - \'%s\'" %self._configFileName\n-        doesFileExists = True\n-        try:\n-            iConfigChecker = ConfigChecker(self._configFileName, iConfigRules)   \n-            iConfigChecker.checkIfExistsConfigFile()     \n-        except RepetException, re:\n-            doesFileExists = False\n-        self.assertFalse(doesFileExists)\n-        self.assertEqual(expMsg, re.getMessage())\n-        \n-    def test_readConfigFile(self):\n-        self._iMock.write_config(self._configFileName)\n-        iConfigRules = ConfigRules()\n-        expDictRawConfigValues = {"dir_name" : {"work_dir":"toto"},\n-                                  "organism" : {"abbreviation":"T.aestivum",\n-                                                "genus":"triticum",\n-                                                "species":"aestivum",\n-                                                "common_name":"wheat",\n-        
                                        "comment":""},\n-                                  \'analysis1\': {\'description\': \'\',\n-                                                \'gff_name\': \'BLASTX.gff2\',\n-                                                \'name\': \'BLASTXWheat2\',\n-                                                \'program\': \'BLASTX2\',\n-                                                \'programversion\': \'3.32\',\n-                                                \'sourcename\': \'dummyDesc_BLASTX2\'}\n-                                 }\n-        isNoExceptionRaised = True\n-        try: \n-            iConfigChecker = ConfigChecker(self._configFileName, iConfigRules)\n-            iConfig = iConfigChecker.readConfigFile()\n-            iConfigChecker.setRawConfig(iConfig)\n-            obsDictRawConfigValues = iConfigChecker._iRawConfig.dOptionsValues4Sections\n-        except RepetException:\n-            isNoExceptionRaised = False\n-        os.remove(self._configFileName)\n-        self.assertTrue(isNoExceptionRaised)\n-        self.assertEquals(obsDictRawConfigValues, expDictRawConfigValues)\n-        \n-    def test_readConfigFile_section_define_twice(self):\n-        self._iMock.write_case_section_define_twice(self._configFileName)\n-        iConfigRules = ConfigRules()\n-        expMsg = "Duplicate section exist in config file %s"  %self._configFileName\n-        expDictRawConfigValues = {"dir_name": {"work_dir":"toto"},\n-                                  "analysis1" : {"name": "BLASTXWheat2",\n-                                                 "program" : "BLASTX2",\n-                                                 "programversion" : "3.32",\n-                                                 "sourcename" :"dummyDesc_BLASTX2",\n-                                                 "description" : "",\n-                                                 "gff_name" :"BLASTX.gff2"}\n-                                 }\n-        
doesNoExceptionRaised = True\n-        try:\n-            iConfigChecker = ConfigChecker(self._configFileName, iConfigRu'..b'      configF.write( "sourcename: dummyDesc_BLASTX\\n")\n-        configF.write( "program: BLASTX2\\n")\n-        configF.write( "description: \\n")\n-        configF.write( "gff_name: BLASTX.gff\\n")\n-        configF.write( "\\n")\n-        configF.write( "\\n")\n-        configF.close()\n-        \n-    #configuration file with section with option depends on presence of other options\n-    def write_with_one_option_depends_of_an_other_one(self, configFileName ):\n-        configF = open(configFileName, "w" )\n-        configF.write( "[dir_name]\\n")\n-        configF.write( "work_dir : toto\\n") \n-        configF.write( "\\n")\n-        configF.write( "[organism]\\n")\n-        configF.write( "abbreviation: T.aestivum\\n")\n-        configF.write( "genus: Triticum\\n")\n-        configF.write( "species: aestivum\\n")\n-        configF.write( "common_name: wheat\\n")\n-        configF.write( "comment: \\n")\n-        configF.write( "\\n")\n-        configF.write( "[analysis1]\\n")\n-        configF.write( "name: BLASTXWheat\\n")\n-        configF.write( "program: BLASTX\\n")\n-        configF.write( "programversion: 3.3\\n")\n-        configF.write( "sourcename: src_BLASTX\\n")\n-        configF.write( "description: \\n")\n-        configF.write( "gff_name: BLASTX.gff\\n")\n-        configF.write( "\\n")\n-        configF.write( "[analysis2]\\n")\n-        configF.write( "name: GMHMMWheat\\n")\n-        configF.write( "program: GMHMM\\n")\n-        configF.write( "programversion: 4.3\\n")\n-        configF.write( "sourcename: src_GMHMM\\n")\n-        configF.write( "description: \\n")\n-        configF.write( "gff_name: GMHMM.gff\\n")\n-        configF.write( "\\n")\n-        configF.write( "[target]\\n")\n-        configF.write( "target_used: yes\\n")\n-        configF.write( "target_used_list: target.lst\\n")\n-        configF.close()\n- 
       \n-    def write_case_pattern_rule(self, configFileName ):\n-        configF = open(configFileName, "w" )\n-        configF.write( "[dir_name]\\n")\n-        configF.write( "work_dir : toto\\n" ) \n-        configF.write( "\\n")\n-        configF.write( "[organism]\\n")\n-        configF.write( "abbreviation: T.aestivum\\n")\n-        configF.write( "genus: Triticum\\n")\n-        configF.write( "species: aestivum\\n")\n-        configF.write( "common_name: wheat\\n")\n-        configF.write( "comment: \\n")\n-        configF.write( "\\n")\n-        configF.write( "[analysis1]\\n")\n-        configF.write( "name: BLASTXWheat\\n")\n-        configF.write( "program: BLASTX\\n")\n-        configF.write( "programversion: 3.3\\n")\n-        configF.write( "sourcename: src_BLASTX\\n")\n-        configF.write( "description: \\n")\n-        configF.write( "gff_name: BLASTX.gff\\n")\n-        configF.write( "\\n")\n-        configF.write( "[analysis2]\\n")\n-        configF.write( "name: GMHMMWheat\\n")\n-        configF.write( "program: GMHMM\\n")\n-        configF.write( "programversion: 4.3\\n")\n-        configF.write( "sourcename: src_GMHMM\\n")\n-        configF.write( "description: \\n")\n-        configF.write( "gff_name: GMHMM.gff\\n")\n-        configF.write( "\\n")\n-        configF.write( "[target]\\n")\n-        configF.write( "target_used: yes\\n")\n-        configF.write( "target_used_list: target.lst\\n")\n-        configF.write( "\\n")\n-        configF.write( "[section_with_option_pattern]\\n")\n-        configF.write( "option1: value1\\n")\n-        configF.write( "option2: value2\\n")\n-        configF.write( "[second_section_with_option_pattern]\\n")\n-        configF.write( "option1: value1\\n")\n-        configF.write( "option2: value2\\n")\n-        configF.close()\n-        \n-    def write_config_case(self, configFileName):\n-        configF = open(configFileName, "w" )\n-        configF.write( "[dir_name]\\n")\n-        configF.write( 
"work_dir : toto \\n") \n-        configF.write( "\\n")\n-        configF.write( "[organism]\\n")\n-        configF.write( "min_SSR_coverage: 0.50\\n")\n-        configF.write( "\\n")\n-        configF.close()\n-        \n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/test/Test_ConfigValue.py
--- a/commons/core/checker/test/Test_ConfigValue.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,217 +0,0 @@\n-import unittest\n-from commons.core.checker.ConfigValue import ConfigValue\n-\n-class Test_ConfigValue(unittest.TestCase):\n-    \n-    def setUp(self):\n-        self._iConfigValue = ConfigValue()\n-        \n-    def test__eq__True(self):\n-        self._iConfigValue.dOptionsValues4Sections = {\n-                    "dir_name" : {"work_dir":"toto"},\n-                    "organism" : {"abbreviation":"T.aestivum",\n-                                  "genus":"triticum",\n-                                  "species":"aestivum",\n-                                  "common_name":"wheat",\n-                                  "comment":""}\n-                                                        }\n-        iConfigValue1 = ConfigValue()                         \n-        iConfigValue1.dOptionsValues4Sections = {\n-                    "dir_name" : {"work_dir":"toto"},\n-                    "organism" : {"abbreviation":"T.aestivum",\n-                                  "genus":"triticum",\n-                                  "species":"aestivum",\n-                                  "common_name":"wheat",\n-                                  "comment":""}\n-                                                }\n-        \n-        self.assertEqual(self._iConfigValue, iConfigValue1)\n-        \n-    def test__eq__False_not_same_section(self):\n-        self._iConfigValue.dOptionsValues4Sections = {\n-                    "dir_name" : {"work_dir":"toto"},\n-                    "organisms" : {"abbreviation":"T.aestivum",\n-                                  "genus":"triticum",\n-                                  "species":"aestivum",\n-                                  "common_name":"wheat",\n-                                  "comment":""}\n-                                                        }\n-        iConfigValue1 = ConfigValue()                         \n-        iConfigValue1.dOptionsValues4Sections = {\n-                    "dir_name" : 
{"work_dir":"toto"},\n-                    "organism" : {"abbreviation":"T.aestivum",\n-                                  "genus":"triticum",\n-                                  "species":"aestivum",\n-                                  "common_name":"wheat",\n-                                  "comment":""}\n-                                                }\n-        \n-        self.assertNotEqual(self._iConfigValue, iConfigValue1)\n-                                                \n-                                                \n-    def test__eq__False_not_same_option(self):\n-        self._iConfigValue.dOptionsValues4Sections = {\n-                    "dir_name" : {"work_dir":"toto"},\n-                    "organism" : {"abbreviation":"T.aestivum",\n-                                  "family":"triticum",\n-                                  "species":"aestivum",\n-                                  "common_name":"wheat",\n-                                  "comment":""}\n-                                                        }\n-        iConfigValue1 = ConfigValue()                         \n-        iConfigValue1.dOptionsValues4Sections = {\n-                    "dir_name" : {"work_dir":"toto"},\n-                    "organism" : {"abbreviation":"T.aestivum",\n-                                  "genus":"triticum",\n-                                  "species":"aestivum",\n-                                  "common_name":"wheat",\n-                                  "comment":""}\n-                                                }\n-        \n-        self.assertNotEqual(self._iConfigValue, iConfigValue1)\n-        \n-    def test__eq__False_not_same_value(self):\n-        self._iConfigValue.dOptionsValues4Sections = {\n-                    "dir_name" : {"work_dir":"toto"},\n-                    "organism" : {"abbreviation":"T.aestivum",\n-                                  "genus":"vitis",\n-                                  "species":"aestivum",\n-           
                       "common_name":"wheat",\n-                                  "comment":""}\n'..b'-        self._iConfigValue.dOptionsValues4Sections = {\n-                    "dir_name" : {"work_dir":"toto"},\n-                    "organism" : {"abbreviation":"T.aestivum",\n-                                  "genus":"triticum",\n-                                  "species":"aestivum",\n-                                  "common_name":"wheat",\n-                                  "comment":""}\n-                                 }\n-        \n-        obsOptionExist = self._iConfigValue.has_option("organism","toto")\n-        self.assertFalse(obsOptionExist)\n-        obsOptionExist = self._iConfigValue.has_option("toto","genus")\n-        self.assertFalse(obsOptionExist)\n-\n-    def test_sections(self):\n-        self._iConfigValue.dOptionsValues4Sections = {\n-                    "dir_name" : {"work_dir":"toto"},\n-                    "organism" : {"abbreviation":"T.aestivum",\n-                                  "genus":"triticum",\n-                                  "species":"aestivum",\n-                                  "common_name":"wheat",\n-                                  "comment":""}\n-                                 }\n-        expListSections = ["dir_name", "organism"]\n-        obsListSections = self._iConfigValue.sections()\n-        self.assertEquals(expListSections, obsListSections)\n-        \n-    def test_sections_empty_config(self):\n-        self._iConfigValue.dOptionsValues4Sections = {}\n-        expListSections = []\n-        obsListSections = self._iConfigValue.sections()\n-        self.assertEquals(expListSections, obsListSections)\n-\n-    def test_options(self):\n-        self._iConfigValue.dOptionsValues4Sections = {\n-                    "dir_name" : {"work_dir":"toto"},\n-                    "organism" : {"abbreviation":"T.aestivum",\n-                                  "genus":"triticum",\n-                                  
"species":"aestivum",\n-                                  "common_name":"wheat",\n-                                  "comment":""}\n-                                 }\n-        expListOptions = [\'abbreviation\', \'common_name\', \'genus\', \'species\', \'comment\']\n-        obsListOptions = self._iConfigValue.options("organism")\n-        self.assertEquals(expListOptions, obsListOptions)\n-   \n-        expListOptions = ["work_dir"]\n-        obsListOptions = self._iConfigValue.options("dir_name")\n-        self.assertEquals(expListOptions, obsListOptions)\n-             \n-    def test_options_empty_config(self):\n-        self._iConfigValue.dOptionsValues4Sections = {}\n-        expListOptions = []\n-        obsListOptions = self._iConfigValue.options("toto")\n-        self.assertEquals(expListOptions, obsListOptions)\n-\n-    def test_set(self):\n-        self._iConfigValue.dOptionsValues4Sections = {}\n-        expDictOptionsValue = {"dir_name" : {"work_dir":"toto"}}\n-        self._iConfigValue.set("dir_name", "work_dir", "toto")\n-        obsDictOptionsValue = self._iConfigValue.dOptionsValues4Sections\n-        self.assertEquals(expDictOptionsValue, obsDictOptionsValue)\n-        \n-    def test_get(self):\n-        self._iConfigValue.dOptionsValues4Sections = {\n-                    "dir_name" : {"work_dir":"toto"},\n-                    "organism" : {"abbreviation":"T.aestivum",\n-                                  "genus":"triticum",\n-                                  "species":"aestivum",\n-                                  "common_name":"wheat",\n-                                  "comment":""}\n-                                 }\n-        expValue = "aestivum"\n-        obsValue = self._iConfigValue.get("organism", "species")\n-        self.assertEquals(expValue, obsValue)\n-        expValue = None\n-        obsValue = self._iConfigValue.get("toto", "species")\n-        self.assertEquals(expValue, obsValue)\n-        expValue = None\n-        
obsValue = self._iConfigValue.get("organism", "dummyopt")\n-        self.assertEquals(expValue, obsValue)       \n-        \n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/test/Test_F_ConfigChecker.py
--- a/commons/core/checker/test/Test_F_ConfigChecker.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,214 +0,0 @@\n-from commons.core.checker.ConfigChecker import ConfigChecker \n-from commons.core.checker.ConfigChecker import ConfigRules\n-from commons.core.checker.ConfigValue import ConfigValue\n-from commons.core.checker.RepetException import RepetException\n-import unittest\n-import os\n-\n-class Test_F_ConfigChecker(unittest.TestCase):\n-    \n-    #TODO: AJouter test (wrong type, etc..)\n-    def setUp(self):\n-        self._configFileName = "test_conf_checker"\n-        \n-    def tearDown(self):\n-        os.remove(self._configFileName)\n-     \n-    def test_run(self):\n-        iMock = MockConfig()\n-        iMock.write_config(self._configFileName)\n-        \n-        iConfigRules = ConfigRules()\n-        iConfigRules.addRuleSection(section="dir_name", mandatory=True)\n-        iConfigRules.addRuleOption(section="dir_name", option ="work_dir", mandatory=True)\n-        iConfigRules.addRuleSection(section="organism", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="abbreviation", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="genus", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="species", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="common_name", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="comment")\n-        iConfigRules.addRuleSection(section="analysis", mandatory=True, isPattern=True)\n-        iConfigRules.addRuleOption(section="analysis", option ="name", mandatory=True)\n-        iConfigRules.addRuleOption(section="analysis", option ="program", mandatory=True)\n-        iConfigRules.addRuleOption(section="analysis", option ="sourcename", mandatory=True)\n-        iConfigRules.addRuleOption(section="analysis", option ="description")\n-        iConfigRules.addRuleOption(section="analysis", option ="gff_name")\n-        \n-        iConfigChecker = 
ConfigChecker(self._configFileName,iConfigRules)\n-        \n-        obsValidatedConfig = iConfigChecker.getConfig()\n-        \n-        expValidatedConfig = ConfigValue()\n-        d = {"dir_name" : {"work_dir":"toto"},\n-             "organism" : {"abbreviation":"T.aestivum",\n-                                  "genus":"triticum",\n-                                  "species":"aestivum",\n-                                  "common_name":"wheat",\n-                                  "comment":""},\n-                           \'analysis1\': {\'description\': \'\',\n-                                  \'gff_name\': \'BLASTX.gff2\',\n-                                  \'name\': \'BLASTXWheat2\',\n-                                  \'program\': \'BLASTX2\',\n-                                  \'programversion\': \'3.32\',\n-                                  \'sourcename\': \'dummyDesc_BLASTX2\'}\n-                                 }\n-        expValidatedConfig.setdOptionsValues4Sections(d)\n-        \n-        self.assertEquals(expValidatedConfig, obsValidatedConfig)\n-        \n-        \n-    def test_run_exception_section_missing(self):\n-        iMock = MockConfig()\n-        iMock.write_config_section_missing(self._configFileName)\n-        \n-        iConfigRules = ConfigRules()\n-        iConfigRules.addRuleSection(section="dir_name", mandatory=True)\n-        iConfigRules.addRuleOption(section="dir_name", option ="work_dir", mandatory=True)\n-        iConfigRules.addRuleSection(section="organism", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="abbreviation", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="genus", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="species", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="common_name", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="comment")\n-  
      iConfigRules.addRuleSection(section="analysis", mandatory=True, isPattern=True)\n-        iConfigRules.addRuleOption('..b'on ="work_dir", mandatory=True)\n-        iConfigRules.addRuleSection(section="organism", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="abbreviation", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="genus", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="species", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="common_name", mandatory=True)\n-        iConfigRules.addRuleOption(section="organism", option ="comment")\n-        iConfigRules.addRuleSection(section="analysis", mandatory=True, isPattern=True)\n-        iConfigRules.addRuleOption(section="analysis", option ="name", mandatory=True)\n-        iConfigRules.addRuleOption(section="analysis", option ="program", mandatory=True)\n-        iConfigRules.addRuleOption(section="analysis", option ="sourcename", mandatory=True)\n-        iConfigRules.addRuleOption(section="analysis", option ="description")\n-        iConfigRules.addRuleOption(section="analysis", option ="gff_name")\n-        \n-        iConfigChecker = ConfigChecker(self._configFileName,iConfigRules)\n-        \n-        expMessage = "Error in configuration file %s, following options are missing: \\n - [organism]: abbreviation\\n"% self._configFileName\n-        \n-        try :\n-            obsValidatedConfig = iConfigChecker.getConfig()\n-        except RepetException, e:\n-            obsMessage = e.getMessage()\n-\n-        self.assertEquals(expMessage, obsMessage)\n-            \n-class MockConfig (object):\n-   \n-    def write_config(self, configFileName):\n-        configF = open(configFileName, "w" )\n-        configF.write( "[dir_name]\\n")\n-        configF.write( "work_dir : toto \\n") \n-        configF.write( "\\n")\n-        configF.write( "[organism]\\n")\n-        
configF.write( "abbreviation: T.aestivum\\n")\n-        configF.write( "genus: triticum\\n")\n-        configF.write( "species: aestivum\\n")\n-        configF.write( "common_name: wheat\\n")\n-        configF.write( "comment: \\n")\n-        configF.write( "[analysis1]\\n")\n-        configF.write( "name: BLASTXWheat2\\n")\n-        configF.write( "program: BLASTX2\\n")\n-        configF.write( "programversion: 3.32\\n")\n-        configF.write( "sourcename: dummyDesc_BLASTX2\\n")\n-        configF.write( "description: \\n")\n-        configF.write( "gff_name: BLASTX.gff2\\n")\n-        configF.write( "\\n")\n-        configF.close()\n-        \n-    def write_config_section_missing(self, configFileName):\n-        configF = open(configFileName, "w" )\n-        configF.write( "[dir_name]\\n")\n-        configF.write( "work_dir : toto \\n") \n-        configF.write( "\\n")\n-        configF.write( "[analysis1]\\n")\n-        configF.write( "name: BLASTXWheat2\\n")\n-        configF.write( "program: BLASTX2\\n")\n-        configF.write( "programversion: 3.32\\n")\n-        configF.write( "sourcename: dummyDesc_BLASTX2\\n")\n-        configF.write( "description: \\n")\n-        configF.write( "gff_name: BLASTX.gff2\\n")\n-        configF.write( "\\n")\n-        configF.close()\n-        \n-    def write_config_option_missing(self, configFileName):\n-        configF = open(configFileName, "w" )\n-        configF.write( "[dir_name]\\n")\n-        configF.write( "work_dir : toto \\n") \n-        configF.write( "\\n")\n-        configF.write( "[organism]\\n")\n-        configF.write( "genus: triticum\\n")\n-        configF.write( "species: aestivum\\n")\n-        configF.write( "common_name: wheat\\n")\n-        configF.write( "comment: \\n")\n-        configF.write( "[analysis1]\\n")\n-        configF.write( "name: BLASTXWheat2\\n")\n-        configF.write( "program: BLASTX2\\n")\n-        configF.write( "programversion: 3.32\\n")\n-        configF.write( "sourcename: 
dummyDesc_BLASTX2\\n")\n-        configF.write( "description: \\n")\n-        configF.write( "gff_name: BLASTX.gff2\\n")\n-        configF.write( "\\n")\n-        configF.close()\n-        \n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/checker/test/Test_OldConfigChecker.py
--- a/commons/core/checker/test/Test_OldConfigChecker.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,104 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-import unittest
-import os
-from commons.core.checker.OldConfigChecker import ConfigChecker
-from commons.core.checker.ConfigException import ConfigException
-
-class Test_ConfigChecker( unittest.TestCase ):
-    
-    def setUp(self):
-        pass
-    
-    def tearDown(self):
-        pass
-    
-    
-    def testFileNotFound(self):
-        exceptionExpected = None
-        configChecker = ConfigChecker("",{})
-        try :
-            configChecker.check("noExistsFile.cfg")
-        except ConfigException, ce:
-            exceptionExpected = ce
-        
-        self.assertTrue(exceptionExpected != None)
-        msg = exceptionExpected.messages[0]
-        self.assertTrue(msg.startswith("CONFIG FILE not found - "))
-        
-        
-    def testNoSectionInConfigFile (self):
-        exceptionExpected = None
-        dummyFile = open("dummyFile.cfg", "w")
-        configChecker = ConfigChecker("dummySection",{})
-        try :
-            configChecker.check("dummyFile.cfg")
-        except ConfigException, ce:
-            exceptionExpected = ce
-        
-        self.assertTrue(exceptionExpected != None)
-        msg = exceptionExpected.messages[0]
-        self.assertTrue(msg.startswith("[dummySection]" + " section not found - "))
-        
-        os.remove("dummyFile.cfg")
-        
-        
-    def testNoOptionInConfigFile (self):
-        exceptionExpected = None
-        MockConfigFile("dummyConfig.cfg",{})
-        configChecker = ConfigChecker("blaster_config",{"dummy":""})
-        try :
-            configChecker.check("dummyConfig.cfg")
-        except ConfigException, ce:
-            exceptionExpected = ce
-        
-        self.assertTrue(exceptionExpected != None)
-        msg = exceptionExpected.messages[0]
-        self.assertTrue(msg.startswith("[blaster_config] - No option 'dummy' in section: 'blaster_config'"))
-        os.remove("dummyConfig.cfg")
-        
-        
-class MockConfigFile:
-    
-    def __init__ (self, fileName, optionsDict):
-        self._fileName = fileName
-        config = open(fileName, "w");
-        config.write("[blaster_config]\n")
-        for key in optionsDict.keys():
-            config.write(key + ":" + optionsDict[key] + "\n")
-        config.close()
-        
-        
-test_suite = unittest.TestSuite()
-test_suite.addTest( unittest.makeSuite( Test_ConfigChecker ) )
-if __name__ == "__main__":
-    unittest.TextTestRunner(verbosity=2).run( test_suite )
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/Align.py
--- a/commons/core/coord/Align.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,428 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-import time\n-\n-from commons.core.coord.Range import Range\n-from commons.core.coord.Map import Map\n-\n-\n-## Handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity)\n-#\n-class Align( object ):\n-    \n-    ## Constructor\n-    #\n-    # @param range_q: a Range instance for the query\n-    # @param range_s: a Range instance for the subject\n-    # @param e_value: E-value of the match \n-    # @param identity: identity percentage of the match\n-    # @param score: score of the match\n-    #\n-    def __init__(self, range_q=Range(), range_s=Range(), e_value=0, score=0, identity=0):\n-        self.range_query = range_q\n-        self.range_subject = range_s\n-        self.e_value = float(e_value)\n-        self.score = float(score)\n-        self.identity = float(identity)\n-       \n-    ## Return True if the instance is empty, False otherwise\n-    #\n-    def isEmpty(self):\n-        return self.range_query.isEmpty() or self.range_subject.isEmpty()\n-        \n-    ## Equal operator\n-    #\n-    def __eq__(self, o):\n-        if self.range_query==o.range_query and self.range_subject==o.range_subject and \\\n-        self.e_value==o.e_value and self.score==o.score and self.identity==o.identity:\n-            return True\n-        return False\n-    \n-    ## Unequal operator\n-    #\n-    # @param o a Range instance\n-    #\n-    def __ne__(self, o):\n-        return not self.__eq__(o)\n-    \n-    ## Convert the object into a string\n-    #\n-    # @note used in \'print myObject\'\n-    #\n-    def __str__( self ):\n-        return self.toString()\n-    \n-    ## Read attributes from an Align file\n-    # \n-    # @param fileHandler: file handler of the file being read\n-    # @return: 1 on success, 0 at the end of the file \n-    #\n-    def read(self, 
fileHandler):\n-        self.reset()\n-        line = fileHandler.readline()\n-        if line == "":\n-            return 0\n-        tokens = line.split("\\t")\n-        if len(tokens) < len(self.__dict__.keys()):\n-            return 0\n-        self.setFromTuple(tokens)\n-        return 1\n-    \n-    ## Set attributes from tuple\n-    #\n-    # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStar,subjectEnd,E-value,score,identity)\n-    # @note data are loaded such that the query is always on the direct strand\n-    #\n-    def setFromTuple( self, tuple ):\n-        #'..b' self.identity = max(self.identity,o.identity)\n-        \n-    ## Return a Map instance with the subject mapped on the query\n-    #\n-    def getSubjectAsMapOfQuery(self):\n-        iMap = Map()\n-        iMap.name = self.range_subject.seqname\n-        iMap.seqname = self.range_query.seqname\n-        if self.range_subject.isOnDirectStrand():\n-            iMap.start = self.range_query.start\n-            iMap.end = self.range_query.end\n-        else:\n-            iMap.start = self.range_query.end\n-            iMap.end = self.range_query.start\n-        return iMap\n-    \n-    ## Return True if query is on direct strand\n-    #\n-    def isQueryOnDirectStrand( self ):\n-        return self.range_query.isOnDirectStrand()\n-    \n-    ## Return True if subject is on direct strand\n-    #\n-    def isSubjectOnDirectStrand( self ):\n-        return self.range_subject.isOnDirectStrand()\n-    \n-    ## Return True if query and subject are on the same strand, False otherwise\n-    #\n-    def areQrySbjOnSameStrand(self):\n-        return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()\n-    \n-    ## Return False if query and subject are on the same strand, True otherwise\n-    #\n-    def areQrySbjOnOppositeStrands(self):\n-        return not self.areQrySbjOnSameStrand()\n-\n-    ## Set attributes from string\n-    #\n-    # @param string a string 
formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity\n-    # @param sep field separator\n-    #\n-    def setFromString(self, string, sep="\\t"):\n-        if string[-1] == "\\n":\n-            string = string[:-1]\n-        self.setFromTuple( string.split(sep) )\n-        \n-    ## Return a first Map instance for the query and a second for the subject\n-    #\n-    def getMapsOfQueryAndSubject(self):\n-        iMapQuery = Map( name="repet",\n-                         seqname=self.range_query.seqname,\n-                         start=self.range_query.start,\n-                         end=self.range_query.end )\n-        iMapSubject = Map( name="repet",\n-                         seqname=self.range_subject.seqname,\n-                         start=self.range_subject.start,\n-                         end=self.range_subject.end )\n-        return iMapQuery, iMapSubject\n-    \n-    ## Write query coordinates as Map in a file\n-    #\n-    # @param fileHandler: file handler of the file being filled\n-    #\n-    def writeSubjectAsMapOfQuery( self, fileHandler ):\n-        m = self.getSubjectAsMapOfQuery()\n-        m.write( fileHandler )\n-        \n-    ## Return a bin for fast database access\n-    #\n-    def getBin(self):\n-        return self.range_query.getBin()\n-    \n-    ## Switch query and subject\n-    #\n-    def switchQuerySubject( self ):\n-        tmpRange = self.range_query\n-        self.range_query = self.range_subject\n-        self.range_subject = tmpRange\n-        if not self.isQueryOnDirectStrand():\n-            self.reverse()\n-            \n-    ## Return True if the query overlaps with the query of another Align instance, False otherwise\n-    #\n-    def isQueryOverlapping( self, iAlign ):\n-        return self.getQueryAsRange().isOverlapping( iAlign.getQueryAsRange() )\n-    \n-    ## Return True if the subject overlaps with the subject of another Align instance, False otherwise\n-    #\n- 
   def isSubjectOverlapping( self, iAlign ):\n-        return self.getSubjectAsRange().isOverlapping( iAlign.getSubjectAsRange() )\n-    \n-    ## Return True if the Align instance overlaps with another Align instance, False otherwise\n-    #\n-    def isOverlapping( self, iAlign ):\n-        if self.isQueryOverlapping( iAlign ) and self.isSubjectOverlapping( iAlign ):\n-            return True\n-        else:\n-            return False\n-        \n-    ## Update the score\n-    #\n-    # @note the new score is the length on the query times the percentage of identity\n-    #\n-    def updateScore( self ):\n-        newScore = self.getLengthOnQuery() * self.getIdentity() / 100.0\n-        self.score = newScore\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/Align.pyc
b
Binary file commons/core/coord/Align.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/AlignUtils.py
--- a/commons/core/coord/AlignUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,359 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import os\n-import sys\n-import shutil\n-from commons.core.coord.Align import Align\n-\n-\n-## Static methods manipulating Align instances\n-#\n-class AlignUtils( object ):\n-    \n-    ## Return a list with Align instances from the given file\n-    #\n-    # @param inFile name of a file in the Align format\n-    #\n-    def getAlignListFromFile( inFile ):\n-        lAlignInstances = []\n-        inFileHandler = open( inFile, "r" )\n-        while True:\n-            line = inFileHandler.readline()\n-            if line == "":\n-                break\n-            a = Align()\n-            a.setFromString( line )\n-            lAlignInstances.append( a )\n-        inFileHandler.close()\n-        return lAlignInstances\n-\n-    getAlignListFromFile = staticmethod( getAlignListFromFile )\n-    \n-    \n-    ## Return a list with all the scores\n-    #\n-    # @param lAlignInstances: list of Align instances\n-    #\n-    def getListOfScores( lAlignInstances ):\n-        lScores = []\n-        for iAlign in lAlignInstances:\n-            lScores.append( iAlign.score )\n-        return lScores\n-    \n-    getListOfScores = staticmethod( getListOfScores )\n-\n-    \n-    ## Return a list with all the scores from the given file\n-    #\n-    # @param inFile name of a file in the Align format\n-    #\n-    def getScoreListFromFile(inFile):\n-        lScores = []\n-        append = lScores.append\n-        with open(inFile, "r") as inFileHandler:\n-            line = inFileHandler.readline()\n-            while line:\n-                if line != "\\n":\n-                    append(int(line.split(\'\\t\')[7]))\n-                line = inFileHandler.readline()\n-        return lScores\n-    \n-    getScoreListFromFile = staticmethod( getScoreListFromFile )\n-    \n-    \n-    ## for each line of a given Align file, write the 
coordinates on the query and the subject as two distinct lines in a Map file\n-    #\n-    # @param alignFile: name of the input Align file\n-    # @param mapFile: name of the output Map file\n-    #\n-    def convertAlignFileIntoMapFileWithQueriesAndSubjects( alignFile, mapFile ):\n-        alignFileHandler = open( alignFile, "r" )\n-        mapFileHandler = open( mapFile, "w" )\n-        iAlign = Align()\n-        while True:\n-            line = alignFileHandler.readline()\n-            if line == "":\n-                break\n-            iAlign.setFromString( line )\n-            iMapQ, iMap'..b'Dir)\n-            \n-    createAlignFiles = staticmethod( createAlignFiles )\n-    \n-    \n-    ## Return a list with Align instances sorted by query name, subject name, query start, query end and score\n-    #\n-    def sortList( lAligns ):\n-        return sorted( lAligns, key=lambda iAlign: ( iAlign.getQueryName(),\n-                                                     iAlign.getSubjectName(),\n-                                                     iAlign.getQueryStart(),\n-                                                     iAlign.getQueryEnd(),\n-                                                     iAlign.getScore() ) )\n-        \n-    sortList = staticmethod( sortList )\n-    \n-    \n-    ## Return a list after merging all overlapping Align instances\n-    #\n-    def mergeList( lAligns ):\n-        lMerged = []\n-        \n-        lSorted = AlignUtils.sortList( lAligns )\n-        \n-        prev_count = 0\n-        for iAlign in lSorted:\n-            if prev_count != len(lSorted):\n-                for i in lSorted[ prev_count + 1: ]:\n-                    if iAlign.isOverlapping( i ):\n-                        iAlign.merge( i )\n-                IsAlreadyInList = False\n-                for newAlign in lMerged:\n-                    if newAlign.isOverlapping( iAlign ):\n-                        IsAlreadyInList = True\n-                        
newAlign.merge( iAlign )\n-                        lMerged [ lMerged.index( newAlign ) ] = newAlign\n-                if not IsAlreadyInList:\n-                    lMerged.append( iAlign )\n-                prev_count += 1\n-                \n-        return lMerged\n-    \n-    mergeList = staticmethod( mergeList )\n-    \n-    \n-    ## Merge all Align instance in a given Align file\n-    #\n-    def mergeFile( inFile, outFile="" ):\n-        if outFile == "":\n-            outFile = "%s.merged" % ( inFile )\n-        if os.path.exists( outFile ):\n-            os.remove( outFile )\n-            \n-        tmpFile = "%s.sorted" % ( inFile )\n-        AlignUtils.sortAlignFile( inFile, tmpFile )\n-        \n-        tmpF = open( tmpFile, "r" )\n-        dQrySbj2Aligns = {}\n-        prevPairQrySbj = ""\n-        while True:\n-            line = tmpF.readline()\n-            if line == "":\n-                break\n-            iAlign = Align()\n-            iAlign.setFromString( line )\n-            pairQrySbj = "%s_%s" % ( iAlign.getQueryName(), iAlign.getSubjectName() )\n-            if not dQrySbj2Aligns.has_key( pairQrySbj ):\n-                if prevPairQrySbj != "":\n-                    lMerged = AlignUtils.mergeList( dQrySbj2Aligns[ prevPairQrySbj ] )\n-                    AlignUtils.writeListInFile( lMerged, outFile, "a" )\n-                    del dQrySbj2Aligns[ prevPairQrySbj ]\n-                    prevPairQrySbj = pairQrySbj\n-                else:\n-                    prevPairQrySbj = pairQrySbj\n-                dQrySbj2Aligns[ pairQrySbj ] = []\n-            dQrySbj2Aligns[ pairQrySbj ].append( iAlign )\n-        lMerged = []\n-        if len(dQrySbj2Aligns.keys()) > 0:\n-            lMerged = AlignUtils.mergeList( dQrySbj2Aligns[ prevPairQrySbj ] )\n-        AlignUtils.writeListInFile( lMerged, outFile, "a" )\n-        tmpF.close()\n-        os.remove( tmpFile )\n-        \n-    mergeFile = staticmethod( mergeFile )\n-\n-\n-    ## Update the 
scores of each match in the input file\n-    #\n-    # @note the new score is the length on the query times the percentage of identity\n-    #\n-    def updateScoresInFile( inFile, outFile ):\n-        inHandler = open( inFile, "r" )\n-        outHandler = open( outFile, "w" )\n-        iAlign = Align()\n-        \n-        while True:\n-            line = inHandler.readline()\n-            if line == "":\n-                break\n-            iAlign.reset()\n-            iAlign.setFromString( line, "\\t" )\n-            iAlign.updateScore()\n-            iAlign.write( outHandler )\n-            \n-        inHandler.close()\n-        outHandler.close()\n-        \n-    updateScoresInFile = staticmethod( updateScoresInFile )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/ConvCoord.py
--- a/commons/core/coord/ConvCoord.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,504 +0,0 @@\n-#!/usr/bin/env python\n-\n-##@file\n-# Convert coordinates from chunks to chromosomes or the opposite.\n-#\n-# usage: ConvCoord.py [ options ]\n-# options:\n-#      -h: this help\n-#      -i: input data with coordinates to convert (file or table)\n-#      -f: input data format (default=\'align\'/\'path\')\n-#      -c: coordinates to convert (query, subject or both; default=\'q\'/\'s\'/\'qs\')\n-#      -m: mapping of chunks on chromosomes (format=\'map\')\n-#      -x: convert from chromosomes to chunks (opposite by default)\n-#      -o: output data (file or table, same as input)\n-#      -C: configuration file (for database connection)\n-#      -v: verbosity level (default=0/1/2)\n-\n-\n-import os\n-import sys\n-import getopt\n-import time\n-from commons.core.sql.DbFactory import DbFactory\n-from commons.core.coord.MapUtils import MapUtils\n-from commons.core.sql.TableMapAdaptator import TableMapAdaptator\n-from commons.core.sql.TablePathAdaptator import TablePathAdaptator\n-from commons.core.coord.PathUtils import PathUtils\n-from commons.core.coord.Align import Align\n-from commons.core.coord.Path import Path\n-from commons.core.coord.Range import Range\n-\n-\n-## Class to handle coordinate conversion\n-#\n-class ConvCoord( object ):\n-    \n-    ## Constructor\n-    #\n-    def __init__( self, inData="", mapData="", outData="", configFile="", verbosity=0):\n-        self._inData = inData\n-        self._formatInData = "align"\n-        self._coordToConvert = "q"\n-        self._mapData = mapData\n-        self._mergeChunkOverlaps = True\n-        self._convertChunks = True\n-        self._outData = outData\n-        self._configFile = configFile\n-        self._verbose = verbosity\n-        self._typeInData = "file"\n-        self._typeMapData = "file"\n-        self._tpa = None\n-        if self._configFile != "" and os.path.exists(self._configFile):\n-            self._iDb = DbFactory.createInstance(self._configFile)\n-        else:\n-     
       self._iDb = DbFactory.createInstance()\n-        \n-        \n-    ## Display the help on stdout\n-    #\n-    def help( self ):\n-        print\n-        print "usage: ConvCoord.py [ options ]"\n-        print "options:"\n-        print "     -h: this help"\n-        print "     -i: input data with coordinates to convert (file or table)"\n-        print "     -f: input data format (default=\'align\'/\'path\')"\n-        print "     -c: coordinates to convert (query, subject or both; default=\'q\'/\'s\'/\'qs\')"\n-        print "     -m: mapping of chunks on chromosomes (format=\'map\')"\n-        print "     -M: merge chunk overlaps (default=yes/no)"\n-        print "     -x: convert from chromosomes to chunks (opposite by default)"\n-        print "     -o: output data (file or table, same as input)"\n-        print "     -C: configuration file (for database connection)"\n-        print "     -v: verbosity level (default=0/1/2)"\n-        print\n-        \n-        \n-    ## Set the attributes from the command-line\n-    #\n-    def setAttributesFromCmdLine( self ):\n-        try:\n-            opts, args = getopt.getopt(sys.argv[1:],"hi:f:c:m:M:xo:C:v:")\n-        except getopt.GetoptError, err:\n-            sys.stderr.write( "%s\\n" % ( str(err) ) )\n-            self.help(); sys.exit(1)\n-        for o,a in opts:\n-            if o == "-h":\n-                self.help(); sys.exit(0)\n-            elif o == "-i":\n-                self.setInputData( a )\n-            elif o == "-f":\n-                self.setInputFormat( a )\n-            elif o == "-c":\n-                self.setCoordinatesToConvert( a )\n-            elif o == "-m":\n-                self.setMapData( a )\n-            elif o == "-M":\n-                self.setMergeChunkOverlaps( a )\n-            elif o == "-o":\n-                self.setOutputData( a )\n-            elif o == "-C":\n-                self.setConfigFile( a )\n-            elif o == "-v":\n-                
self.setVerbosityLevel( a )\n-                \n-                \n-    def setInputData( self, inData ):\n-        self._inData = inData\n-        '..b'ile( tmpPathTable, tmpPathTable, False )\n-        self._iDb.dropTable( tmpPathTable )\n-        if self._formatInData == "align":\n-            PathUtils.convertPathFileIntoAlignFile( tmpPathTable, outFile )\n-            os.remove( tmpPathTable )\n-        elif self._formatInData == "path":\n-            os.rename( tmpPathTable, outFile )\n-            \n-            \n-    def saveChrCoordsAsTable( self, tmpPathTable, outTable ):\n-        if self._formatInData == "align":\n-            self._iDb.convertPathTableIntoAlignTable( tmpPathTable, outTable )\n-            self._iDb.dropTable( tmpPathTable )\n-        elif self._formatInData == "path":\n-            self._iDb.renameTable( tmpPathTable, outTable )\n-            \n-            \n-    ## Convert coordinates from chunks to chromosomes\n-    #\n-    def convertCoordinatesFromChunksToChromosomes( self ):\n-        dChunks2CoordMaps = self.getChunkCoordsOnChromosomes()\n-        \n-        if self._typeInData == "file":\n-            tmpPathTable = self.convCoordsChkToChrFromFile( self._inData, self._formatInData, dChunks2CoordMaps )\n-        elif self._typeInData == "table":\n-            tmpPathTable = self.convCoordsChkToChrFromTable( self._inData, self._formatInData, dChunks2CoordMaps )\n-            \n-        if self._mergeChunkOverlaps:\n-            self.mergeCoordsOnChunkOverlaps( dChunks2CoordMaps, tmpPathTable );\n-            \n-        if self._typeInData == "file":\n-            self.saveChrCoordsAsFile( tmpPathTable, self._outData )\n-        elif self._typeInData == "table":\n-            self.saveChrCoordsAsTable( tmpPathTable, self._outData )\n-            \n-            \n-    ## Convert coordinates from chromosomes to chunks\n-    #\n-    def convertCoordinatesFromChromosomesToChunks( self ):\n-        msg = "ERROR: convert coordinates 
from chromosomes to chunks not yet available"\n-        sys.stderr.write( "%s\\n" % ( msg ) )\n-        sys.exit(1)\n-        \n-        \n-    ## Useful commands before running the program\n-    #\n-    def start( self ):\n-        self.checkAttributes()\n-        if self._verbose > 0:\n-            msg = "START ConvCoord.py (%s)" % ( time.strftime("%m/%d/%Y %H:%M:%S") )\n-            msg += "\\ninput data: %s" % ( self._inData )\n-            if self._typeInData == "file":\n-                msg += " (file)\\n"\n-            else:\n-                msg += " (table)\\n"\n-            msg += "format: %s\\n" % ( self._formatInData )\n-            msg += "coordinates to convert: %s\\n" % ( self._coordToConvert )\n-            msg += "mapping data: %s" % ( self._mapData )\n-            if self._typeMapData == "file":\n-                msg += " (file)\\n"\n-            else:\n-                msg += " (table)\\n"\n-            if self._mergeChunkOverlaps:\n-                msg += "merge chunk overlaps\\n"\n-            else:\n-                msg += "don\'t merge chunk overlaps\\n"\n-            if self._convertChunks:\n-                msg += "convert chunks to chromosomes\\n"\n-            else:\n-                msg += "convert chromosomes to chunks\\n"\n-            msg += "output data: %s" % ( self._outData )\n-            if self._typeInData == "file":\n-                msg += " (file)\\n"\n-            else:\n-                msg += " (table)\\n"\n-            sys.stdout.write( msg )\n-            \n-            \n-    ## Useful commands before ending the program\n-    #\n-    def end( self ):\n-        self._iDb.close()\n-        if self._verbose > 0:\n-            msg = "END ConvCoord.py (%s)" % ( time.strftime("%m/%d/%Y %H:%M:%S") )\n-            sys.stdout.write( "%s\\n" % ( msg ) )\n-            \n-            \n-    ## Run the program\n-    #\n-    def run( self ):\n-        self.start()\n-        \n-        if self._convertChunks:\n-            
self.convertCoordinatesFromChunksToChromosomes()\n-        else:\n-            self.convertCoordinatesFromChromosomesToChunks()\n-            \n-        self.end()\n-        \n-        \n-if __name__ == "__main__":\n-    i = ConvCoord()\n-    i.setAttributesFromCmdLine()\n-    i.run()\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/Map.py
--- a/commons/core/coord/Map.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,161 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-from commons.core.coord.Range import Range
-
-
-## Record a named region on a given sequence
-#
-class Map( Range ):
-    
-    ## Constructor
-    #
-    # @param name the name of the region
-    # @param seqname the name of the sequence
-    # @param start the start coordinate
-    # @param end the end coordinate
-    # 
-    def __init__(self, name="", seqname="", start=-1, end=-1):
-        self.name = name
-        Range.__init__( self, seqname, start, end )
-        
-    ## Equal operator
-    #
-    # @param o a Map instance
-    #    
-    def __eq__(self, o):
-        if self.name == o.name:
-            return Range.__eq__(self, o)
-        return False
-    
-    ## Return name
-    #
-    def getName( self ):
-        return self.name
-    
-    ## Set attributes from tuple
-    #
-    # @param tuple: a tuple with (name,seqname,start,end)
-    # 
-    def setFromTuple(self, tuple):
-        self.name = tuple[0]
-        Range.setFromTuple(self, tuple[1:])
-    
-    ## Set attributes from string
-    #
-    # @param string a string formatted like name<sep>seqname<sep>start<sep>end
-    # @param sep field separator
-    #
-    def setFromString(self, string, sep="\t"):
-        if string[-1] == "\n":
-            string = string[:-1]
-        self.setFromTuple( string.split(sep) )
-        
-    ## Reset
-    #
-    def reset(self):
-        self.setFromTuple( [ "", "", -1, -1 ] )
-        
-    ## Read attributes from a Map file
-    # 
-    # @param fileHandler: file handler of the file being read
-    # @return: 1 on success, 0 at the end of the file
-    #
-    def read(self, fileHandler):
-        self.reset()
-        line = fileHandler.readline()
-        if line == "":
-            return 0
-        tokens = line.split("\t")
-        if len(tokens) < len(self.__dict__.keys()):
-            return 0
-        self.setFromTuple(tokens)
-        return 1
-    
-    ## Return the attributes as a formatted string
-    #
-    def toString(self):
-        string = "%s" % (self.name)
-        string += "\t%s" % (Range.toString(self))
-        return string
-    
-    ## Write attributes into a Map file
-    #
-    # @param fileHandler: file handler of the file being filled
-    #
-    def write(self, fileHandler):
-        fileHandler.write("%s\n" % (self.toString()))
-        
-    ## Save attributes into a Map file
-    #
-    # @param file: name of the file being filled
-    #
-    def save(self, file):
-        fileHandler = open( file, "a" )
-        self.write( fileHandler )
-        fileHandler.close()
-        
-    ## Return a Range instance with the attributes
-    #
-    def getRange(self):
-        return Range( self.seqname, self.start, self.end)
-    
-    ## Remove in the instance the region overlapping with another Map instance
-    #
-    # @param o a Map instance
-    # 
-    def diff(self, o):
-        iRange = Range.diff(self, o.getRange())
-        new = Map()
-        if not iRange.isEmpty():
-            new.name = self.name
-            new.seqname = self.seqname
-            new.start = iRange.start
-            new.end = iRange.end
-        return new
-    
-    ## Write attributes in a Path file, the name being the subject and the rest the Range query
-    #
-    # @param fileHandler: file handler of a Path file
-    #
-    def writeAsQueryOfPath(self, fileHandler):
-        string = "0"
-        string += "\t%s" % ( self.seqname )
-        string += "\t%i" % ( self.getMin() )
-        string += "\t%i" % ( self.getMax() )
-        string += "\t%s" % ( self.name )
-        string += "\t0"
-        string += "\t0"
-        string += "\t0.0"
-        string += "\t0"
-        string += "\t0"
-        fileHandler.write( "%s\n" % ( string ) )
-        
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/Map.pyc
b
Binary file commons/core/coord/Map.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/MapUtils.py
--- a/commons/core/coord/MapUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,246 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import sys\n-import os\n-from commons.core.coord.Map import Map\n-from commons.core.coord.Set import Set\n-try:\n-    from commons.core.checker.CheckerUtils import CheckerUtils\n-except ImportError:\n-    pass\n-\n-\n-## static methods manipulating Map instances\n-#\n-class MapUtils( object ):\n-    \n-    ## Return a list of Map instances sorted in increasing order according to the min, then the max, and finally their initial order\n-    #\n-    # @param lMaps list of Map instances\n-    #\n-    def getMapListSortedByIncreasingMinThenMax( lMaps ):\n-        return sorted( lMaps, key=lambda iMap: ( iMap.getMin(), iMap.getMax() ) )    \n-    \n-    getMapListSortedByIncreasingMinThenMax = staticmethod( getMapListSortedByIncreasingMinThenMax )\n-    \n-    \n-    ## Return a list of Map instances sorted in increasing order according to the name, then the seqname, then the min, then the max\n-    #\n-    # @param lMaps list of Map instances\n-    #\n-    def getMapListSortedByIncreasingNameThenSeqnameThenMinThenMax( lMaps ):\n-        return sorted( lMaps, key=lambda iMap: ( iMap.getName(), iMap.getSeqname(), iMap.getMin(), iMap.getMax() ) )    \n-    \n-    getMapListSortedByIncreasingNameThenSeqnameThenMinThenMax = staticmethod( getMapListSortedByIncreasingNameThenSeqnameThenMinThenMax )\n-    \n-    \n-    ## Return a dictionary which keys are Map names and values the corresponding Map instances\n-    #\n-    def getDictPerNameFromMapFile( mapFile ):\n-        dName2Maps = {}\n-        mapFileHandler = open( mapFile, "r" )\n-        while True:\n-            line = mapFileHandler.readline()\n-            if line == "":\n-                break\n-            iMap = Map()\n-            iMap.setFromString( line, "\\t" )\n-            if dName2Maps.has_key( iMap.name ):\n-                if iMap == dName2Maps[ iMap.name 
]:\n-                    continue\n-                else:\n-                    msg = "ERROR: in file \'%s\' two different Map instances have the same name \'%s\'" % ( mapFile, iMap.name )\n-                    sys.stderr.write( "%s\\n" % ( msg ) )\n-                    sys.exit(1)\n-            dName2Maps[ iMap.name ] = iMap\n-        mapFileHandler.close()\n-        return dName2Maps\n-    \n-    getDictPerNameFromMapFile = staticmethod( getDictPerNameFromMapFile )\n-\n-    \n-    ## Give a list of Set instances from a list of Map instances\n-    #\n-    # @param lMaps list of Map '..b's.rename( "%s.merge" % inFile,\n-                       outFile )\n-            \n-    mergeCoordsInFile = staticmethod( mergeCoordsInFile )\n-    \n-    \n-    ## Return a dictionary which keys are Map seqnames and values the corresponding Map instances\n-    #\n-    def getDictPerSeqNameFromMapFile( mapFile ):\n-        dSeqName2Maps = {}\n-        mapFileHandler = open( mapFile, "r" )\n-        while True:\n-            line = mapFileHandler.readline()\n-            if line == "":\n-                break\n-            iMap = Map()\n-            iMap.setFromString( line, "\\t" )\n-            if not dSeqName2Maps.has_key( iMap.seqname ):\n-                dSeqName2Maps[ iMap.seqname ] = []\n-            dSeqName2Maps[ iMap.seqname ].append( iMap )\n-        mapFileHandler.close()\n-        return dSeqName2Maps\n-    \n-    getDictPerSeqNameFromMapFile = staticmethod( getDictPerSeqNameFromMapFile )\n-    \n-    \n-    ## Convert an Map file into a Set file\n-    #\n-    # @param mapFile string input map file name\n-    # @param setFile string output set file name\n-    #\n-    def convertMapFileIntoSetFile( mapFileName, setFileName = "" ):\n-        if setFileName == "":\n-            setFileName = "%s.set" % mapFileName\n-        mapFileHandler = open( mapFileName, "r" )\n-        setFileHandler = open( setFileName, "w" )\n-        iMap = Map()\n-        count = 0\n-        while 
True:\n-            line = mapFileHandler.readline()\n-            if line == "":\n-                break\n-            iMap.setFromString(line)\n-            count += 1\n-            iSet = Set()\n-            iSet.id = count\n-            iSet.name = iMap.getName()\n-            iSet.seqname = iMap.getSeqname()\n-            iSet.start = iMap.getStart()\n-            iSet.end = iMap.getEnd()\n-            iSet.write(setFileHandler)\n-        mapFileHandler.close()\n-        setFileHandler.close()\n-        \n-    convertMapFileIntoSetFile = staticmethod( convertMapFileIntoSetFile )\n-    \n-    ## Write Map instances contained in the given list\n-    #\n-    # @param lMaps list of Map instances\n-    # @param fileName a file name\n-    # @param mode the open mode of the file \'"w"\' or \'"a"\' \n-    #\n-    def writeListInFile(lMaps, fileName, mode="w"):\n-        fileHandler = open(fileName, mode)\n-        for iMap in lMaps:\n-            iMap.write(fileHandler)\n-        fileHandler.close()\n-        \n-    writeListInFile = staticmethod( writeListInFile )\n-\n-    \n-    ## Get the length of the shorter seq in map file\n-    #\n-    # @param mapFileName\n-    # @param mode the open mode of the file \'"w"\' or \'"a"\' \n-    #\n-    def getMinLengthOfMapFile(self, mapFileName):\n-        fileHandler = open(mapFileName, "r")\n-        line = fileHandler.readline()\n-        start = int (line.split(\'\\t\')[2])\n-        end = int (line.split(\'\\t\')[3])\n-        min = end - start + 1\n-        while True:\n-            line = fileHandler.readline()\n-            if line == "":\n-                break\n-            start = int (line.split(\'\\t\')[2])\n-            end = int (line.split(\'\\t\')[3])\n-            currentMin = end - start + 1\n-            if min >= currentMin:\n-                min = currentMin\n-        fileHandler.close()\n-        return min\n-\n-    ## Get the max length of the shorter seq in map file\n-    #\n-    # @param mapFileName\n- 
   # @param mode the open mode of the file \'"w"\' or \'"a"\' \n-    #\n-    def getMaxLengthOfMapFile(self, mapFileName):\n-        fileHandler = open(mapFileName, "r")\n-        line = fileHandler.readline()\n-        start = int (line.split(\'\\t\')[2])\n-        end = int (line.split(\'\\t\')[3])\n-        max = end - start + 1\n-        while True:\n-            line = fileHandler.readline()\n-            if line == "":\n-                break\n-            start = int (line.split(\'\\t\')[2])\n-            end = int (line.split(\'\\t\')[3])\n-            currentMax = end - start + 1\n-            if max <= currentMax:\n-                max = currentMax\n-        fileHandler.close()\n-        return max\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/Match.py
--- a/commons/core/coord/Match.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,206 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import sys\n-from commons.core.coord.Range import Range\n-from commons.core.coord.Path import Path\n-\n-\n-## Handle a chain of match(es) between two sequences, query and subject, with an identifier and the length of the input sequences\n-#\n-class Match( Path ):\n-    \n-    ## Constructor\n-    #\n-    def __init__(self):\n-        Path.__init__(self)\n-        self.query_length = -1\n-        self.query_length_perc = -1    # length of the match on the query / length of the query\n-        self.query_seqlength = -1\n-        self.match_length_perc = -1    # length of the match on the query / total length of the subject\n-        self.subject_length = -1\n-        self.subject_length_perc = -1    # length of the match on the subject / length of the subject\n-        self.subject_seqlength = -1\n-        \n-    ## Equal operator\n-    #\n-    def __eq__(self, o):\n-        if o == None \\\n-        or self.query_length != o.query_length or self.query_length_perc != o.query_length_perc\\\n-        or self.query_seqlength != o.query_seqlength or self.subject_length != o.subject_length\\\n-        or self.subject_length_perc != o.subject_length_perc or self.subject_seqlength != o.subject_seqlength\\\n-        or self.match_length_perc != o.match_length_perc:\n-            return False\n-        return Path.__eq__(self, o)\n-        \n-    ## Return the length of the match on the query divided by the total length of the query\n-    #\n-    def getLengthPercOnQuery(self):\n-        return self.query_length_perc\n-    \n-    ## Return the length of the match on the subject divided by the total length of the subject\n-    #\n-    def getLengthPercOnSubject(self):\n-        return self.subject_length_perc\n-    \n-    ## Return the length of the match on the subject\n-    #\n-    def getLengthMatchOnSubject(self):\n-       
 return self.subject_length\n-    \n-    ## Set attributes from a tuple\n-    # \n-    # @param tuple: a tuple with (query name,query start,query end,\n-    #  query length, query length perc (between 0-1), match length perc (between 0-1), subject name,\n-    #  subject start,subject end,subject length, subject length percentage (between 0-1), e_value,score,identity,id)\n-    #\n-    def setFromTuple( self, tuple ):\n-        queryStart = int(tuple[1])\n-        queryEnd = int(tuple[2])\n-        subjectStart = int(tuple[7])\n-        subjectEnd = int(tuple[8])\n-        if quer'..b'gth = -1\n-        self.match_length_perc = -1\n-        self.subject_length = -1\n-        self.subject_length_perc = -1\n-        self.subject_seqlength = -1\n-        \n-    ## Return a formated string of the attribute data\n-    # \n-    def toString( self ):\n-        string = "%s" % ( self.range_query.toString() )\n-        string += "\\t%i\\t%f" % ( self.query_length,\n-                                     self.query_length_perc )\n-        string += "\\t%f" % ( self.match_length_perc )\n-        string += "\\t%s" % ( self.range_subject.toString() )\n-        string += "\\t%i\\t%f" % ( self.subject_length,\n-                                 self.subject_length_perc )\n-        string += "\\t%g\\t%i\\t%f" % ( self.e_value,\n-                                     self.score,\n-                                     self.identity )\n-        string += "\\t%i" % ( self.id )\n-        return string\n-    \n-    ## Return a Path instance\n-    #\n-    def getPathInstance( self ):\n-        p = Path()\n-        tuple = ( self.id,\n-                  self.range_query.seqname,\n-                  self.range_query.start,\n-                  self.range_query.end,\n-                  self.range_subject.seqname,\n-                  self.range_subject.start,\n-                  self.range_subject.end,\n-                  self.e_value,\n-                  self.score,\n-                  
self.identity )\n-        p.setFromTuple( tuple )\n-        return p\n-    \n-    ## Give information about a match whose query is included in the subject\n-    # \n-    # @return string\n-    #\n-    def getQryIsIncluded( self ):\n-        string = "query %s (%d bp: %d-%d) is contained in subject %s (%d bp: %d-%d): id=%.2f - %.3f - %.3f - %.3f" %\\\n-                 ( self.range_query.seqname, self.query_seqlength, self.range_query.start, self.range_query.end,\n-                   self.range_subject.seqname, self.subject_seqlength, self.range_subject.start, self.range_subject.end,\n-                   self.identity, self.query_length_perc, self.match_length_perc, self.subject_length_perc )\n-        return string\n-    \n-    def increaseLengthPercOnQuery(self, coverage):\n-        self.query_length_perc += coverage\n-    \n-    ## Compare the object with another match and see if they are equal\n-    # (same identity, E-value and score + same subsequences whether in query or subject)\n-    #\n-    # @return True if objects are equals False otherwise\n-    #\n-    def isDoublonWith( self, match, verbose=0 ):\n-\n-        # if both matches have same identity, score and E-value\n-        if self.identity == match.identity and self.score == match.score and self.e_value == match.e_value:\n-\n-            # if query and subject are identical\n-            if ( self.range_query.seqname == match.range_query.seqname \\\n-                 and self.range_subject.seqname == match.range_subject.seqname ):\n-\n-                # if the coordinates are equal\n-                if self.range_query.__eq__( match.range_query ) and self.range_subject.__eq__( match.range_subject ):\n-                    return True\n-\n-                else:\n-                    if verbose > 0: print "different coordinates"; sys.stdout.flush()\n-                    return False\n-\n-            # if query and subject are reversed but identical\n-            elif self.range_query.seqname == 
match.range_subject.seqname and self.range_subject.seqname == match.range_query.seqname:\n-\n-                # if the coordinates are equal\n-                if self.range_query.__eq__( match.range_subject ) and self.range_subject.__eq__( match.range_query ):\n-                    return True\n-\n-                else:\n-                    if verbose > 0: print "different coordinates"; sys.stdout.flush()\n-                    return False\n-\n-            else:\n-                if verbose > 0: print "different sequence names"; sys.stdout.flush()\n-                return False\n-\n-        else:\n-            if verbose > 0: print "different match numbers"; sys.stdout.flush()\n-            return False\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/MatchUtils.py
--- a/commons/core/coord/MatchUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,288 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-import math\n-import os\n-import sys\n-from commons.core.coord.Match import Match\n-from commons.core.checker.RepetException import RepetException\n-\n-## Static methods for the manipulation of Match instances\n-#\n-class MatchUtils ( object ):\n-    \n-    ## Return a list with Match instances from the given file\n-    #\n-    # @param inFile name of a file in the Match format\n-    # @return a list of Match instances\n-    #\n-    def getMatchListFromFile(inFile ):\n-        lMatchInstances = []\n-        inFileHandler = open( inFile, "r" )\n-        while True:\n-            line = inFileHandler.readline()\n-            if line == "":\n-                break\n-            if line[0:10] == "query.name":\n-                continue\n-            m = Match()\n-            m.setFromString( line )\n-            lMatchInstances.append( m )\n-        inFileHandler.close()\n-        return lMatchInstances\n-    \n-    getMatchListFromFile = staticmethod( getMatchListFromFile )\n-    \n-    ##  Split a Match list in several Match lists according to the subject\n-    #\n-    #  @param lMatches a list of Match instances\n-    #  @return a dictionary which keys are subject names and values Match lists\n-    #\n-    def getDictOfListsWithSubjectAsKey( lMatches ):\n-        dSubject2MatchList = {}\n-        for iMatch in lMatches:\n-            if not dSubject2MatchList.has_key( iMatch.range_subject.seqname ):\n-                dSubject2MatchList[ iMatch.range_subject.seqname ] = []\n-            dSubject2MatchList[ iMatch.range_subject.seqname ].append( iMatch )\n-        return dSubject2MatchList\n-    \n-    getDictOfListsWithSubjectAsKey = staticmethod( getDictOfListsWithSubjectAsKey )\n-    \n-    ##  Split a Match list in several Match lists according to the query\n-    #\n-    #  @param lMatches a list of Match 
instances\n-    #  @return a dictionary which keys are query names and values Match lists\n-    #\n-    def getDictOfListsWithQueryAsKey ( lMatches ):\n-        dQuery2MatchList = {}\n-        for iMatch in lMatches:\n-            if not dQuery2MatchList.has_key( iMatch.range_query.seqname ):\n-                dQuery2MatchList[ iMatch.range_query.seqname ] = []\n-            dQuery2MatchList[ iMatch.range_query.seqname ].append( iMatch )\n-        return dQuery2MatchList\n-    \n-    getDictOfListsWithQueryAsKey = staticmethod( getDictOfListsWithQueryAsKey )   \n-         \n-    ## Write M'..b'    else:\n-            dMatches = MatchUtils.getDictOfListsWithSubjectAsKey(lMatches)\n-            \n-        for qry in dMatches.keys():\n-            countMatch = 0\n-            for match in dMatches[ qry ]:\n-                \n-                if match.identity >= thresIdentityPerc and getattr(match,whatToCount.lower() +"_length_perc") >= thresLength:\n-                    countMatch += 1\n-            if countMatch > 0:\n-                countSbj += 1\n-        return countSbj\n-    \n-    getNbDistinctSequencesInsideMatchesWithThresh = staticmethod(getNbDistinctSequencesInsideMatchesWithThresh)\n-    \n-    ## Convert a \'match\' file (output from Matcher) into an \'align\' file\n-    ## replace old parser.tab2align\n-    #\n-    # @param inFileName  a string input file name\n-    #\n-    def convertMatchFileToAlignFile(inFileName):\n-        basename = os.path.splitext(inFileName)[0]\n-        outFileName = "%s.align" % basename\n-        outFile = open(outFileName, "w")\n-        \n-        lMatches = MatchUtils.getMatchListFromFile(inFileName) \n-        \n-        for match in lMatches:\n-            string = "%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\n" % ( match.getQueryName(), match.getQueryStart(), match.getQueryEnd(), match.getSubjectName(), match.getSubjectStart(), match.getSubjectEnd(), match.getEvalue(), match.getScore(), match.getIdentity() )\n-         
   outFile.write( string )\n-            \n-        outFile.close()\n-        \n-    convertMatchFileToAlignFile = staticmethod(convertMatchFileToAlignFile)\n-    \n-    ## Convert a \'match\' file (output from Matcher) into an \'abc\' file (MCL input file)\n-    # Use coverage on query for arc value\n-    #\n-    # @param matchFileName string input match file name\n-    # @param outFileName string output abc file name\n-    # @param coverage float query coverage filter threshold\n-    #\n-    @staticmethod\n-    def convertMatchFileIntoABCFileOnQueryCoverage(matchFileName, outFileName, coverage = 0):\n-        with open(matchFileName) as inF:\n-            with open(outFileName, "w") as outF:\n-                inF.readline()\n-                inLine = inF.readline()\n-                while inLine:\n-                    splittedLine = inLine.split("\\t")\n-                    if float(splittedLine[4]) >= coverage:\n-                        outLine = "\\t".join([splittedLine[0], splittedLine[6], splittedLine[4]])\n-                        outLine += "\\n"\n-                        outF.write(outLine)\n-                    inLine = inF.readline()\n-\n-    ## Adapt the path IDs as the input file is the concatenation of several \'Match\' files, and remove the extra header lines. 
\n-    ## replace old parser.tabnum2id\n-    #\n-    # @param fileName  a string input file name\n-    # @param  outputFileName  a string output file name (optional)\n-    #\n-    def generateMatchFileWithNewPathId(fileName, outputFileName=None):\n-        if outputFileName is None:   \n-            outFile = open(fileName, "w")\n-        else:\n-            outFile = open(outputFileName, "w")      \n-        outFile.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n-      \n-        lMatches = MatchUtils.getMatchListFromFile(fileName) \n-        count = 1\n-        dMatchKeyIdcount = {}\n-        \n-        for match in lMatches:\n-            key_id = str(match.getIdentifier()) + "-" + match.getQueryName() + "-" + match.getSubjectName()\n-            if not key_id in dMatchKeyIdcount.keys():\n-                newPath = count\n-                count += 1\n-                dMatchKeyIdcount[ key_id ] = newPath\n-            else:\n-                newPath = dMatchKeyIdcount[ key_id ]\n-                \n-            match.id = newPath\n-            outFile.write( match.toString()+"\\n" )  \n-        outFile.close()\n-        \n-    generateMatchFileWithNewPathId = staticmethod(generateMatchFileWithNewPathId)\n-     \n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/MergedRange.py
--- a/commons/core/coord/MergedRange.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,98 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-## Record a region on multiple sequence using Path ID information
-#
-class MergedRange(object):
-    
-    ## Constructor
-    #
-    # @param lId list of Path ID
-    # @param start the start coordinate
-    # @param end the end coordinate
-    #
-    def __init__(self, lId = None, start = -1, end = -1):
-        self._lId = lId or []
-        self._start = start
-        self._end = end 
-        
-    ## Equal operator
-    #
-    # @param o a MergedRange instance
-    #        
-    def __eq__(self, o):
-        return o._lId == self._lId and o._start == self._start and o._end == self._end
-    
-    
-    ## Return True if the MergedRange instance overlaps with another MergedRange instance, False otherwise 
-    #
-    # @param o a MergedRange instance
-    # @return boolean False or True
-    #
-    def isOverlapping(self, o):
-        if o._start <= self._start and o._end >= self._end:
-            return True
-        if o._start >= self._start and o._start <= self._end or o._end >= self._start and o._end <= self._end:
-            return True
-        return False
-    
-    ## Merge coordinates and ID of two Merged Range     
-    #
-    # @param o a MergedRange instance
-    #
-    def merge(self, o):
-        self._start = min(self._start, o._start)
-        self._end = max(self._end, o._end)   
-        self._lId.extend(o._lId)
-        self._lId.sort()
-        
-    ## Set a Merged Range instance using a Match instance
-    #
-    # @param iMatch instance Match instance 
-    # 
-    def setFromMatch(self, iMatch):
-        self._lId= [iMatch.id]
-        self._start = iMatch.range_query.start
-        self._end = iMatch.range_query.end
-     
-    ## Get a Merged Range instance list using a Match instance list
-    #
-    # @param lIMatch list Match instance list
-    # @return lMergedRange list MergedRange instance list
-    #     
-    def getMergedRangeListFromMatchList(lIMatch):
-        lMergedRange = []
-        for iMatch in lIMatch:
-            mr = MergedRange()
-            mr.setFromMatch(iMatch)
-            lMergedRange.append(mr)
-        return lMergedRange
-    
-    getMergedRangeListFromMatchList = staticmethod(getMergedRangeListFromMatchList)
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/Path.py
--- a/commons/core/coord/Path.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,149 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-from commons.core.coord.Align import Align
-from commons.core.coord.Set import Set
-from commons.core.coord.Range import Range
-
-
-## Handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity) with an identifier
-#
-class Path( Align ):
-    
-    ## Constructor
-    #
-    # @param id identifier
-    # @param range_q: a Range instance for the query
-    # @param range_s: a Range instance for the subject
-    # @param e_value: E-value of the match 
-    # @param score: score of the match
-    # @param identity: identity percentage of the match
-    #
-    def __init__( self, id=-1, range_q=Range(), range_s=Range(), e_value=0, score=0, identity=0 ):
-        self.id = int( id )
-        Align.__init__( self, range_q, range_s, e_value, score, identity )
-        
-    ## Equal operator
-    #
-    def __eq__(self, o):
-        if o == None or self.id != o.id:
-            return False
-        return Align.__eq__(self, o)
-        
-    ## Set attributes from tuple
-    #
-    # @param tuple a tuple with (id,queryName,queryStart,queryEnd,subjectName,subjectStar,subjectEnd,E-value,score,identity)
-    # @note data are loaded such that the query is always on the direct strand
-    #
-    def setFromTuple(self, tuple):
-        self.id = int(tuple[0])
-        Align.setFromTuple(self, tuple[1:])
-        
-    ## Reset
-    #
-    def reset(self):
-        self.id = -1
-        Align.reset(self)
-        
-    ## Return the attributes as a formatted string
-    #
-    def toString(self):
-        string = "%i" % ( self.id )
-        string += "\t%s" % (Align.toString(self))
-        return string
-    
-    
-    ## Return the identifier of the Path instance
-    #
-    def getIdentifier( self ):
-        return self.id
-    
-    ## Return a Set instance with the subject mapped on the query
-    #
-    def getSubjectAsSetOfQuery(self):
-        iSet = Set()
-        iSet.id = self.id
-        iSet.name = self.range_subject.seqname
-        iSet.seqname = self.range_query.seqname
-        if self.range_subject.isOnDirectStrand():
-            iSet.start = self.range_query.start
-            iSet.end = self.range_query.end
-        else:
-            iSet.start = self.range_query.end
-            iSet.end = self.range_query.start
-        return iSet
-    
-    #TODO: add tests !!!!
-    #WARNING: subject always in direct strand !!!
-    ## Return a Set instance with the subject mapped on the query
-    #
-    def getQuerySetOfSubject(self):
-        iSet = Set()
-        iSet.id = self.id
-        iSet.name = self.range_query.seqname
-        iSet.seqname = self.range_subject.seqname
-        if self.range_subject.isOnDirectStrand():
-            iSet.start = self.range_subject.start
-            iSet.end = self.range_subject.end
-        else:
-            iSet.start = self.range_subject.end
-            iSet.end = self.range_subject.start
-        return iSet
-    
-    ## Return True if the instance can be merged with another Path instance, False otherwise
-    #
-    # @param o a Path instance
-    #
-    def canMerge(self, o):
-        return o.id != self.id \
-            and o.range_query.seqname == self.range_query.seqname \
-            and o.range_subject.seqname == self.range_subject.seqname \
-            and o.range_query.isOnDirectStrand() == self.range_query.isOnDirectStrand() \
-            and o.range_subject.isOnDirectStrand() == self.range_subject.isOnDirectStrand() \
-            and o.range_query.isOverlapping(self.range_query) \
-            and o.range_subject.isOverlapping(self.range_subject)
-            
-    ## Return an Align instance with the same attributes, except the identifier
-    #
-    def getAlignInstance(self):
-        iAlign = Align()
-        lAttributes = []
-        lAttributes.append( self.range_query.seqname )
-        lAttributes.append( self.range_query.start )
-        lAttributes.append( self.range_query.end )
-        lAttributes.append( self.range_subject.seqname )
-        lAttributes.append( self.range_subject.start )
-        lAttributes.append( self.range_subject.end )
-        lAttributes.append( self.e_value )
-        lAttributes.append( self.score )
-        lAttributes.append( self.identity )
-        iAlign.setFromTuple( lAttributes )
-        return iAlign
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/PathUtils.py
--- a/commons/core/coord/PathUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,858 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import os\n-import sys\n-import copy\n-from commons.core.coord.Path import Path\n-from commons.core.coord.SetUtils import SetUtils\n-from commons.core.coord.Map import Map\n-from commons.core.coord.AlignUtils import AlignUtils\n-from commons.core.checker.RepetException import RepetDataException\n-\n-## Static methods for the manipulation of Path instances\n-#\n-class PathUtils ( object ):\n-    \n-    ## Change the identifier of each Set instance in the given list\n-    #\n-    # @param lPaths list of Path instances\n-    # @param newId new identifier\n-    #\n-    def changeIdInList(lPaths, newId):\n-        for iPath in lPaths:\n-            iPath.id = newId\n-            \n-    changeIdInList = staticmethod( changeIdInList )\n-    \n-    \n-    ## Return a list of Set instances containing the query range from a list of Path instances\n-    # \n-    # @param lPaths a list of Path instances\n-    #  \n-    def getSetListFromQueries(lPaths):\n-        lSets = []\n-        for iPath in lPaths:\n-            lSets.append( iPath.getSubjectAsSetOfQuery() )\n-        return lSets\n-    \n-    getSetListFromQueries = staticmethod( getSetListFromQueries )\n-    \n-    #TODO: add tests !!!!\n-    ## Return a list of Set instances containing the query range from a list of Path instances\n-    # \n-    # @param lPaths a list of Path instances\n-    #\n-    @staticmethod\n-    def getSetListFromSubjects(lPaths):\n-        lSets = []\n-        for iPath in lPaths:\n-            lSets.append( iPath.getQuerySetOfSubject() )\n-        return lSets\n-    \n-    \n-    ## Return a sorted list of Range instances containing the subjects from a list of Path instances\n-    # \n-    # @param lPaths a list of Path instances\n-    # @note meaningful only if all Path instances have same identifier\n-    #\n-    def 
getRangeListFromSubjects( lPaths ):\n-        lRanges = []\n-        for iPath in lPaths:\n-            lRanges.append( iPath.range_subject )\n-        if lRanges[0].isOnDirectStrand():\n-            return sorted( lRanges, key=lambda iRange: ( iRange.getMin(), iRange.getMax() ) )\n-        else:\n-            return sorted( lRanges, key=lambda iRange: ( iRange.getMax(), iRange.getMin() ) )\n-        \n-    getRangeListFromSubjects = staticmethod( getRangeListFromSubjects )\n-    \n-    \n-    ## Return a tuple with min and max of query coordinates from Path instances in the given list\n-    #\n-    # @param '..b'te the \'path\' query is supposed to correspond to the \'gff\' first column\n-    #\n-    def convertPathFileIntoGffFile( pathFile, gffFile, source="REPET", verbose=0 ):\n-        dId2PathList = PathUtils.getDictOfListsWithIdAsKeyFromFile( pathFile )\n-        if verbose > 0:\n-            msg = "number of chains: %i" % ( len(dId2PathList.keys()) )\n-            sys.stdout.write( "%s\\n" % msg )\n-            sys.stdout.flush()\n-        gffFileHandler = open( gffFile, "w" )\n-        for id in dId2PathList.keys():\n-            if len( dId2PathList[ id ] ) == 1:\n-                iPath = dId2PathList[ id ][0]\n-                string = iPath.toStringAsGff( ID="%i" % iPath.getIdentifier(),\n-                                              source=source )\n-                gffFileHandler.write( "%s\\n" % string )\n-            else:\n-                iPathrange = PathUtils.convertPathListToPathrange( dId2PathList[ id ] )\n-                string = iPathrange.toStringAsGff( ID="ms%i" % iPathrange.getIdentifier(),\n-                                                   source=source )\n-                gffFileHandler.write( "%s\\n" % string )\n-                count = 0\n-                for iPath in dId2PathList[ id ]:\n-                    count += 1\n-                    string = iPath.toStringAsGff( type="match_part",\n-                                         
         ID="mp%i-%i" % ( iPath.getIdentifier(), count ),\n-                                                  Parent="ms%i" % iPathrange.getIdentifier(),\n-                                                  source=source )\n-                    gffFileHandler.write( "%s\\n" % string )\n-        gffFileHandler.close()\n-        \n-    convertPathFileIntoGffFile = staticmethod( convertPathFileIntoGffFile )\n-    \n-    \n-    ## Convert a Path file into a Set file\n-    # replace old parser.pathrange2set\n-    # @param pathFile: name of the input Path file\n-    # @param setFile: name of the output Set file\n-    #\n-    def convertPathFileIntoSetFile( pathFile, setFile ):\n-        pathFileHandler = open( pathFile, "r" )\n-        setFileHandler = open( setFile, "w" )\n-        iPath = Path()\n-        while True:\n-            line = pathFileHandler.readline()\n-            if line == "":\n-                break\n-            iPath.setFromString( line )\n-            iSet = iPath.getSubjectAsSetOfQuery()\n-            iSet.write( setFileHandler )\n-        pathFileHandler.close()\n-        setFileHandler.close()\n-        \n-    convertPathFileIntoSetFile = staticmethod( convertPathFileIntoSetFile )\n-    \n-    ## Write Path File without duplicated Path (same query, same subject and same coordinate)\n-    #\n-    # @param inputFile: name of the input Path file\n-    # @param outputFile: name of the output Path file\n-    #\n-    def removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName(inputFile, outputFile):\n-        f = open(inputFile, "r")\n-        line = f.readline()\n-        previousQuery = ""\n-        previousSubject = ""\n-        lPaths = []\n-        while line:\n-            iPath = Path()\n-            iPath.setFromString(line)\n-            query = iPath.getQueryName()\n-            subject = iPath.getSubjectName()\n-            if (query != previousQuery or subject != previousSubject) and lPaths != []: \n-                
lPathsWithoutDuplicate = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths)\n-                PathUtils.writeListInFile(lPathsWithoutDuplicate, outputFile, "a")\n-                lPaths = []\n-            lPaths.append(iPath)\n-            previousQuery = query\n-            previousSubject = subject\n-            line = f.readline()\n-        lPathsWithoutDuplicate = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths)\n-        PathUtils.writeListInFile(lPathsWithoutDuplicate, outputFile, "a")\n-        f.close()\n-    removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName = staticmethod(removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName)\n- \n-   \n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/Range.py
--- a/commons/core/coord/Range.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,361 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-## Record a region on a given sequence\n-#\n-class Range( object ):\n-\n-    ## Constructor\n-    #\n-    # @param seqname the name of the sequence\n-    # @param start the start coordinate\n-    # @param end the end coordinate\n-    #\n-    def __init__(self, seqname="", start=-1, end=-1):\n-        self.seqname = seqname\n-        self.start = int(start)\n-        self.end = int(end)\n-        \n-    ## Equal operator\n-    #\n-    # @param o a Range instance\n-    #\n-    def __eq__(self, o):\n-        if self.seqname == o.seqname and self.start == o.start and self.end == o.end:\n-            return True\n-        return False\n-        \n-    ## Unequal operator\n-    #\n-    # @param o a Range instance\n-    #\n-    def __ne__(self, o):\n-        return not self.__eq__(o)\n-    \n-    ## Convert the object into a string\n-    #\n-    # @note used in \'print myObject\'\n-    #\n-    def __str__( self ):\n-        return self.toString()\n-    \n-    ## Convert the object into a string\n-    #\n-    # @note used in \'repr(myObject)\' for debugging\n-    #\n-    def __repr__( self ):\n-        return self.toString().replace("\\t",";")\n-    \n-    def setStart(self, start):\n-        self.start = start\n-        \n-    def setEnd(self, end):\n-        self.end = end\n-        \n-    def setSeqName(self, seqName):\n-        self.seqname = seqName\n-    \n-    ## Reset\n-    #\n-    def reset(self):\n-        self.seqname = ""\n-        self.start = -1\n-        self.end = -1\n-        \n-    ## Return the attributes as a formatted string\n-    #   \n-    def toString(self):\n-        string = "%s" % (self.seqname)\n-        string += "\\t%d" % (self.start)\n-        string += "\\t%d" % (self.end)\n-        return string\n-    \n-    ## Show the attributes\n-    #\n-    def show(self):\n-        print 
self.toString()\n-    \n-    ## Return seqname\n-    #\n-    def getSeqname(self):\n-        return self.seqname\n-    \n-    ## Return the start coordinate\n-    #\n-    def getStart(self):\n-        return self.start\n-    \n-    ## Return the end coordinate\n-    #\n-    def getEnd(self):\n-        return self.end\n-    \n-    ## Return the lowest value between start and end coordinates\n-    #\n-    def getMin(self):\n-        return min(self.start, self.end)\n-    \n-    ## Return the greatest value between start and end attributes\n-    # \n-    def getMax(self):\n-        return max(self.start, self.end)\n-    \n-    ## Return Tr'..b', o ):\n-        if o.seqname != self.seqname:\n-            return False\n-        if self.getMin() >= o.getMin() and self.getMax() <= o.getMax():\n-            return True\n-        else:\n-            return False\n-\n-        \n-    ## Return the distance between the start of the instance and the start of another Range instance\n-    #\n-    # @param o a Range instance\n-    #\n-    def getDistance(self, o):\n-        if self.isOnDirectStrand() == o.isOnDirectStrand():\n-            if self.isOverlapping(o):\n-                return 0\n-            elif self.isOnDirectStrand():\n-                if self.start > o.start:\n-                    return self.start - o.end\n-                else:\n-                    return o.start - self.end\n-            else:\n-                if self.start > o.start:\n-                    return self.end - o.start\n-                else:\n-                    return o.end - self.start\n-        return -1\n-    \n-    ## Remove in the instance the region overlapping with another Range instance\n-    #\n-    # @param o a Range instance\n-    # \n-    def diff(self, o):\n-        new_range = Range(self.seqname)\n-        if not self.isOverlapping(o) or self.seqname != o.seqname:\n-            return new_range\n-\n-        istart = min(self.start, self.end)\n-        iend = max(self.start, 
self.end)\n-        jstart = min(o.start, o.end)\n-        jend = max(o.start, o.end)\n-        if istart < jstart:\n-            if iend <= jend:\n-                if self.isOnDirectStrand():\n-                    self.start = istart\n-                    self.end = jstart - 1\n-                else:\n-                    self.start = jstart - 1\n-                    self.end = istart\n-            else:\n-                if self.isOnDirectStrand():\n-                    self.start = istart\n-                    self.end = jstart - 1\n-                    new_range.start = jend + 1\n-                    new_range.end = iend\n-                else:\n-                    self.start = jstart - 1;\n-                    self.end = istart;\n-                    new_range.start = iend\n-                    new_range.end = jend + 1\n-        else: #istart>=jstart\n-            if iend <= jend:\n-                self.start = 0\n-                self.end = 0\n-            else:\n-                if self.isOnDirectStrand():\n-                    self.start = jend + 1\n-                    self.end = iend\n-                else:\n-                    self.start = iend\n-                    self.end = jend + 1\n-        return new_range\n-        \n-    ## Find the bin that contains the instance and compute its index\n-    #\n-    # @note Required for coordinate indexing via a hierarchical bin system\n-    #\n-    def findIdx(self):\n-        min_lvl = 3\n-        max_lvl = 6\n-        for bin_lvl in xrange(min_lvl, max_lvl):\n-            if getBin(self.start, bin_lvl) == getBin(self.end, bin_lvl):\n-                return getIdx(self.start, bin_lvl)\n-        return getIdx(self.start, max_lvl) \n-    \n-    ## Get a bin for fast database access\n-    #\n-    # @return bin number (float)\n-    #\n-    def getBin(self):\n-        for i in xrange(3, 8):\n-            bin_lvl = pow(10, i)\n-            if int(self.start/bin_lvl) == int(self.end/bin_lvl):\n-                return 
float(bin_lvl+(int(self.start/bin_lvl)/1e10))\n-        bin_lvl = pow(10, 8)\n-        return float(bin_lvl+(int(self.start/bin_lvl)/1e10))\n-    \n-    \n-# Functions\n-\n-# Get the bin number of a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system\n-#    \n-def getBin(val, bin_lvl):\n-    bin_size = pow(10, bin_lvl)\n-    return long(val / bin_size)\n-    \n-# Get an index from a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system\n-#\n-def getIdx(val, bin_lvl):\n-    min_lvl = 3\n-    max_lvl = 6\n-    if bin_lvl >= max_lvl:\n-        return long((bin_lvl-min_lvl+1)*pow(10,max_lvl))\n-    return long(((bin_lvl-min_lvl+1)*pow(10,max_lvl))+getBin(val,bin_lvl))\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/Range.pyc
b
Binary file commons/core/coord/Range.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/Set.py
--- a/commons/core/coord/Set.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,125 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-from commons.core.coord.Map import Map
-
-
-## Record a named region on a given sequence with an identifier
-#  
-class Set( Map ):
-    
-    ## Constructor
-    #
-    # @param id identifier
-    # @param name the name of the region
-    # @param seqname the name of the sequence
-    # @param start the start coordinate
-    # @param end the end coordinate
-    #
-    def __init__(self, id=-1, name="", seqname="", start=-1, end=-1):
-        Map.__init__( self, name, seqname, start, end )
-        self.id = id
-        
-    ## Equal operator
-    #    
-    def __eq__(self, o):
-        if self.id != o.id:
-            return False
-        else:
-            return Map.__eq__(self, o)
-    
-    def getId(self):
-        return self.id
-        
-    ## Reset
-    #
-    def reset(self):
-        self.setFromTuple([-1, "", "", -1, -1 ])
-            
-    ## Set attributes from tuple
-    #
-    # @param tuple: a tuple with (id, name, seqname, start, end)
-    # 
-    def setFromTuple(self, tuple):
-        self.id = int(tuple[0])
-        Map.setFromTuple(self, tuple[1:])
-        
-    ## Return the attributes as a formatted string
-    #
-    def toString(self):
-        string = "%i" % (self.id)
-        string += "\t%s" % (Map.toString(self))
-        return string
-    
-    ## Merge the instance with another Set instance
-    #
-    # @param o a Set instance
-    #
-    def merge(self, o):
-        if self.seqname == o.seqname:
-            Map.merge(self, o)
-            self.id = min(self.id, o.id)
-    
-    ## Return a Map instance with the attributes
-    #
-    def getMap(self):
-        return Map(self.name, self.seqname, self.start, self.end)
-    
-    ## Remove in the instance the region overlapping with another Set instance
-    #
-    # @param o a Set instance
-    #  
-    def diff(self, o):
-        iMap = Map.diff(self, o.getMap())
-        new = Set()
-        if not iMap.isEmpty():
-            new.id = self.id
-            new.name = self.name
-            new.seqname = self.seqname
-            new.start = iMap.start
-            new.end = iMap.end
-        return new
-    
-    ## Return a Map instance with the identifier in the name
-    #
-    def set2map(self):
-        return Map(self.name+"::"+str(self.id),self.seqname,self.start,self.end)
-    
-    
-    def getMapInstance( self ):
-        iMap = Map()
-        lAttributes = []
-        lAttributes.append( self.name )
-        lAttributes.append( self.seqname )
-        lAttributes.append( self.start )
-        lAttributes.append( self.end )
-        iMap.setFromTuple( lAttributes )
-        return iMap
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/SetUtils.py
--- a/commons/core/coord/SetUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,553 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-from commons.core.coord.Set import Set\n-\n-## Static methods for the manipulation of Path instances\n-#\n-class SetUtils( object ):\n-    \n-    ## Change the identifier of each Set instance in the given list\n-    #\n-    # @param lSets list of Set instances\n-    # @param newId new identifier\n-    #\n-    def changeIdInList(lSets, newId):\n-        for iSet in lSets:\n-            iSet.id = newId\n-            \n-    changeIdInList = staticmethod( changeIdInList )\n-    \n-    ## Return the length of the overlap between two lists of Set instances\n-    #\n-    # @param lSets1 list of Set instances\n-    # @param lSets2 list of Set instances\n-    # @return length of overlap\n-    # @warning sequence names are supposed to be identical\n-    #\n-    def getOverlapLengthBetweenLists(lSets1, lSets2):\n-        lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)\n-        lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)\n-        osize = 0\n-        i = 0\n-        j = 0\n-        while i!= len(lSet1Sorted):\n-            while j!= len(lSet2Sorted) and lSet1Sorted[i].getMin()>lSet2Sorted[j].getMax()\\\n-                and not(lSet1Sorted[i].isOverlapping(lSet2Sorted[j])):\n-                j+=1\n-            jj=j\n-            while jj!= len(lSet2Sorted) and lSet1Sorted[i].isOverlapping(lSet2Sorted[jj]):\n-                osize+=lSet1Sorted[i].getOverlapLength(lSet2Sorted[jj])\n-                jj+=1\n-            i+=1\n-        return osize\n-    \n-    getOverlapLengthBetweenLists = staticmethod( getOverlapLengthBetweenLists )\n-    \n-    ## Return True if the two lists of Set instances overlap, False otherwise    \n-    #\n-    # @param lSets1 list of Set instances\n-    # @param lSets2 list of Set instances\n-    #    \n-    def areSetsOverlappingBetweenLists( lSets1, 
lSets2 ):\n-        lSet1Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets1)\n-        lSet2Sorted = SetUtils.getSetListSortedByIncreasingMinThenMax(lSets2)\n-        i=0\n-        j=0\n-        while i!= len(lSet1Sorted):\n-            while j!= len(lSet2Sorted) and lSet1Sorted[i].getMin()>lSet2Sorted[j].getMax()\\\n-                and not(lSet1Sorted[i].isOverlapping(lSet2Sorted[j])):\n-                j+=1\n-            if j!= len(lSet2Sorted) and lSet1Sorted[i].isOverlapping(lSet2Sorted[j]):\n-                return True\n-            i+=1\n-        return False\n- '..b'  def getListOfSetWithoutOverlappingBetweenTwoListOfSet(lSet1, lSet2):\n-        for i in lSet1:\n-            for idx,j in enumerate(lSet2):\n-                n=j.diff(i)\n-                if not n.isEmpty() and n.getLength()>=20:\n-                    lSet2.append(n)\n-        lSet2WithoutOverlaps=[]\n-        for i in lSet2:\n-            if not i.isEmpty() and i.getLength()>=20:\n-                lSet2WithoutOverlaps.append(i)\n-        return lSet2WithoutOverlaps\n-        \n-    getListOfSetWithoutOverlappingBetweenTwoListOfSet = staticmethod (getListOfSetWithoutOverlappingBetweenTwoListOfSet)\n-\n-    ## Return a Set list from a Set file\n-    #\n-    # @param setFile string name of a Set file\n-    # @return a list of Set instances\n-    #\n-    def getSetListFromFile( setFile ):\n-        lSets = []\n-        setFileHandler = open( setFile, "r" )\n-        while True:\n-            line = setFileHandler.readline()\n-            if line == "":\n-                break\n-            iSet = Set()\n-            iSet.setFromString( line )\n-            lSets.append( iSet )\n-        setFileHandler.close()\n-        return lSets\n-    \n-    getSetListFromFile = staticmethod( getSetListFromFile )\n-    \n-    \n-    def convertSetFileIntoMapFile( setFile, mapFile ):\n-        setFileHandler = open( setFile, "r" )\n-        mapFileHandler = open( mapFile, "w" )\n-        iSet = 
Set()\n-        while True:\n-            line = setFileHandler.readline()\n-            if line == "":\n-                break\n-            iSet.setFromString( line )\n-            iMap = iSet.getMapInstance()\n-            iMap.write( mapFileHandler )\n-        setFileHandler.close()\n-        mapFileHandler.close()\n-        \n-    convertSetFileIntoMapFile = staticmethod( convertSetFileIntoMapFile )\n-\n-\n-    def getDictOfListsWithSeqnameAsKey( lSets ):\n-        dSeqnamesToSetList = {}\n-        for iSet in lSets:\n-            if not dSeqnamesToSetList.has_key( iSet.seqname ):\n-                dSeqnamesToSetList[ iSet.seqname ] = []\n-            dSeqnamesToSetList[ iSet.seqname ].append( iSet )\n-        return dSeqnamesToSetList\n-    \n-    getDictOfListsWithSeqnameAsKey = staticmethod( getDictOfListsWithSeqnameAsKey )\n-    \n-    \n-    def filterOnLength( lSets, minLength=0, maxLength=10000000000 ):\n-        if minLength == 0 and maxLength == 0:\n-            return lSets\n-        lFiltered = []\n-        for iSet in lSets:\n-            if minLength <= iSet.getLength() <= maxLength:\n-                lFiltered.append( iSet )\n-        return lFiltered\n-    \n-    filterOnLength = staticmethod( filterOnLength )\n-    \n-    \n-    def getListOfNames( setFile ):\n-        lNames = []\n-        setFileHandler = open( setFile, "r" )\n-        iSet = Set()\n-        while True:\n-            line = setFileHandler.readline()\n-            if line == "":\n-                break\n-            iSet.setFromTuple( line[:-1].split("\\t") )\n-            if iSet.name not in lNames:\n-                lNames.append( iSet.name )\n-        setFileHandler.close()\n-        return lNames\n-    \n-    getListOfNames = staticmethod( getListOfNames )\n-\n-\n-    def getDictOfDictsWithNamesThenIdAsKeyFromFile( setFile ):\n-        dNames2DictsId = {}\n-        setFileHandler = open( setFile, "r" )\n-        while True:\n-            line = setFileHandler.readline()\n- 
           if line == "":\n-                break\n-            iSet = Set()\n-            iSet.setFromTuple( line[:-1].split("\\t") )\n-            if not dNames2DictsId.has_key( iSet.name ):\n-                dNames2DictsId[ iSet.name ] = { iSet.id: [ iSet ] }\n-            else:\n-                if not dNames2DictsId[ iSet.name ].has_key( iSet.id ):\n-                    dNames2DictsId[ iSet.name ][ iSet.id ] = [ iSet ]\n-                else:\n-                    dNames2DictsId[ iSet.name ][ iSet.id ].append( iSet )\n-        setFileHandler.close()\n-        return dNames2DictsId\n-    \n-    getDictOfDictsWithNamesThenIdAsKeyFromFile = staticmethod( getDictOfDictsWithNamesThenIdAsKeyFromFile )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/SlidingWindow.py
--- a/commons/core/coord/SlidingWindow.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,73 +0,0 @@
-class SlidingWindow(object):
-        
-    def __init__( self, length = 1, overlap = 1 ):        
-        self._length = length
-        self._overlap = overlap
-        self._start = 1
-        self._end = length
-        self._step = length - overlap
-
-    def slideWindowOnce(self):
-        self._start = self._start + self._step
-        self._end = self._end + self._step
-    
-    def getStart(self):
-        return self._start
-    
-    def getEnd(self):
-        return self._end
-    
-    def setStart(self, start):
-        self._start = start
-    
-    def setEnd(self, end):
-        self._end = end
-    
-    def getLength(self):
-        return self._length
-    
-    def getOverlap(self):
-        return self._overlap
-    
-    def setLength(self, length):
-        self._length = length
-        
-    def setOverlap(self, overlap):
-        self._overlap = overlap
-    
-    def getSlidingMsg(self):
-        return "Window is sliding : %s %s" %(self._start, self._end)
-
-class SlidingWindowToCountMatchingBases(SlidingWindow):
-    
-    def getSetLengthOnWindow( self, iSet ):
-        if self._isSetIncludedInTheWindow(iSet):
-            return iSet.getLength()
-        if self._isWindowIncludedInTheSet(iSet):
-            return self._length
-        elif self._isSetOverlapTheRightSideOfTheWindow(iSet):
-            return self._end - iSet.getMin()+1
-        elif self._isSetOverlapTheLeftSideOfTheWindow(iSet):
-            return iSet.getMax() - self._start+1
-        
-    def getCoordSetOnWindow( self, iSet ):
-        if self._isSetIncludedInTheWindow(iSet):
-            return iSet.getStart(), iSet.getEnd()
-        if self._isWindowIncludedInTheSet(iSet):
-            return self.getStart(), self.getEnd()
-        elif self._isSetOverlapTheRightSideOfTheWindow(iSet):
-            return iSet.getStart(), self.getEnd()
-        elif self._isSetOverlapTheLeftSideOfTheWindow(iSet):
-            return self.getStart(), iSet.getEnd()
-        
-    def _isSetIncludedInTheWindow(self, feature):
-        return feature.getMin() >= self._start and feature.getMax() <= self._end
-    
-    def _isWindowIncludedInTheSet(self, feature):
-        return self._start >= feature.getMin() and self._end <= feature.getMax()
-
-    def _isSetOverlapTheRightSideOfTheWindow(self, feature):
-        return feature.getMin() <= self._end and feature.getMin() >= self._start
-
-    def _isSetOverlapTheLeftSideOfTheWindow(self, feature):
-        return feature.getMax() <= self._end and feature.getMax() >= self._start    
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/__init__.pyc
b
Binary file commons/core/coord/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/align2set.py
--- a/commons/core/coord/align2set.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,86 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import getopt
-from commons.core.coord.Align import Align
-
-def help():
-    print
-    print "usage: %s [ options ]" % ( sys.argv[0].split("/")[-1] )
-    print "options:"
-    print "     -h: this help"
-    print "     -i: input file name (format='align')"
-    print "     -o: output file name (format='set', default=inFileName+'.set')"
-    print "     -v: verbosity level (default=0/1)"
-    print
-    
-
-def align2set( inFileName, outFileName ):
-    alignFileHandler = open( inFileName, "r" )
-    setFileHandler = open( outFileName, "w" )
-    iAlign = Align()
-    countAlign = 0
-    while True:
-        line = alignFileHandler.readline()
-        if line == "":
-            break
-        countAlign += 1
-        iAlign.setFromString( line, "\t" )
-        setFileHandler.write( "%i\t%s\t%s\t%i\t%i\n" % ( countAlign,
-                                                         iAlign.getSubjectName(),
-                                                         iAlign.getQueryName(),
-                                                         iAlign.getQueryStart(),
-                                                         iAlign.getQueryEnd() ) )
-    alignFileHandler.close()
-    setFileHandler.close()
-
-
-def main():
-
-    inFileName = ""
-    outFileName = ""
-    verbose = 0
-
-    try:
-        opts, args = getopt.getopt( sys.argv[1:], "hi:o:v:" )
-    except getopt.GetoptError, err:
-        print str(err)
-        help()
-        sys.exit(1)
-    for o,a in opts:
-        if o == "-h":
-            help()
-            sys.exit(0)
-        elif o == "-i":
-            inFileName = a
-        elif o == "-o":
-            outFileName = a
-        elif o == "-v":
-            verbose = int(a)
-
-    if  inFileName == "":
-        print "ERROR: missing input file name"
-        help()
-        sys.exit(1)
-
-    if verbose > 0:
-        print "START %s" % ( sys.argv[0].split("/")[-1] )
-        sys.stdout.flush()
-
-    if outFileName == "":
-        outFileName = "%s.set" % ( inFileName )
-
-#TODO: move 'align2set' into 'AlignUtils.convertAlignFileIntoPSetFile' with a test
-#    AlignUtils.convertAlignFileIntoPSetFile( inFileName, outFileName )
-
-    align2set( inFileName, outFileName )
-
-    if verbose > 0:
-        print "END %s" % ( sys.argv[0].split("/")[-1] )
-        sys.stdout.flush()
-
-    return 0
-
-
-if __name__ == "__main__":
-    main()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/TestSuite_coord.py
--- a/commons/core/coord/test/TestSuite_coord.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,70 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-import unittest
-import sys
-import Test_Align
-import Test_AlignUtils
-import Test_Map
-import Test_MapUtils
-import Test_Match
-import Test_MatchUtils
-import Test_Path
-import Test_PathUtils
-import Test_Range
-import Test_Set
-import Test_SetUtils
-
-
-def main():
-    
-    TestSuite_coord = unittest.TestSuite() 
-    
-    TestSuite_coord.addTest( unittest.makeSuite( Test_Align.Test_Align, "test" ) )
-    TestSuite_coord.addTest( unittest.makeSuite( Test_AlignUtils.Test_AlignUtils, "test" ) )
-    TestSuite_coord.addTest( unittest.makeSuite( Test_Map.Test_Map, "test" ) )
-    TestSuite_coord.addTest( unittest.makeSuite( Test_MapUtils.Test_MapUtils, "test" ) )
-    TestSuite_coord.addTest( unittest.makeSuite( Test_Match.Test_Match, "test" ) )
-    TestSuite_coord.addTest( unittest.makeSuite( Test_MatchUtils.Test_MatchUtils, "test" ) )
-    TestSuite_coord.addTest( unittest.makeSuite( Test_Path.Test_Path, "test" ) )
-    TestSuite_coord.addTest( unittest.makeSuite( Test_PathUtils.Test_PathUtils, "test" ) )
-    TestSuite_coord.addTest( unittest.makeSuite( Test_Range.Test_Range, "test" ) )
-    TestSuite_coord.addTest( unittest.makeSuite( Test_Set.Test_Set, "test" ) )
-    TestSuite_coord.addTest( unittest.makeSuite( Test_SetUtils.Test_SetUtils, "test" ) )
-    
-    runner = unittest.TextTestRunner( sys.stderr, 2, 2 )
-    runner.run( TestSuite_coord )
-    
-    
-if __name__ == "__main__":
-    main()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_Align.py
--- a/commons/core/coord/test/Test_Align.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,518 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import os\n-import time\n-from commons.core.coord.Align import Align\n-from commons.core.coord.Map import Map\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.coord.Range import Range\n-\n-\n-class Test_Align( unittest.TestCase ):\n-    \n-    def setUp(self):\n-        self._align = Align()\n-        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )\n-        \n-    def tearDown(self):\n-        self._align = None\n-        \n-    def test_isEmpty_True(self):\n-        alignInstance = Align()\n-        \n-        self.assertTrue(alignInstance.isEmpty())\n-        \n-    def test_isEmpty_True_query_is_empty(self):\n-        alignInstance = Align()\n-        line = "\\t-1\\t-1\\tTE2\\t3\\t10\\t1e-20\\t30\\t90.2\\n"\n-        alignInstance.setFromString(line)\n-        \n-        self.assertTrue(alignInstance.isEmpty())\n-        \n-    def test_isEmpty_True_subject_is_empty(self):\n-        alignInstance = Align()\n-        line = "chr1\\t2\\t20\\t\\t-1\\t-1\\t1e-20\\t30\\t90.2\\n"\n-        alignInstance.setFromString(line)\n-        \n-        self.assertTrue(alignInstance.isEmpty())\n-        \n-    def test_isEmpty_False(self):\n-        alignInstance = Align()\n-        line = "chr1\\t2\\t20\\tTE2\\t3\\t10\\t1e-20\\t30\\t90.2\\n"\n-        alignInstance.setFromString(line)\n-        \n-        self.assertFalse(alignInstance.isEmpty())\n-        \n-    def test_read(self):\n-        line = "chr2\\t1\\t10\\tTE3\\t11\\t17\\t1e-20\\t30\\t90.2\\n"\n-        expReturn = 1\n-\n-        dummyMockAlignFile = "dummyMockAlignFile"\n-        mockAlignFileHandle = open(dummyMockAlignFile, "w")\n-        mockAlignFileHandle.write(line)\n-        mockAlignFileHandle.close()\n-        \n-        expAlignInstance = Align()\n-        
expAlignInstance.setFromString(line)\n-\n-        mockAlignFileHandle = open(dummyMockAlignFile, "r")\n-        obsAlignInstance = Align()\n-        obsReturn = obsAlignInstance.read(mockAlignFileHandle)\n-        \n-        mockAlignFileHandle.close()\n-        os.remove(dummyMockAlignFile)   \n-        \n-        self.assertEquals(expAlignInstance, obsAlignInstance)    \n-        self.assertEquals(expReturn, obsReturn)    \n-        \n-    def test_read_empty_file(self):\n-        expReturn = 0\n-         \n-        dummyMockAlignFile = "dummyMockAlignFile"\n-        mockAlignFileHandle = open(dummyMockAlignFile, "w")\n-        mockAlignFileHandle.close'..b'ject.setFromTuple( ( "repet", "sbj1", "1", "100" ) )\n-        \n-        obsMapQuery, obsMapSubject = self._align.getMapsOfQueryAndSubject()\n-        \n-        self.assertEqual( expMapQuery, obsMapQuery )\n-        self.assertEqual( expMapSubject, obsMapSubject )\n-        \n-    def test_getBin_bin_level_9(self):\n-        tuple = ("chr1","190000000","390000000","TE2","11","17","1e-20","30","90.2")\n-        self._align.setFromTuple(tuple)\n-        expRes = 100000000.0\n-        obsRes = self._align.getBin()\n-        self.assertEquals(expRes, obsRes)\n-\n-    def test_getBin_bin_level_8(self):\n-        tuple = ("chr1","19000000","39000000","TE2","11","17","1e-20","30","90.2")\n-        self._align.setFromTuple(tuple)\n-        expRes = 100000000.0\n-        obsRes = self._align.getBin()\n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_7(self):\n-        tuple = ("chr1","1900000","3900000","TE2","11","17","1e-20","30","90.2")\n-        self._align.setFromTuple(tuple)\n-        expRes = 10000000.0\n-        obsRes = self._align.getBin()\n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_6(self):\n-        tuple = ("chr1","190000","390000","TE2","11","17","1e-20","30","90.2")\n-        self._align.setFromTuple(tuple)\n-        obsRes 
= self._align.getBin()\n-        expRes = 1000000.0\n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_5(self):\n-        tuple = ("chr1","19000","39000","TE2","11","17","1e-20","30","90.2")\n-        self._align.setFromTuple(tuple)\n-        obsRes = self._align.getBin()\n-        expRes = 100000.0\n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_4(self):\n-        tuple = ("chr1","1900","3900","TE2","11","17","1e-20","30","90.2")\n-        self._align.setFromTuple(tuple)\n-        obsRes = self._align.getBin()\n-        expRes = 10000.0\n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_3(self):\n-        tuple = ("chr1","190","390","TE2","11","17","1e-20","30","90.2")\n-        self._align.setFromTuple(tuple)\n-        obsRes = self._align.getBin()\n-        expRes = 1000.0\n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_2(self):\n-        tuple = ("chr1","19","39","TE2","11","17","1e-20","30","90.2")\n-        self._align.setFromTuple(tuple)\n-        obsRes = self._align.getBin()\n-        expRes = 1000.0\n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_1(self):\n-        tuple = ("chr1","1","3","TE2","11","17","1e-20","30","90.2")\n-        self._align.setFromTuple(tuple)\n-        obsRes = self._align.getBin()\n-        expRes = 1000.0\n-        self.assertEquals(expRes, obsRes)\n-        \n-        \n-    def test_switchQuerySubject_directS( self ):\n-        tuple = ("chr1","1","3","TE2","11","17","1e-20","30","90.2")\n-        self._align.setFromTuple( tuple )\n-        exp = Align( Range("TE2","11","17"), Range("chr1","1","3"), "1e-20", "30", "90.2" )\n-        self._align.switchQuerySubject()\n-        self.assertEquals( exp, self._align )\n-        \n-        \n-    def test_switchQuerySubject_reverseS( self ):\n-        tuple = 
("chr1","1","3","TE2","17","11","1e-20","30","90.2")\n-        self._align.setFromTuple( tuple )\n-        exp = Align( Range("TE2","11","17"), Range("chr1","3","1"), "1e-20", "30", "90.2" )\n-        self._align.switchQuerySubject()\n-        self.assertEquals( exp, self._align )\n-        \n-        \n-    def test_toStringAsGff( self ):\n-        self._align.setFromString( "chr1\\t1\\t10\\tTE3\\t11\\t17\\t1e-20\\t30\\t85.2\\n" )\n-        exp = "chr1\\tREPET\\tmatch\\t1\\t10\\t1e-20\\t+\\t.\\tID=23;Target=TE3 11 17"\n-        obs = self._align.toStringAsGff( ID="23" )\n-        self.assertEqual( obs, exp )\n-        \n-        \n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_Align ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_AlignUtils.py
--- a/commons/core/coord/test/Test_AlignUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,777 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import os\n-import time\n-import shutil\n-from commons.core.coord.AlignUtils import AlignUtils\n-from commons.core.coord.Align import Align\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.coord.Range import Range\n-\n-\n-class Test_AlignUtils( unittest.TestCase ):\n-    \n-    def setUp( self ):\n-        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )\n-        \n-        \n-    def tearDown( self ):\n-        self._uniqId = ""\n-        \n-        \n-    def test_getAlignListFromFile( self ):\n-        a1 = Align()\n-        a1.setFromTuple( ( "chr1", "1", "100", "seq3", "1", "100", "1e-23", "89", "97.26" ) )\n-        a2 = Align()\n-        a2.setFromTuple( ( "chr2", "121", "210", "seq5", "21", "110", "1e-32", "95", "98.13" ) )\n-        \n-        inFileName = "dummyFile_%s" % ( self._uniqId )\n-        inFileHandler = open( inFileName, "w" )\n-        a1.write( inFileHandler )\n-        a2.write( inFileHandler )\n-        inFileHandler.close()\n-        \n-        lExp = [ a1, a2 ]\n-        lObs = AlignUtils.getAlignListFromFile( inFileName )\n-        \n-        self.assertEqual( lExp, lObs )\n-        \n-        if os.path.exists( inFileName ):\n-            os.remove( inFileName )\n-            \n-            \n-    def test_getListOfScores( self ):\n-        a1 = Align()\n-        a1.setFromTuple( ( "chr1", "1", "100", "seq3", "1", "100", "1e-23", "89", "97.26" ) )\n-        a2 = Align()\n-        a2.setFromTuple( ( "chr2", "121", "210", "seq5", "21", "110", "1e-32", "95", "98.13" ) )\n-        lAligns = [ a1, a2 ]\n-        \n-        lExp = [ 89, 95 ]\n-        lObs = AlignUtils.getListOfScores( lAligns )\n-        \n-        self.assertEqual( lExp, lObs )\n-        \n-        \n-    def test_getScoreListFromFile( self ):\n-        
alignFile = "dummyAlignFile"\n-        alignFileHandler = open( alignFile, "w" )\n-        alignFileHandler.write( "chr3\\t1\\t100\\tchr5\\t11\\t110\\t1e-52\\t133\\t87.2\\n" )\n-        alignFileHandler.write( "chr7\\t1\\t200\\tchr2\\t11\\t210\\t1e-78\\t235\\t98.9\\n" )\n-        alignFileHandler.close()\n-        \n-        lExp = [ 133, 235 ]\n-        lObs = AlignUtils.getScoreListFromFile( alignFile )\n-        self.assertEqual( lExp, lObs )\n-        \n-        os.remove( alignFile )\n-        \n-        \n-    def test_getScoreListFromFile_empty_file( self ):\n-        alignFile = "dummyAlignFile"\n-    '..b' iAlign2 = Align( Range("chr1",51,80), Range("TE1",161,190), 1e-20, 90.2, 30 )\n-        self.assertFalse( iAlign1.isOverlapping( iAlign2 ) )\n-        \n-        \n-    def test_mergeList( self ):\n-        iAlign1 = Align( Range("chr1",81,120), Range("TE1",91,130), 1e-20, 90.2, 30 )\n-        iAlign2 = Align( Range("chr2",51,80), Range("TE1",61,90), 1e-20, 90.2, 30 )  # different query\n-        iAlign3 = Align( Range("chr1",1,100), Range("TE1",11,110), 1e-20, 90.2, 30 )  # to be merged with 1st line\n-        iAlign4 = Align( Range("chr1",1,200), Range("TE2",11,210), 1e-20, 90.2, 30 )  # different subject\n-        iAlign5 = Align( Range("chr1",1,100), Range("TE1",501,600), 1e-20, 90.2, 30 )  # non-overlapping subject\n-        lAligns = [ iAlign1, iAlign2, iAlign3, iAlign4, iAlign5 ]\n-        \n-        iAlign6 = Align( Range("chr1",1,120), Range("TE1",11,130), 1e-20, 90.2, 30 )\n-        lExp = [ iAlign6, iAlign5, iAlign4, iAlign2 ]\n-        \n-        lObs = AlignUtils.mergeList( lAligns )\n-        \n-        self.assertEquals( lExp, lObs )\n-        \n-        \n-    def test_mergeFile_empty( self ):\n-        inFile = "dummyInFile.align"\n-        inF = open( inFile, "w" )\n-        inF.close()\n-        \n-        expFile = "dummyExpFile.align"\n-        expF = open( expFile, "w" )\n-        expF.close()\n-        \n-        obsFile = 
"dummyObsFile.align"\n-        AlignUtils.mergeFile( inFile, obsFile )\n-        \n-        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n-        \n-        for f in [ inFile, expFile, obsFile ]:\n-            os.remove( f )\n-            \n-            \n-    def test_mergeFile( self ):\n-        iAlign = Align()\n-        \n-        inFile = "dummyInFile.align"\n-        inF = open( inFile, "w" )\n-        iAlign.setFromString( "chr1\\t81\\t120\\tTE1\\t91\\t130\\t1e-20\\t30\\t90.2\\n" )\n-        iAlign.write( inF )\n-        iAlign.setFromString( "chr2\\t51\\t80\\tTE1\\t61\\t90\\t1e-20\\t30\\t90.2\\n" )  # different query\n-        iAlign.write( inF )\n-        iAlign.setFromString( "chr1\\t1\\t100\\tTE1\\t11\\t110\\t1e-20\\t30\\t90.2\\n" )  # to be merged with 1st line\n-        iAlign.write( inF )\n-        iAlign.setFromString( "chr1\\t1\\t200\\tTE2\\t11\\t210\\t1e-20\\t30\\t90.2\\n" )  # different subject\n-        iAlign.write( inF )\n-        inF.close()\n-        \n-        expFile = "dummyExpFile.align"\n-        expF = open( expFile, "w" )\n-        iAlign.setFromString( "chr1\\t1\\t120\\tTE1\\t11\\t130\\t1e-20\\t30\\t90.2\\n" )\n-        iAlign.write( expF )\n-        iAlign.setFromString( "chr1\\t1\\t200\\tTE2\\t11\\t210\\t1e-20\\t30\\t90.2\\n" )\n-        iAlign.write( expF )\n-        iAlign.setFromString( "chr2\\t51\\t80\\tTE1\\t61\\t90\\t1e-20\\t30\\t90.2\\n" )\n-        iAlign.write( expF )\n-        expF.close()\n-        \n-        obsFile = "dummyObsFile.align"\n-        AlignUtils.mergeFile( inFile, obsFile )\n-        \n-        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n-        \n-        for f in [ inFile, expFile, obsFile ]:\n-            os.remove( f )\n-            \n-            \n-    def test_updateScoresInFile( self ):\n-        iAlign = Align()\n-        \n-        inFile = "dummyInFile.align"\n-        inHandler = open( inFile, "w" )\n-        iAlign.setFromString( 
"query1\\t1\\t100\\tsubject1\\t1\\t95\\t1e-180\\t230\\t90.2\\n" )\n-        iAlign.write( inHandler )\n-        inHandler.close()\n-        \n-        expFile = "dummyExpFile.align"\n-        expHandler = open( expFile, "w" )\n-        iAlign.setFromString( "query1\\t1\\t100\\tsubject1\\t1\\t95\\t1e-180\\t%i\\t90.2\\n" % ( ( 100 - 1 + 1 ) * 90.2 / 100.0 ) )\n-        iAlign.write( expHandler )\n-        expHandler.close()\n-        \n-        obsFile = "dummyObsFile.align"\n-        AlignUtils.updateScoresInFile( inFile, obsFile )\n-        \n-        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n-        \n-        for f in [ inFile, expFile, obsFile ]:\n-            os.remove( f )\n-            \n-            \n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_ConvCoord.py
--- a/commons/core/coord/test/Test_ConvCoord.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,205 +0,0 @@\n-import unittest\n-import os\n-import time\n-from commons.core.coord.ConvCoord import ConvCoord\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.sql.DbFactory import DbFactory\n-from commons.core.coord.Map import Map\n-\n-class Test_ConvCoord( unittest.TestCase ):\n-    \n-    def setUp( self ):\n-        self._i = ConvCoord()\n-        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n-        self._inData = "dummyInData_%s" % ( self._uniqId )\n-        self._mapData = "dummyMapData_%s" % ( self._uniqId )\n-        self._expData = "dummyExpData_%s" % ( self._uniqId )\n-        self._obsData = "dummyObsData_%s" % ( self._uniqId )\n-        self._iDb = DbFactory.createInstance()\n-        self._i._iDb = self._iDb\n-        \n-    def tearDown( self ):\n-        self._iDb.close()\n-\n-#TODO: handle duplicated matchs for path\n-#    def test_convCoordsChkToChrFromFile_duplicated_matchs( self ):\n-#        dChunks2CoordMaps = {"chunk1": Map( "chunk1", "dmel_chr4", 760001, 960000 ),\n-#                             "chunk2": Map( "chunk2", "dmel_chr4", 950001, 1150000 ) }\n-#        tmpPathFileName = "dummyPathCoordOnChr_%s" % self._uniqId \n-#        self._writePathFileCoordOnChunk(tmpPathFileName)\n-#        \n-#        expPathFile = "dummyExpPathFile_%s" % self._uniqId\n-#        self._writePathFileCoordOnChrWithOutDoublons(expPathFile)\n-#        \n-#        outTableName = self._i.convCoordsChkToChrFromFile(tmpPathFileName, "path", dChunks2CoordMaps)\n-#        \n-#        obsPathFile = "dummyObsPathFile_%s" % self._uniqId\n-#        self._iDb.exportDataToFile(outTableName, obsPathFile)\n-#        \n-#        self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))\n-#        \n-#        for f in [ expPathFile, obsPathFile, tmpPathFileName ]:\n-#            os.remove( f )\n-#        self._iDb.dropTable(outTableName)\n- \n-#TODO: handle matchs out of chunk overlap ? 
For one side (=> path 128, remove path 152) ? For two sides (path 129, fusion with path 154) ?\n-#    def test_convCoordsChkToChrFromFile_matchs_out_of_overlap( self ):\n-#        dChunks2CoordMaps = {"chunk1": Map( "chunk1", "dmel_chr4", 760001, 960000 ),\n-#                             "chunk2": Map( "chunk2", "dmel_chr4", 950001, 1150000 ) }\n-#        tmpPathFileName = "dummyPathCoordOnChr_%s" % self._uniqId \n-#        self._writePathFileCoordOnChunk_outOfOverlap(tmpPathFileName)\n-#        \n-#        expPathFile = "dummyExpPathFile_%s" % self._uniqId\n-#        self._writePathFileCoordOnChrWithOutDoublons_outOfOverlap(expPathFile)\n-#        \n-#        outTableName = self._i.convCoordsChkToChrFromFile(tmpPathFileName, "path", dChunks2CoordMaps)\n-#        \n-#        obsPathFile = "dummyObsPathFile_%s" % self._uniqId\n-#        self._iDb.exportDataToFile(outTableName, obsPathFile)\n-#        \n-#        self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))\n-#        \n-#        for f in [ expPathFile, obsPathFile, tmpPathFileName ]:\n-#            os.remove( f )\n-#        self._iDb.dropTable(outTableName)\n-        \n-    def test_mergeCoordsOnChunkOverlaps( self ):\n-        dChunks2CoordMaps = { "chunk1": Map( "chunk1", "chromosome1", 1, 100 ),\n-                             "chunk2": Map( "chunk2", "chromosome1", 91, 190 ),\n-                             "chunk3": Map( "chunk3", "chromosome2", 1, 100 ) }\n-        tmpPathTable = "dummyTmpPathTable"\n-        linesToProcess = [\n-                          "1" + "\\t" + "chromosome1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.8" + "\\n",  # hit within the 1st chunk\n-                          "3" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n",       # hit included within the chunk overlap, on the 2nd 
chunk\n-                          "2" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" '..b'tMARWOLEN1_1p:classII:TIR\\t285\\t320\\t7e-25\\t28\\t41.67\\n")\n-        pathFile.write("152\\tchunk2\\t3866\\t3889\\tCR1-19_HM_1p:classI:LINE\\t898\\t891\\t5e-21\\t4\\t34.98\\n")\n-        pathFile.write("153\\tchunk2\\t3951\\t4343\\tCR1-1_DWil_1p:classI:LINE\\t127\\t2\\t4e-18\\t92\\t37.59\\n")\n-        pathFile.write("154\\tchunk2\\t3866\\t3889\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n-        pathFile.write("155\\tchunk2\\t3102\\t3199\\tCR1-1_DWil_2p:classI:LINE\\t869\\t837\\t2e-26\\t38\\t57.89\\n")\n-        pathFile.close()\n-        \n-#    def _writePathFileCoordOnChunk_outOfOverlap(self, pathFileName):\n-#        pathFile = open( pathFileName, "w" )\n-#        pathFile.write("123\\tchunk1\\t108397\\t108531\\tMariner2_AG_1p:classII:TIR\\t53\\t97\\t8e-19\\t28\\t35.56\\n")\n-#        pathFile.write("123\\tchunk1\\t108545\\t109120\\tMariner2_AG_1p:classII:TIR\\t102\\t333\\t8e-19\\t87\\t27.97\\n")\n-#        pathFile.write("124\\tchunk1\\t59607\\t59714\\tLINER1-2_NVi_2p:classI:?\\t502\\t537\\t3e-20\\t30\\t36.11\\n")\n-#        pathFile.write("124\\tchunk1\\t59695\\t60156\\tLINER1-2_NVi_2p:classI:?\\t533\\t725\\t3e-20\\t90\\t36.79\\n")\n-#        pathFile.write("125\\tchunk1\\t193027\\t193101\\tCR1-8_AG_1p:classI:LINE\\t470\\t448\\t1e-27\\t11\\t28.57\\n")\n-#        pathFile.write("126\\tchunk1\\t102131\\t102178\\tTc1-1_TCa_1p:classII:TIR\\t288\\t274\\t5e-29\\t18\\t52.5\\n")\n-#        pathFile.write("127\\tchunk1\\t59520\\t59606\\tNotoAg1_2p:classI:?\\t482\\t508\\t1e-13\\t14\\t30.61\\n")\n-#        pathFile.write("128\\tchunk1\\t183866\\t193889\\tCR1-19_HM_1p:classI:LINE\\t898\\t1891\\t5e-21\\t4\\t34.98\\n")\n-#        pathFile.write("129\\tchunk1\\t183866\\t200000\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n-#        
pathFile.write("150\\tchunk2\\t21176\\t21250\\tTc1-1_TCa_1p:classII:TIR\\t135\\t109\\t8e-32\\t21\\t41.57\\n")\n-#        pathFile.write("151\\tchunk2\\t116603\\t116698\\tMARWOLEN1_1p:classII:TIR\\t285\\t320\\t7e-25\\t28\\t41.67\\n")\n-#        pathFile.write("152\\tchunk2\\t1\\t3889\\tCR1-19_HM_1p:classI:LINE\\t898\\t1891\\t5e-21\\t4\\t34.98\\n")\n-#        pathFile.write("153\\tchunk2\\t3951\\t4343\\tCR1-1_DWil_1p:classI:LINE\\t127\\t2\\t4e-18\\t92\\t37.59\\n")\n-#        pathFile.write("154\\tchunk2\\t1\\t13889\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n-#        pathFile.write("155\\tchunk2\\t3102\\t3199\\tCR1-1_DWil_2p:classI:LINE\\t869\\t837\\t2e-26\\t38\\t57.89\\n")\n-#        pathFile.close()\n-#        \n-#    def _writePathFileCoordOnChrWithOutDoublons_outOfOverlap(self, pathFileName):\n-#        file = open( pathFileName, "w" )\n-#        file.write("123\\tdmel_chr4\\t868397\\t868531\\tMariner2_AG_1p:classII:TIR\\t53\\t97\\t8e-19\\t28\\t35.56\\n")\n-#        file.write("123\\tdmel_chr4\\t868545\\t869120\\tMariner2_AG_1p:classII:TIR\\t102\\t333\\t8e-19\\t87\\t27.97\\n")\n-#        file.write("124\\tdmel_chr4\\t819607\\t819714\\tLINER1-2_NVi_2p:classI:?\\t502\\t537\\t3e-20\\t30\\t36.11\\n")\n-#        file.write("124\\tdmel_chr4\\t819695\\t820156\\tLINER1-2_NVi_2p:classI:?\\t533\\t725\\t3e-20\\t90\\t36.79\\n")\n-#        file.write("125\\tdmel_chr4\\t953027\\t953101\\tCR1-8_AG_1p:classI:LINE\\t470\\t448\\t1e-27\\t11\\t28.57\\n")\n-#        file.write("126\\tdmel_chr4\\t862131\\t862178\\tTc1-1_TCa_1p:classII:TIR\\t288\\t274\\t5e-29\\t18\\t52.5\\n")\n-#        file.write("127\\tdmel_chr4\\t819520\\t819606\\tNotoAg1_2p:classI:?\\t482\\t508\\t1e-13\\t14\\t30.61\\n")\n-#        file.write("128\\tdmel_chr4\\t943866\\t953889\\tCR1-19_HM_1p:classI:LINE\\t898\\t1891\\t5e-21\\t4\\t34.98\\n")\n-#        file.write("129\\tdmel_chr4\\t943866\\t963889\\tCR1-83_HM_1p:classI:LINE\\t912\\t905\\t3e-21\\t4\\t34.62\\n")\n-#        
file.write("150\\tdmel_chr4\\t971176\\t971250\\tTc1-1_TCa_1p:classII:TIR\\t135\\t109\\t8e-32\\t21\\t41.57\\n")\n-#        file.write("151\\tdmel_chr4\\t1066603\\t1066698\\tMARWOLEN1_1p:classII:TIR\\t285\\t320\\t7e-25\\t28\\t41.67\\n")\n-#        file.write("153\\tdmel_chr4\\t953951\\t954343\\tCR1-1_DWil_1p:classI:LINE\\t127\\t2\\t4e-18\\t92\\t37.59\\n")\n-#        file.write("155\\tdmel_chr4\\t953102\\t953199\\tCR1-1_DWil_2p:classI:LINE\\t869\\t837\\t2e-26\\t38\\t57.89\\n")\n-#        file.close()\n-       \n-if __name__ == "__main__":\n-        unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_F_ConvCoord.py
--- a/commons/core/coord/test/Test_F_ConvCoord.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,213 +0,0 @@\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.sql.DbFactory import DbFactory\n-from commons.core.coord.ConvCoord import ConvCoord\n-import time\n-import subprocess\n-import os\n-import unittest\n-\n-class Test_F_ConvCoord(unittest.TestCase):\n-    \n-    def setUp( self ):\n-        self._i = ConvCoord()\n-        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n-        self._inData = "dummyInData_%s" % ( self._uniqId )\n-        self._mapData = "dummyMapData_%s" % ( self._uniqId )\n-        self._expData = "dummyExpData_%s" % ( self._uniqId )\n-        self._obsData = "dummyObsData_%s" % ( self._uniqId )\n-        self._iDb = DbFactory.createInstance()\n-        self._i._iDb = self._iDb\n-        \n-    def tearDown( self ):\n-        self._iDb.close()\n-        \n-    def test_run_as_script_alignFile_query( self ):\n-        configFile = "%s/dummyConfigFile_%s" % ( os.getcwd(), self._uniqId )\n-        configF = open( configFile, "w" )\n-        configF.write( "[repet_env]\\n" )\n-        configF.write( "repet_host: %s\\n" % ( os.environ["REPET_HOST"] ) )\n-        configF.write( "repet_user: %s\\n" % ( os.environ["REPET_USER"] ) )\n-        configF.write( "repet_pw: %s\\n" % ( os.environ["REPET_PW"] ) )\n-        configF.write( "repet_db: %s\\n" % ( os.environ["REPET_DB"] ) )\n-        configF.write( "repet_port: %s\\n" % ( os.environ["REPET_PORT"] ) )\n-        configF.close()\n-        self._writeMapFile( self._mapData )\n-        \n-        linesToProcess = [ "chunk1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.800000" + "\\n",  # hit within the 1st chunk\n-                           "chunk1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",   # hit included within the chunk overlap, on the 1st chunk\n-                       
    "chunk2" + "\\t" + "2" + "\\t" + "9" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",       # hit included within the chunk overlap, on the 2nd chunk\n-                           "chunk2" + "\\t" + "51" + "\\t" + "58" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",     # hit inside the 2nd chunk\n-                           "chunk2" + "\\t" + "51" + "\\t" + "70" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n"     # subject on reverse strand\n-                           ]\n-        FileUtils.writeLineListInFile( self._inData, linesToProcess )\n-        \n-        refLines = [ "chromosome1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.800000" + "\\n",\n-                     "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",\n-                     "chromosome1" + "\\t" + "141" + "\\t" + "148" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",\n-                     "chromosome1" + "\\t" + "141" + "\\t" + "160" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n"\n-                     ]\n-        FileUtils.writeLineListInFile( self._expData, refLines )\n-        \n-        cmd = "ConvCoord.py"\n-        cmd += " -i %s" % ( self._inData )\n-        cmd += " -f %s" % ( "align" )\n-        cmd += " -c %s" % ( "q" )\n-        cmd += " -m %s" % ( self._mapData )\n-        cmd += " -o %s" % ( self._obsData )\n-        cmd += " -C %s" % ( configFile )\n-        process = subprocess.Popen(cmd, shell = True)\n-        process.communicate()\n-        \n-        self.assertTrue( FileUtils.are2FilesIdentical( 
self._expData, self._obsData ) )\n-        \n-        os.remove( self._inData )\n-        os.remove(configFile)\n-        os.remove( self._mapData )\n-        os.r'..b'"8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",   # hit included within the chunk overlap, on the 1st chunk\n-                           "3" + "\\t" + "chunk2" + "\\t" + "2" + "\\t" + "9" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",       # hit included within the chunk overlap, on the 2nd chunk\n-                           "4" + "\\t" + "chunk2" + "\\t" + "51" + "\\t" + "58" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n",     # hit inside the 2nd chunk\n-                           "5" + "\\t" + "chunk2" + "\\t" + "51" + "\\t" + "70" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.800000" + "\\n"     # subject on reverse strand\n-                           ]\n-        FileUtils.writeLineListInFile( self._inData, linesToProcess )\n-        self._iDb.createTable( self._inData, "path", self._inData, True )\n-        os.remove( self._inData )\n-        \n-        refLines = [ "1" + "\\t" + "chromosome1" + "\\t" + "21" + "\\t" + "37" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "27" + "\\t" + "8e-58" + "\\t" + "30" + "\\t" + "97.8" + "\\n",\n-                     "2" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n",\n-                     "3" + "\\t" + "chromosome1" + "\\t" + "92" + "\\t" + "99" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n",       # hit included within the chunk overlap, on the 2nd chunk\n-                     "4" + "\\t" + "chromosome1" + "\\t" + "141" + "\\t" + "148" + "\\t" + "TE1" + "\\t" + "1" + "\\t" + "8" + "\\t" + "8e-58" + "\\t" + "11" + 
"\\t" + "97.8" + "\\n",\n-                     "5" + "\\t" + "chromosome1" + "\\t" + "141" + "\\t" + "160" + "\\t" + "TE1" + "\\t" + "8" + "\\t" + "1" + "\\t" + "8e-58" + "\\t" + "11" + "\\t" + "97.8" + "\\n"\n-                     ]\n-        FileUtils.writeLineListInFile( self._expData, refLines )\n-        \n-        cmd = "ConvCoord.py"\n-        cmd += " -i %s" % ( self._inData )\n-        cmd += " -f %s" % ( "path" )\n-        cmd += " -c %s" % ( "q" )\n-        cmd += " -m %s" % ( self._mapData )\n-        cmd += " -M %s" % ( "no" )\n-        cmd += " -o %s" % ( self._obsData )\n-        process = subprocess.Popen(cmd, shell = True)\n-        process.communicate()\n-        \n-        self._iDb.exportDataToFile( self._obsData, self._obsData )\n-        self.assertTrue( FileUtils.are2FilesIdentical( self._expData, self._obsData ) )\n-        \n-        os.remove( self._obsData )\n-        os.remove( self._expData )\n-        self._iDb.dropTable( self._mapData )\n-        self._iDb.dropTable( self._inData )\n-        self._iDb.dropTable( self._expData )\n-        self._iDb.dropTable( self._obsData )\n-\n-    def test_run(self):\n-        inFileName = "DmelChr4_chk.align.not_over.filtered"\n-        expFileName = "%s/Tools/DmelChr4_chr.align.not_over.filtered" % os.environ["REPET_DATA"]\n-        obsFileName = "obs.align"\n-        os.symlink("%s/Tools/%s" % (os.environ["REPET_DATA"], inFileName), inFileName)\n-        iConvCoord = ConvCoord()\n-        iConvCoord.setInputData(inFileName)\n-        iConvCoord.setMapData("%s/Tools/DmelChr4_chunks.map" % os.environ["REPET_DATA"])\n-        iConvCoord.setCoordinatesToConvert("qs")\n-        iConvCoord.setMergeChunkOverlaps(False)\n-        iConvCoord.setOutputData(obsFileName)\n-        iConvCoord.run()\n-        \n-        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))\n-        \n-        os.remove(inFileName)\n-        os.remove(obsFileName)\n-        \n-    def _writeMapFile( self, 
mapFile ):\n-        mapF = open( mapFile, "w" )\n-        mapF.write( "chunk1\\tchromosome1\\t1\\t100\\n" )\n-        mapF.write( "chunk2\\tchromosome1\\t91\\t190\\n" )\n-        mapF.write( "chunk3\\tchromosome2\\t1\\t100\\n" )\n-        mapF.close()\n-\n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_Map.py
--- a/commons/core/coord/test/Test_Map.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,183 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-import unittest
-import os
-from commons.core.coord.Map import Map
-from commons.core.utils.FileUtils import FileUtils
-
-
-class Test_Map( unittest.TestCase ):
-    
-    def setUp(self):
-        self._map = Map()
-        
-    def test_setFromString(self):
-        line = "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n"   # test with '\t' separator
-        self._map.setFromString(line)
-        self.assertEqual( self._map.name, "MbQ12Gr2Cl2" )
-        self.assertEqual( self._map.seqname, "consensus1" )
-        self.assertEqual( self._map.start, 51 )
-        self.assertEqual( self._map.end, 1230 )
-        line = "MbQ12Gr2Cl2;consensus1;51;1230"   # test with ';' separator
-        self._map.setFromString(line,";")
-        self.assertEqual( self._map.name, "MbQ12Gr2Cl2" )
-        self.assertEqual( self._map.seqname, "consensus1" )
-        self.assertEqual( self._map.start, 51 )
-        self.assertEqual( self._map.end, 1230 )
-    
-    def test___eq__(self):
-        self._map.setFromString( "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n" )
-        o = Map()
-        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n" )
-        self.assertEqual( self._map, o )   # same data
-        o.setFromString( "MbQ12Gr2Cl1\tconsensus1\t51\t1230\n" )
-        self.assertNotEqual( self._map, o )   # different name
-        o.setFromString( "MbQ12Gr2Cl2\tconsensus2\t51\t1230\n" )
-        self.assertNotEqual( self._map, o )   # different seqname
-        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t15\t1230\n" )
-        self.assertNotEqual( self._map, o )   # different start
-        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t51\t123000\n" )
-        self.assertNotEqual( self._map, o )   # different end
-        o.setFromString( "MbQ12Gr2Cl2\tconsensus1\t1230\t51\n" )
-        self.assertNotEqual( self._map, o )   # same start/end but in different order
-        
-    def test_setFromTuple(self):
-        tuple = ("MbQ12Gr2Cl2", "consensus1","51","1230")
-        self._map.setFromTuple(tuple)
-
-        expMap = Map("MbQ12Gr2Cl2", "consensus1",51,1230)
-        obsMap = self._map
-        
-        self.assertEquals(expMap, obsMap)
-    
-    def test_read_empty_file(self):
-        
-        fileName = "dummyFile"
-        os.system("touch " + fileName) 
-        fileHandle = open(fileName, "r")
-        
-        obsResult = self._map.read(fileHandle)
-        expResult = 0
-         
-        fileHandle.close()
-        os.remove(fileName) 
-        
-        self.assertEquals(expResult, obsResult)
-    
-    def test_read_uncompleted_line( self):
-        uncompletedLine = "MbQ12Gr2Cl2\tconsensus1\t51"
-        fileName = "dummyFile"
-
-        fileHandle = open(fileName, "w")
-        fileHandle.write(uncompletedLine)
-        fileHandle.close()
-
-        fileHandle = open(fileName, "r")
-       
-        obsResult = self._map.read(fileHandle)
-        expResult = 0
-
-        fileHandle.close()
-        os.remove(fileName)
-
-        self.assertEquals(obsResult, expResult)
-
-    def test_read(self):
-        line =  "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n"
-        fileName = "dummyFile"
-
-        fileHandle = open(fileName, "w")
-        fileHandle.write(line)
-        fileHandle.close()
-
-        fileHandle = open(fileName, "r")
-        self._map.read(fileHandle)
-        obsResult = self._map
-        
-        expResult = Map()
-        expResult.setFromString(line) 
-
-        fileHandle.close()
-        os.remove(fileName)
-
-        self.assertEquals(obsResult, expResult) 
-     
-    def test_write(self):
-        line =  "MbQ12Gr2Cl2\tconsensus1\t51\t1230\n"
-        expFileName = "expFileName"
-
-        fileHandle = open(expFileName, "w")
-        fileHandle.write(line)
-        fileHandle.close()
-        
-        obsFileName = "obsFileName"
-        fileHandle = open(obsFileName, "w")
-        self._map.setFromString(line)
-        self._map.write(fileHandle)
-        fileHandle.close()
-        
-        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )
-        
-        os.remove(obsFileName)
-        os.remove(expFileName)
-        
-    def test_diff1(self):
-        map1 = Map("seq1","DmelChr4", 190000, 390000)
-        map2 = Map("seq2","DmelChr4", 290000, 590000)
-        
-        expMap1 = Map("seq1", "DmelChr4", 190000, 289999)
-        expReturnedMap = Map()
-        
-        obsReturnedMap = map1.diff(map2)
-        obsMap1 = map1
-        
-        self.assertEquals(expMap1, obsMap1)
-        self.assertEquals(expReturnedMap, obsReturnedMap)
-        
-    def test_diff2(self):
-        map1 = Map("seq1","DmelChr4", 190000, 590000)
-        map2 = Map("seq2","DmelChr4", 290000, 390000)
-
-        expMap1 = Map("seq1", "DmelChr4", 190000, 289999)
-        expReturnedMap = Map("seq1", "DmelChr4", 390001, 590000)
-        
-        obsReturnedMap = map1.diff(map2)
-        obsMap1 = map1
-        
-        self.assertEquals(expMap1, obsMap1)
-        self.assertEquals(expReturnedMap, obsReturnedMap)
-        
-        
-test_suite = unittest.TestSuite()
-test_suite.addTest( unittest.makeSuite( Test_Map ) )
-if __name__ == "__main__":
-    unittest.TextTestRunner(verbosity=2).run( test_suite )
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_MapUtils.py
--- a/commons/core/coord/test/Test_MapUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,384 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import os\n-import sys\n-from commons.core.coord.MapUtils import MapUtils\n-from commons.core.coord.Map import Map\n-from commons.core.coord.Set import Set\n-from commons.core.utils.FileUtils import FileUtils\n-\n-\n-class Test_MapUtils( unittest.TestCase ):\n-    \n-    def test_getMapListSortedByIncreasingMinThenMax( self ):\n-        iMap1 = Map("name1", "chr1", 1, 350)\n-        iMap2 = Map("name2", "chr1", 1, 100)\n-        iMap3 = Map("name3", "chr1", 50, 350)\n-        iMap4 = Map("name4", "chr1", 5, 450)\n-        lMaps = [ iMap1, iMap2, iMap3, iMap4 ]\n-        \n-        expLMaps = [ iMap2, iMap1, iMap4, iMap3 ]\n-        \n-        obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n-        \n-        self.assertEquals( expLMaps, obsLMaps )\n-        \n-        \n-    def test_getMapListSortedByIncreasingMinThenMax_ordered( self ):\n-        iMap1 = Map("name1", "chr1", 1, 100)\n-        iMap2 = Map("name2", "chr1", 1, 350)\n-        \n-        lMaps = [ iMap1, iMap2 ]\n-        expLMaps = [ iMap1, iMap2 ]\n-        \n-        obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n-        \n-        self.assertEquals( expLMaps, obsLMaps )\n-        \n-        \n-    def test_getMapListSortedByIncreasingMinThenMax_unordered( self ):\n-        iMap1 = Map("name1", "chr1", 1, 350)\n-        iMap2 = Map("name2", "chr1", 1, 100)\n-        \n-        lMaps = [ iMap1, iMap2 ]\n-        expLMaps = [ iMap2, iMap1 ]\n-        \n-        obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n-        \n-        self.assertEquals( expLMaps, obsLMaps )\n-        \n-        \n-    def test_getMapListSortedByIncreasingMinThenMax_nonOverlapping( self ):\n-        iMap1 = Map("name1", "chr1", 1, 350)\n-        iMap2 = Map("name2", "chr1", 400, 600)\n-       
 \n-        lMaps = [ iMap2, iMap1 ]\n-        expLMaps = [ iMap1, iMap2 ]\n-        \n-        obsLMaps = MapUtils.getMapListSortedByIncreasingMinThenMax( lMaps )\n-        \n-        self.assertEquals( expLMaps, obsLMaps )\n-        \n-        \n-    def test_getMapListSortedByIncreasingMinThenMax_sameMinThreeMaps( self ):\n-        iMap1 = Map("name1", "chr1", 350, 1)\n-        iMap2 = Map("name2", "chr1", 400, 1)\n-        iMap3 = Map("name3", "chr1", 500, 1)\n-        \n-        lMaps = [ iMap2, iMap1, iMap3 ]\n-        expLMaps = [ iMap1, iMap2, iMap3 ]\n-        \n-        obsLM'..b'SetFileHandler.write( "2\\tseq27\\tchr2\\t301\\t500\\n" )\n-        expSetFileHandler.write( "3\\tseq40\\tchr2\\t600\\t700\\n" )\n-        expSetFileHandler.write( "4\\tseq2\\tchr3\\t301\\t500\\n" )\n-        expSetFileHandler.close()\n-        \n-        obsFile = "dummyObsFile"\n-        \n-        MapUtils.convertMapFileIntoSetFile( mapInputFile, obsFile )\n-        \n-        self.assertTrue( FileUtils.are2FilesIdentical( expSetFile, obsFile ) )\n-        \n-        for f in [ expSetFile, mapInputFile, obsFile ]:\n-            os.remove( f )\n-\n-    def test_convertMapFileIntoSetFile_one_line(self):\n-        mapInputFile = "dummyExpFile"\n-        mapFileHandler = open( mapInputFile, "w" )\n-        mapFileHandler.write( "seq31\\tchr1\\t151\\t250\\n" )\n-        mapFileHandler.close()\n-\n-        expSetFile = "dummyexpSetFile"\n-        expSetFileHandler = open( expSetFile, "w" )\n-        expSetFileHandler.write( "1\\tseq31\\tchr1\\t151\\t250\\n" )\n-        expSetFileHandler.close()\n-        \n-        obsFile = "dummyObsFile"\n-        \n-        MapUtils.convertMapFileIntoSetFile( mapInputFile, obsFile )\n-        \n-        self.assertTrue( FileUtils.are2FilesIdentical( expSetFile, obsFile ) )\n-        \n-        for f in [ expSetFile, mapInputFile, obsFile ]:\n-            os.remove( f )\n-\n-    def test_convertMapFileIntoSetFile_empty_file(self):\n-        
mapInputFile = "dummyFile.map"\n-        mapFileHandler = open( mapInputFile, "w" )\n-        mapFileHandler.close()\n-        \n-        expFile = "dummyExpFile.map.set"\n-        expFileHandler = open( expFile, "w" )\n-        expFileHandler.close()\n-        \n-        obsFile = "dummyFile.map.set"\n-        \n-        MapUtils.convertMapFileIntoSetFile( mapInputFile )\n-        \n-        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n-        \n-        for f in [ expFile, mapInputFile, obsFile ]:\n-            os.remove( f )\n-            \n-    def test_writeListInFile_empty_list(self):\n-        lMaps = [ ]\n-        expFileName = "expFileName"\n-        fileHandle = open(expFileName, "w")\n-        fileHandle.close()\n- \n-        obsFileName = "obsFileName"\n-        fileHandle = open(obsFileName, "w")\n-        MapUtils.writeListInFile(lMaps, obsFileName, "w")\n-        fileHandle.close()\n-         \n-        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )\n-        \n-        os.remove(obsFileName)\n-        os.remove(expFileName)\n-        \n-    def test_writeListInFile_list_one_set(self):\n-        lMaps = [ Map( "map1", "map1seq", 1, 10 ) ]\n-        line =  "map1\\tmap1seq\\t1\\t10\\n"\n-       \n-        expFileName = "expFileName"\n- \n-        fileHandle = open(expFileName, "w")\n-        fileHandle.write(line)\n-        fileHandle.close()\n- \n-        obsFileName = "obsFileName"\n-        fileHandle = open(obsFileName, "w")\n-        MapUtils.writeListInFile(lMaps, obsFileName, "w")\n-        fileHandle.close()\n-         \n-        self.assertTrue( FileUtils.are2FilesIdentical( expFileName, obsFileName ) )\n-        \n-        os.remove(obsFileName)\n-        os.remove(expFileName)\n-\n-    def test_getMinLengthOfMapFile(self):\n-        mapFileName = "%s/Gnome_tools/Vein_v4_scaffold_00001.fa.Nstretch.map" % os.environ["REPET_DATA"]\n-        expMinLengthofMapFile = 20\n-        iMap = 
MapUtils()\n-        obsMinLengthofMapFile = iMap.getMinLengthOfMapFile(mapFileName)\n-        self.assertEquals(expMinLengthofMapFile, obsMinLengthofMapFile)\n-       \n-    def test_getMaxLengthOfMapFile(self):\n-        mapFileName = "%s/Gnome_tools/Vein_v4_scaffold_00001.fa.Nstretch.map" % os.environ["REPET_DATA"]\n-        expMinLengthofMapFile = 6344\n-        iMap = MapUtils()\n-        obsMinLengthofMapFile = iMap.getMaxLengthOfMapFile(mapFileName)\n-        self.assertEquals(expMinLengthofMapFile, obsMinLengthofMapFile)\n-       \n-\n-        \n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_MapUtils ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_Match.py
--- a/commons/core/coord/test/Test_Match.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,363 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-from commons.core.coord.Match import Match\n-from commons.core.coord.Path import Path\n-\n-\n-class Test_Match( unittest.TestCase ):\n-    \n-    def test_eq_match_equals( self ):\n-        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match1 = Match()\n-        match1.setFromTuple(tuple1)\n-        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match2 = Match()\n-        match2.setFromTuple(tuple2)\n-        self.assertEquals( match1, match2 )\n-        \n-    def test_eq_match_not_equals_query_name( self ):\n-        tuple1 = ("Name", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match1 = Match()\n-        match1.setFromTuple(tuple1)\n-        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match2 = Match()\n-        match2.setFromTuple(tuple2)\n-        self.assertNotEquals( match1, match2 )\n-        \n-    def test_eq_match_not_equals_query_start( self ):\n-        tuple1 = ("QName", 2, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match1 = Match()\n-        match1.setFromTuple(tuple1)\n-        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match2 = Match()\n-        match2.setFromTuple(tuple2)\n-        self.assertNotEquals( match1, match2 )\n-        \n-    def test_eq_match_not_equals_query_end( self ):\n-        tuple1 = ("QName", 1, 6, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match1 = Match()\n-        match1.setFromTuple(tuple1)\n-        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match2 = Match()\n-        match2.setFromTuple(tuple2)\n-        self.assertNotEquals( 
match1, match2 )\n-        \n-    def test_eq_match_not_equals_query_length( self ):\n-        tuple1 = ("QName", 1, 5, 6, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match1 = Match()\n-        match1.setFromTuple(tuple1)\n-        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match2 = Match()\n-        match2.setFromTuple(tuple2)\n-        self.assertNotEquals( match1, match2 )\n-        \n-    def test_eq_match_not_equals_query_length_perc( self ):\n-        tuple1 = ("QName", 1, 5, 5, 0.15, 0.2, "SName'..b'ple = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match = Match()\n-        match.setFromTuple(tuple)\n-        expString = "QName\\t1\\t5\\t5\\t%f\\t%f\\tSName\\t5\\t25\\t20\\t%f\\t%g\\t15\\t%f\\t1" % (0.1,0.2,0.15,1e-20, 87.2)\n-        obsString = match.toString()\n-        self.assertEquals(expString, obsString)\n-        \n-    def test_getPathInstance( self ):\n-        tuple = ( "QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1 )\n-        match = Match()\n-        match.setFromTuple( tuple )\n-        tuple = ( 1, "QName", 1, 5, "SName", 5, 25, 1e-20, 15, 87.2 )\n-        exp = Path()\n-        exp.setFromTuple( tuple )\n-        obs = match.getPathInstance()\n-        self.assertEqual( exp, obs )\n-        \n-    def test_getQryIsIncluded(self):\n-        tuple = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match = Match()\n-        match.setFromTuple(tuple)\n-        expString = "query QName (50 bp: 1-5) is contained in subject SName (133 bp: 5-25): id=87.20 - 0.100 - 0.200 - 0.150"\n-        obsString = match.getQryIsIncluded()\n-        self.assertEquals(expString, obsString)\n-        \n-    def test_isDoublonWith_Matchs_equals(self):\n-        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match1 = Match()\n-        
match1.setFromTuple(tuple1)\n-        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match2 = Match()\n-        match2.setFromTuple(tuple2)\n-        self.assertTrue(match1.isDoublonWith(match2))\n-        \n-    def test_isDoublonWith_Matchs_unequals_on_MatchNumbers(self):\n-        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match1 = Match()\n-        match1.setFromTuple(tuple1)\n-        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 86.2, 1)\n-        match2 = Match()\n-        match2.setFromTuple(tuple2)\n-        self.assertFalse(match1.isDoublonWith(match2))\n-        \n-    def test_isDoublonWith_Matchs_unequals_on_SeqNames(self):\n-        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match1 = Match()\n-        match1.setFromTuple(tuple1)\n-        tuple2 = ("QName", 1, 5, 5, 0.1, 0.2, "Name", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match2 = Match()\n-        match2.setFromTuple(tuple2)\n-        self.assertFalse(match1.isDoublonWith(match2))\n-        \n-    def test_isDoublonWith_Matchs_unequals_on_Coordinates(self):\n-        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match1 = Match()\n-        match1.setFromTuple(tuple1)\n-        tuple2 = ("QName", 1, 6, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match2 = Match()\n-        match2.setFromTuple(tuple2)\n-        self.assertFalse(match1.isDoublonWith(match2))\n-        \n-    def test_isDoublonWith_Reversed_Matchs_equals(self):\n-        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match1 = Match()\n-        match1.setFromTuple(tuple1)\n-        tuple2 = ("SName", 5, 25, 20, 0.15, 0.2, "QName", 1, 5, 5, 0.1, 1e-20, 15, 87.2, 1)\n-        match2 = Match()\n-        match2.setFromTuple(tuple2)\n-        
self.assertTrue(match1.isDoublonWith(match2))\n-        \n-    def test_isDoublonWith_Reversed_Matchs_unequals(self):\n-        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)\n-        match1 = Match()\n-        match1.setFromTuple(tuple1)\n-        tuple2 = ("SName", 5, 25, 20, 0.15, 0.2, "QName", 1, 6, 5, 0.1, 1e-20, 15, 87.2, 1)\n-        match2 = Match()\n-        match2.setFromTuple(tuple2)\n-        self.assertFalse(match1.isDoublonWith(match2))\n-        \n-        \n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_Match ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_MatchUtils.py
--- a/commons/core/coord/test/Test_MatchUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,439 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import os\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.coord.MatchUtils import MatchUtils\n-from commons.core.coord.Match import Match\n-from commons.core.seq.BioseqDB import BioseqDB\n-\n-\n-class Test_MatchUtils( unittest.TestCase ):\n-    \n-    def test_getMatchListFromFile( self ):\n-        inFile = "dummyInFile"\n-        inFileHandler = open( inFile, "w" )\n-        inFileHandler.write( "query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n" )\n-        m1 = Match()\n-        m1.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n-        m1.write( inFileHandler )\n-        m2 = Match()\n-        m2.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName2", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n-        m2.write( inFileHandler )\n-        inFileHandler.close()\n-        \n-        lExp = [ m1, m2 ]\n-        \n-        lObs = MatchUtils.getMatchListFromFile( inFile )\n-        \n-        self.assertEquals( lExp, lObs )\n-        \n-        os.remove( inFile )\n-        \n-    def test_getDictOfListsWithSubjectAsKey( self ):\n-        m1 = Match()\n-        m1.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n-        m2 = Match()\n-        m2.setFromTuple( ("QName", 1, 5, 5, 0.1, 0.2, "SName2", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n-        lMatch = [ m1, m2 ]\n-        \n-        dExp = { "SName1": [ m1 ], "SName2": [ m2 ] }\n-        \n-        dObs = MatchUtils.getDictOfListsWithSubjectAsKey( lMatch )\n-        \n-        self.assertEquals( dExp, dObs )\n-        \n-    def test_getDictOfListsWithQueryAsKey( self ):\n-        m1 
= Match()\n-        m1.setFromTuple( ("QName1", 1, 5, 5, 0.1, 0.2, "SName1", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n-        m2 = Match()\n-        m2.setFromTuple( ("QName2", 1, 5, 5, 0.1, 0.2, "SName2", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n-        m3 = Match()\n-        m3.setFromTuple( ("QName1", 1, 5, 5, 0.1, 0.2, "SName3", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1) )\n-        lMatch = [ m1, m2, m3 ]\n-        \n-        dExp = { "QName1": [ m1, m3 ], "QName2": [ m2 ] }\n-        \n-        dObs = MatchUtils.getDictOfListsWithQueryAsKey( lMatch )\n-        \n-        self.assertEquals'..b'TTCACTGGTGTGTCATGCACATTTAATAGGGGTAAGACTGAATAAAAAATGATTATTTG\\n")\n-        f.write("CATGAAATGGGGATGAGAGAGAAGGAAAGAGTTTCATCCTGGGATTCGTTTCATTCACCG\\n")\n-        f.close()\n-\n-    def _writeMatchFile2(self, fileName):\n-        f = open(fileName, "w")\n-        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n-        f.write("header2\\t1\\t120\\t120\\t1\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n-        f.close()\n-        \n-    def _writeMatchFile3(self, fileName):\n-        f = open(fileName, "w")\n-        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n-        f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n-        f.write("header3\\t1\\t120\\t120\\t0.99\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-68\\t238\\t100\\t1\\n")\n-        f.write("header4\\t1\\t120\\t120\\t1\\t0.94157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t67\\t1\\n")\n-        f.close()\n-        \n-    def _writeMatchFile4(self, fileName):\n- 
       f = open(fileName, "w")\n-        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n-        f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n-        f.write("header3\\t120\\t220\\t120\\t0.99\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-65\\t238\\t100\\t1\\n")\n-        f.write("header4\\t1\\t120\\t120\\t1\\t0.94157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-67\\t244\\t90\\t1\\n")\n-        f.close()\n-        \n-    def _writeExpAlignFile(self,fileName):\n-        f = open(fileName, "w")\n-        f.write("header2\\t1\\t120\\tBS31790\\t19\\t138\\t3e-68\\t238.0\\t100.0\\n")\n-        f.write("header3\\t120\\t220\\tBS31790\\t19\\t138\\t3e-65\\t238.0\\t100.0\\n")\n-        f.write("header4\\t1\\t120\\tBS31790\\t19\\t138\\t3e-67\\t244.0\\t90.0\\n")\n-        f.close()\n-        \n-    def _writeMatchFile5(self,fileName):\n-        f = open(fileName, "w")\n-        f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n-        f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n-        f.write("header2\\t124\\t144\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100\\t1\\n")\n-        f.write("header3\\t120\\t220\\t120\\t0.99\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-65\\t238\\t100\\t1\\n")\n-        f.write("header4\\t1\\t120\\t120\\t1\\t0.94157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-67\\t244\\t90\\t1\\n")\n-        f.close()\n-        \n-    def _writeExpMatchFile(self,fileName):\n-        f = open(fileName, "w")\n-        
f.write("query.name\\tquery.start\\tquery.end\\tquery.length\\tquery.length.%\\tmatch.length.%\\tsubject.name\\tsubject.start\\tsubject.end\\tsubject.length\\tsubject.length.%\\tE.value\\tScore\\tIdentity\\tpath\\n")\n-        f.write("header2\\t1\\t120\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100.000000\\t1\\n")\n-        f.write("header2\\t124\\t144\\t120\\t0.674157\\t0.674157\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-68\\t238\\t100.000000\\t1\\n")\n-        f.write("header3\\t120\\t220\\t120\\t0.990000\\t0.994157\\tBS31790\\t19\\t138\\t120\\t0.994157\\t3e-65\\t238\\t100.000000\\t2\\n")\n-        f.write("header4\\t1\\t120\\t120\\t1.000000\\t0.941570\\tBS31790\\t19\\t138\\t120\\t0.674157\\t3e-67\\t244\\t90.000000\\t3\\n")\n-        f.close()\n-    \n-\n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_MatchUtils ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_MergedRange.py
--- a/commons/core/coord/test/Test_MergedRange.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,119 +0,0 @@
-import unittest
-from commons.core.coord.MergedRange import MergedRange
-from commons.core.coord.Match import Match
-
-class Test_MergedRange(unittest.TestCase):
-    
-    def test_eq_True(self):
-        mr1 = MergedRange([1], 6, 10)
-        mr2 = MergedRange([1], 6, 10)
-        self.assertEquals(mr1, mr2)
-    
-    def test_eq_different_list(self):
-        mr1 = MergedRange([1], 6, 10)
-        mr2 = MergedRange([1, 2], 6, 10)
-        self.assertNotEquals(mr1, mr2)
-    
-    def test_eq_different_start(self):
-        mr1 = MergedRange([1], 5, 10)
-        mr2 = MergedRange([1], 6, 10)
-        self.assertNotEquals(mr1, mr2)
-    
-    def test_eq_different_end(self):
-        mr1 = MergedRange([1], 6, 10)
-        mr2 = MergedRange([1], 6, 11)
-        self.assertNotEquals(mr1, mr2)
-
-    def test_isOverlapping_no( self ):
-        mr1 = MergedRange([1], 6, 10)
-        mr2 = MergedRange([2], 16, 20)
-        exp = False
-        obs = mr1.isOverlapping( mr2 )
-        self.assertEquals( exp, obs )
-        
-    def test_isOverlapping_yes( self ):
-        mr1 = MergedRange([1], 6, 10)
-        mr2 = MergedRange([2], 5, 20)
-        exp = True
-        obs = mr1.isOverlapping( mr2 )
-        self.assertEquals( exp, obs )
-
-    def test_isOverlapping_range1_before_range2( self ):
-        mr1 = MergedRange([1], 6, 10)
-        mr2 = MergedRange([2], 8, 15)
-        exp = True
-        obs = mr1.isOverlapping( mr2 )
-        self.assertEquals( exp, obs )
-        
-    def test_isOverlapping_range1_after_range2( self ):
-        mr1 = MergedRange([1], 6, 10)
-        mr2 = MergedRange([2], 1, 8)
-        exp = True
-        obs = mr1.isOverlapping( mr2 )
-        self.assertEquals( exp, obs )
-        
-    def test_isOverlapping_range1_equal_range2( self ):
-        mr1 = MergedRange([1], 6, 10)
-        mr2 = MergedRange([2], 6, 10)
-        exp = True
-        obs = mr1.isOverlapping( mr2 )
-        self.assertEquals( exp, obs )
-    
-    def test_merge_mr1_with_mr2(self):
-        otherMergedRange = MergedRange()
-        otherMergedRange._lId.append(3)
-        otherMergedRange._start = 1
-        otherMergedRange._end = 10
-        
-        mr1 = MergedRange()
-        mr1._lId.append(1)
-        mr1._start = 6
-        mr1._end = 10
-        
-        mr2 = MergedRange([2], 1, 15)
-        mr1.merge(mr2)
-        
-        exp = MergedRange([1, 2], 1, 15)
-        self.assertEquals(exp, mr1)
-        
-    def test_merge_mr2_with_mr1(self):
-        mr1 = MergedRange([1], 6, 10)
-        mr2 = MergedRange([2], 1, 15)
-        mr2.merge(mr1)
-        exp = MergedRange([1, 2], 1, 15)
-        self.assertEquals(exp, mr2)
-        
-    def test_setFromMatch(self):
-        tuple = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)
-        iMatch = Match()
-        iMatch.setFromTuple(tuple)
-        
-        expMergedRange = MergedRange([1], 1, 5)
-        obsMergedRange = MergedRange()
-        obsMergedRange.setFromMatch(iMatch)
-        
-        self.assertEquals(expMergedRange, obsMergedRange)
-    
-    def test_getMergedRangeListFromMatchList(self):
-        tuple1 = ("QName", 1, 5, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 1)
-        iMatch1 = Match()
-        iMatch1.setFromTuple(tuple1)
-        tuple2 = ("QName", 10, 15, 5, 0.1, 0.2, "SName", 5, 25, 20, 0.15, 1e-20, 15, 87.2, 2)
-        iMatch2 = Match()
-        iMatch2.setFromTuple(tuple2)
-        lMatch = [iMatch1, iMatch2]
-        
-        explMergedRange = [MergedRange([1], 1, 5), MergedRange([2], 10, 15)]
-        obslMergedRange = MergedRange.getMergedRangeListFromMatchList(lMatch)
-
-        self.assertEquals(explMergedRange, obslMergedRange)
-    
-    def test_getMergedRangeListFromMatchList_empty_list(self):
-        lMatch = []
-        explMergedRange = []
-        obslMergedRange = MergedRange.getMergedRangeListFromMatchList(lMatch)
-
-        self.assertEquals(explMergedRange, obslMergedRange)
-        
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_Path.py
--- a/commons/core/coord/test/Test_Path.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,146 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-import unittest
-from commons.core.coord.Path import Path
-from commons.core.coord.Align import Align
-from commons.core.coord.Set import Set
-
-
-class Test_Path( unittest.TestCase ):
-    
-    def setUp( self ):
-        self._path = Path()
-        
-    def test_setFromTuple( self ):
-        line = "1\tchr1\t1\t10\tTE2\t11\t17\t1e-20\t30\t90.2"
-        self._path.setFromTuple( line.split("\t") )
-        self.assertEqual( self._path.id, 1 )
-        self.assertEqual( self._path.range_query.seqname, "chr1" )
-        self.assertEqual( self._path.range_query.start, 1 )
-        self.assertEqual( self._path.range_query.end, 10 )
-        self.assertEqual( self._path.range_subject.seqname, "TE2" )
-        self.assertEqual( self._path.range_subject.start, 11 )
-        self.assertEqual( self._path.range_subject.end, 17 )
-        self.assertEqual( self._path.e_value, float("1e-20") )
-        self.assertEqual( self._path.score, float("30") )
-        self.assertEqual( self._path.identity, float("90.2") )
-        
-    def test___eq__( self ):
-        self._path.setFromString( "1\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t30\t90.2\n" )
-        o = Path()
-        o.setFromString( "1\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t30\t90.2\n" )
-        self.assertEqual( self._path,  o )
-        o.setFromString( "2\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t30\t90.2\n" )
-        self.assertNotEqual( self._path,  o )
-        o.setFromString( "1\tchr1\t1\t6\tTE2\t11\t16\t1e-20\t3000000\t90.2\n" )
-        self.assertNotEqual( self._path,  o )
-        
-    def test_canMerge( self ):
-        tuple = ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2")
-        self._path.setFromTuple(tuple)
-        tuple = ("2", "chr1","2", "9","TE2","10","13","1e-20","30","90.2")
-        o = Path()
-        o.setFromTuple(tuple)
-        self.assertTrue(self._path.canMerge(o))
-        
-    def test_canMerge_on_same_id ( self ): 
-        tuple = ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2")
-        self._path.setFromTuple(tuple)
-        tuple = ("1", "chr1","2", "9","TE2","10","13","1e-20","30","90.2")
-        o = Path()
-        o.setFromTuple(tuple)
-        self.assertFalse(self._path.canMerge(o))
-        
-    def test_canMerge_on_same_chr( self ):     
-        tuple = ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2")
-        self._path.setFromTuple(tuple)
-        tuple = ("2", "chr2","2", "9","TE2","10","13","1e-20","30","90.2")
-        o = Path()
-        o.setFromTuple(tuple)
-        self.assertFalse(self._path.canMerge(o))
-        
-    def test_canMerge_on_diff_subj( self ):      
-        tuple = ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2")
-        self._path.setFromTuple(tuple)
-        tuple = ("2", "chr1","2", "9","TE3","10","13","1e-20","30","90.2")
-        o = Path()
-        o.setFromTuple(tuple)
-        self.assertFalse(self._path.canMerge(o)) 
-        
-    def test_canMerge_on_queries_that_do_not_overlap( self ):
-        tuple = ("1", "chr1","5", "11","TE2","11","17","1e-20","30","90.2")
-        self._path.setFromTuple(tuple)
-        tuple = ("2", "chr1","1", "4","TE2","10","13","1e-20","30","90.2")
-        o = Path()
-        o.setFromTuple(tuple)
-        self.assertFalse(self._path.canMerge(o)) 
-        
-    def test_canMerge_on_subjects_that_do_not_overlap( self ):    
-        tuple = ("1", "chr1","1", "10","TE2","11","17","1e-20","30","90.2")
-        self._path.setFromTuple(tuple)
-        tuple = ("2", "chr1","2", "9","TE2","1","10","1e-20","30","90.2")
-        o = Path()
-        o.setFromTuple(tuple)
-        self.assertFalse(self._path.canMerge(o))
-        
-    def test_getSubjectAsSetOfQuery( self ):
-        tuple = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2")
-        self._path.setFromTuple(tuple)
-        exp = Set(1,"TE2","chr1",1,10)
-        obs = self._path.getSubjectAsSetOfQuery()
-        self.assertEqual( exp, obs )
-        
-    def test_getSubjectAsSetOfQuery_on_neg_strand( self ):
-        tuple = ("1","chr1","10","1","TE2","11","17","1e-20","30","90.2")
-        self._path.setFromTuple(tuple)
-        exp = Set(1,"TE2","chr1",10,1)
-        obs = self._path.getSubjectAsSetOfQuery()
-        self.assertEqual( exp, obs )
-        
-    def test_toString( self ):
-        self._path.setFromString( "1\tchr1\t1\t10\tTE3\t11\t17\t1e-20\t30\t85.2\n" )
-        exp = "1\tchr1\t1\t10\tTE3\t11\t17\t%g\t30\t%f" % ( 1e-20, 85.2 )
-        obs = self._path.toString()
-        self.assertEqual( obs, exp )
-        
-    def test_getAlignInstance( self ):
-        self._path.setFromTuple( ( "2", "chr3", "250", "151", "seq5", "1", "100", "1e-32", "147", "87.9" ) )
-        expAlign = Align()
-        expAlign.setFromTuple( ( "chr3", "151", "250", "seq5", "100", "1", "1e-32", "147", "87.9" ) )
-        obsAlign = self._path.getAlignInstance()
-        self.assertEqual( expAlign, obsAlign )
-        
-        
-test_suite = unittest.TestSuite()
-test_suite.addTest( unittest.makeSuite( Test_Path ) )
-if __name__ == "__main__":
-    unittest.TextTestRunner(verbosity=2).run( test_suite )
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_PathUtils.py
--- a/commons/core/coord/test/Test_PathUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,1667 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import os\n-import time\n-from commons.core.coord.PathUtils import PathUtils\n-from commons.core.coord.Path import Path\n-from commons.core.coord.Set import Set\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.coord.Range import Range\n-from commons.core.coord.Align import Align\n-\n-\n-class Test_PathUtils ( unittest.TestCase ):\n-\n-    def test_getSetListFromQueries( self ):\n-        set1 = Set(1,"TE2","chr1",1,10)\n-        set2 = Set(1,"TE2","chr1",10,1)\n-        set3 = Set(1,"TE3","chr4",12,22)\n-    \n-        expList = [set1, set2, set3]\n-\n-        tuple1 = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2")\n-        tuple2 = ("1","chr1","10","1","TE2","11","17","1e-20","30","90.2")\n-        tuple3 = ("1","chr4","12","22","TE3","11","17","1e-20","30","90.2")\n-\n-        pathList = self._makePathListFromTupleList( [ tuple1, tuple2, tuple3 ] )\n-\n-        obsList = PathUtils.getSetListFromQueries( pathList )\n-\n-        self.assertEquals( expList, obsList )\n-        \n-    \n-    def test_getSetListFromQueries_on_empty_list( self ):\n-        expList = []\n-        obsList = PathUtils.getSetListFromQueries( [] )\n-\n-        self.assertEquals( expList, obsList )\n-        \n-        \n-    def test_getSetListFromQueries_on_list_size1( self ):\n-        set1 = Set(1,"TE2","chr1",1,10)\n-        \n-        expList = [set1]\n-        \n-        tuple1 = ("1","chr1","1","10","TE2","11","17","1e-20","30","90.2")\n-        path1 = Path()\n-        path1.setFromTuple(tuple1)\n-        \n-        pathList = [path1]\n-        obsList = PathUtils.getSetListFromQueries( pathList )\n-        \n-        self.assertEquals( expList, obsList )\n-        \n-        \n-    def test_getRangeListFromSubjects_initiallyOrdered_directStrand( self ):\n-        tuple1 = 
("1","chr1","1","10","TE2","1","10","1e-20","30","90.2")\n-        tuple2 = ("1","chr1","21","30","TE2","11","20","1e-20","30","90.2")\n-        tuple3 = ("1","chr1","41","50","TE2","21","30","1e-20","30","90.2")\n-        lPaths = self._makePathListFromTupleList( [ tuple1, tuple2, tuple3 ] )\n-        \n-        iSet1 = Range( "TE2", 1, 10 )\n-        iSet2 = Range( "TE2", 11, 20 )\n-        iSet3 = Range( "TE2", 21, 30 )\n-        lExp = [ iSet1, iSet2, iSet3 ]\n-        \n-        lObs = PathUtils.getRangeListFromSubjects( lPaths )\n-        \n-        self.assertEquals( lExp, lObs )\n-'..b'\\t4641\\t0\\t585\\t97.3607\\n")\n-        f.write("9\\taurora-element\\t2265\\t2483\\tAtha5Chr4_Pals_Piler_3595_62_MAP_3\\t3999\\t4218\\t0\\t361\\t96.347\\n")\n-        f.write("10\\taurora-element\\t2834\\t4045\\tAtha5Chr4_Pals_Piler_3595_69_MAP_3\\t4800\\t6011\\t0\\t2074\\t97.0248\\n")\n-        f.write("11\\taurora-element\\t2\\t113\\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\\t205\\t317\\t8.5e-37\\t157\\t93.75\\n")\n-        f.write("11\\taurora-element\\t2\\t113\\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\\t305\\t417\\t8.5e-37\\t157\\t93.75\\n")\n-        f.write("11\\taurora-element\\t2\\t113\\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\\t305\\t417\\t8.5e-37\\t157\\t93.75\\n")\n-        f.close()            \n-        \n-        obsPathFile = "obsDummyPathFile"\n-        PathUtils.removeInPathFileDuplicatedPathOnQueryNameQueryCoordAndSubjectName(pathFile, obsPathFile)\n-        \n-        expPathFile = "expDummyPathFile"\n-        f = open(expPathFile, "w")\n-        f.write("1\\tG4\\t1\\t3856\\tAtha5Chr4_Pals_Piler_3590_69_MAP_3\\t1\\t3856\\t0\\t7642\\t99.974100\\n")\n-        f.write("2\\trooA\\t1\\t386\\tAtha5Chr4_Pals_Piler_3589_69_MAP_3\\t1\\t386\\t6.3e-220\\t758\\t99.481900\\n")\n-        f.write("3\\trooA\\t7236\\t7621\\tAtha5Chr4_Pals_Piler_3536_69_MAP_3\\t1\\t386\\t6.3e-220\\t758\\t99.481900\\n")\n-        
f.write("4\\trooA\\t387\\t7235\\tAtha5Chr4_Pals_Piler_3596_69_MAP_3\\t1\\t6849\\t0\\t13580\\t99.985400\\n")\n-        f.write("5\\taurora-element\\t4046\\t4257\\tAtha5Chr4_Pals_Piler_3540_69_MAP_3\\t1\\t204\\t6.1e-80\\t300\\t96.568600\\n")\n-        f.write("6\\taurora-element\\t274\\t381\\tAtha5Chr4_Pals_Piler_3595_23_MAP_3\\t177\\t284\\t0\\t191\\t97.222200\\n")\n-        f.write("6\\taurora-element\\t116\\t287\\tAtha5Chr4_Pals_Piler_3595_30_MAP_3\\t3\\t170\\t0\\t290\\t98.809500\\n")\n-        f.write("7\\taurora-element\\t393\\t902\\tAtha5Chr4_Pals_Piler_3595_31_MAP_3\\t1467\\t1945\\t0\\t873\\t97.244100\\n")\n-        f.write("7\\taurora-element\\t1387\\t2271\\tAtha5Chr4_Pals_Piler_3595_31_MAP_3\\t276\\t10780\\t0\\t1576\\t97.624400\\n")\n-        f.write("8\\taurora-element\\t2486\\t2828\\tAtha5Chr4_Pals_Piler_3595_50_MAP_3\\t4301\\t4641\\t0\\t585\\t97.360700\\n")\n-        f.write("9\\taurora-element\\t2265\\t2483\\tAtha5Chr4_Pals_Piler_3595_62_MAP_3\\t3999\\t4218\\t0\\t361\\t96.347000\\n")\n-        f.write("10\\taurora-element\\t2834\\t4045\\tAtha5Chr4_Pals_Piler_3595_69_MAP_3\\t4800\\t6011\\t0\\t2074\\t97.024800\\n")\n-        f.write("11\\taurora-element\\t2\\t113\\tAtha5Chr4_Pals_Piler_3598_69_MAP_3\\t205\\t317\\t8.5e-37\\t157\\t93.750000\\n")\n-        f.close()\n-        \n-        self.assertTrue(FileUtils.are2FilesIdentical(expPathFile, obsPathFile))\n-        \n-        os.remove(pathFile)\n-        os.remove(expPathFile)\n-        os.remove(obsPathFile)\n-        \n-        \n-    def test_getPathListWithoutDuplicatesOnQueryCoord(self):\n-        iPath1 = Path(1, Range("qry1",398,491), Range("sbj1",10,112), 0.0, 10, 98.7)\n-        iPath2 = Path(1, Range("qry1",451,492), Range("sbj1",124,169), 0.0, 10, 98.7)\n-        iPath3 = Path(1, Range("qry1",451,492), Range("sbj1",249,294), 0.0, 10, 98.7)\n-        lPaths = [iPath3, iPath2, iPath1]\n-       \n-        obslPaths = PathUtils.getPathListWithoutDuplicatesOnQueryCoord(lPaths)\n-       \n-        
explPaths = [iPath1, iPath3]\n-        \n-        self.assertEquals(explPaths, obslPaths)\n-        \n-                \n-    def _makePathListFromTupleList ( self, tupleList ):\n-        pathList = []\n-        for tuple in tupleList:\n-            path = Path()\n-            path.setFromTuple(tuple)\n-            pathList.append(path)\n-        return pathList\n-    \n-    def _makePathListFromStringList (self, stringList):\n-        pathList = []\n-        for string in stringList:\n-            path = Path()\n-            path.setFromString(string)\n-            pathList.append(path)\n-        return pathList\n-    \n-    def _show (self, list):\n-        for item in list:\n-            print item.toString()\n-            \n-            \n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_PathUtils ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_Range.py
--- a/commons/core/coord/test/Test_Range.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,698 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-from commons.core.coord.Range import Range\n-from commons.core.coord.Range import getBin, getIdx\n-\n-\n-class Test_Range( unittest.TestCase ):\n-    \n-    def setUp(self):\n-        self._range = Range()\n-        \n-    def test_setFromString(self):\n-        line = "chunk1\\t190000\\t390000"\n-        self._range.setFromString( line )\n-        self.assertEqual( self._range.seqname, "chunk1" )\n-        self.assertEqual( self._range.start, 190000 )\n-        self.assertEqual( self._range.end, 390000 )\n-        \n-        line = "chunk1\\t190000\\t390000\\n"\n-        self._range.setFromString( line )\n-        self.assertEqual( self._range.seqname, "chunk1" )\n-        self.assertEqual( self._range.start, 190000 )\n-        self.assertEqual( self._range.end, 390000 )\n-        \n-        line = "chunk1;190000;390000"\n-        self._range.setFromString( line, ";" )\n-        self.assertEqual( self._range.seqname, "chunk1" )\n-        self.assertEqual( self._range.start, 190000 )\n-        self.assertEqual( self._range.end, 390000 )\n-        \n-    def test_setFromTuple(self):\n-        tuple = ("chunk1","190000","390000")\n-        self._range.setFromTuple( tuple)\n-        \n-        self.assertEqual( self._range.seqname, "chunk1" )\n-        self.assertEqual( self._range.start, 190000 )\n-        self.assertEqual( self._range.end, 390000 )\n-        \n-    def test___eq__(self):\n-        self._range.setFromString( "chunk1\\t190000\\t390000\\n" )\n-        o = Range()\n-        o.setFromString( "chunk1\\t190000\\t390000\\n" )\n-        self.assertEqual( self._range, o )\n-        \n-        o.setFromString( "chunk1\\t190000\\t39" )\n-        self.assertNotEquals( self._range, o )\n-        \n-        o.setFromString( "chromosome1\\t190000\\t390000" )\n-        self.assertNotEquals( 
self._range, o )\n-        \n-        o.setFromString( "chunk1\\t390000\\t190000" )\n-        self.assertNotEquals( self._range, o )\n-        \n-        o.setFromString( "chromosome1\\t390000\\t190000" )\n-        self.assertNotEquals( self._range, o )\n-        \n-    def test_getMin(self):\n-        self._range.setFromTuple( ("chunk1", 190000, 390000) )\n-        expMin = 190000\n-        obsMin = self._range.getMin() \n-        self.assertTrue(expMin, obsMin)\n-        \n-    def test_getMax(self):\n-        self._range.setFromTuple( ("chunk1", 190000, 390000) )\n-        expMax = 390000\n-        obsMa'..b'2)\n-        obsRange1 = range1\n-        \n-        self.assertEquals(expRange1, obsRange1)\n-        self.assertEquals(expReturnedRange, obsReturnedRange)\n-        \n-    def test_getIdx(self):\n-        self.assertEqual(getIdx(1000,3),1000001)\n-        self.assertEqual(getIdx(999,3),1000000)\n-        self.assertEqual(getIdx(2000,3),1000002)\n-        self.assertEqual(getIdx(2000,4),2000000)\n-        self.assertEqual(getIdx(2000,5),3000000)\n-        self.assertEqual(getIdx(20000000,6),4000000)\n-        self.assertEqual(getIdx(20000000,5),3000200)\n-        self.assertEqual(getIdx(20000000,4),2002000)\n-        self.assertEqual(getIdx(20000000,3),1020000)\n-        \n-    def test_getBin_bin_level_9(self):\n-        tuple1 = ("chunk1", 190000000, 390000000)\n-        range1 =Range()\n-        range1.setFromTuple(tuple1)\n-        \n-        expRes = 100000000.0\n-        obsRes = range1.getBin()\n-        \n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_8(self):\n-        tuple1 = ("chunk1", 19000000, 39000000)\n-        range1 =Range()\n-        range1.setFromTuple(tuple1)\n-        \n-        expRes = 100000000.0\n-        obsRes = range1.getBin()\n-        \n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_7(self):\n-        tuple1 = ("chunk1", 1900000, 3900000)\n-        
range1 =Range()\n-        range1.setFromTuple(tuple1)\n-        \n-        expRes = 10000000.0\n-        obsRes = range1.getBin()\n-        \n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_6(self):\n-        tuple1 = ("chunk1", 190000, 390000)\n-        range1 =Range()\n-        range1.setFromTuple(tuple1)\n-        \n-        expRes = 1000000.0\n-        obsRes = range1.getBin()\n-        \n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_5(self):\n-        tuple = ("chunk1", 19000, 39000)\n-        range =Range()\n-        range.setFromTuple(tuple)\n-        expRes = 100000.0\n-        obsRes = range.getBin()\n-        \n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_4(self):\n-        tuple = ("chunk1", 1900, 3900)\n-        range =Range()\n-        range.setFromTuple(tuple)\n-        \n-        expRes = 10000.0\n-        obsRes = range.getBin()\n-        \n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_3(self):\n-        tuple = ("chunk1", 190, 390)\n-        range =Range()\n-        range.setFromTuple(tuple)\n-        \n-        expRes = 1000.0\n-        obsRes = range.getBin()\n-        \n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_2(self):\n-        tuple = ("chunk1", 19, 39)\n-        range =Range()\n-        range.setFromTuple(tuple)\n-        \n-        expRes = 1000.0\n-        obsRes = range.getBin()\n-        \n-        self.assertEquals(expRes, obsRes)\n-        \n-    def test_getBin_bin_level_1(self):\n-        tuple = ("chunk1", 1, 3)\n-        range =Range()\n-        range.setFromTuple(tuple)\n-        \n-        expRes = 1000.0\n-        obsRes = range.getBin()\n-        \n-        self.assertEquals(expRes, obsRes)\n-        \n-        \n-    def test_getBin_function(self):\n-        expBin = 2L\n-        obsBin = getBin(200, 2)\n-        \n-        
self.assertEquals(expBin, obsBin)\n-        \n-    def test_findIdx(self):\n-        o = Range()\n-        o.setFromString( "chunk1\\t1000\\t2000\\n" )\n-        self.assertEqual(o.findIdx(),2000000)\n-        \n-        o.setFromString( "chunk1\\t2000\\t1000\\n" )       \n-        self.assertEqual(o.findIdx(),2000000)\n-        \n-        o.setFromString( "chunk1\\t200\\t999\\n" )       \n-        self.assertEqual(o.findIdx(),1000000)\n-        \n-        o.setFromString( "chunk1\\t1\\t20000000\\n" )       \n-        self.assertEqual(o.findIdx(),4000000)\n-        \n-        \n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_Range ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_Set.py
--- a/commons/core/coord/test/Test_Set.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,282 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import os\n-from commons.core.coord.Set import Set\n-from commons.core.coord.Map import Map\n-\n-\n-class Test_Set( unittest.TestCase ):\n-    \n-    def test__eq__sets_equals(self):\n-        set1 = Set( 1, "set1", "seq1", 1, 2 )\n-        set2 = Set( 1, "set1", "seq1", 1 ,2 )   \n-        self.assertEquals( set1, set2 )\n-        \n-    def test__eq__sets_not_equals_ids(self):\n-        set1 = Set( 1, "set1", "seq1", 1, 2 )\n-        set2 = Set( 2, "set1", "seq1", 1 ,2 )   \n-        self.assertNotEquals( set1, set2 )\n-        \n-    def test__eq__sets_not_equals_name(self):\n-        set1 = Set( 1, "set1", "seq1", 1, 2 )\n-        set2 = Set( 1, "set2", "seq1", 1 ,2 )   \n-        self.assertNotEquals( set1, set2 )\n-    \n-    def test__eq__on_empty_set(self):\n-        set1 = Set()\n-        set2 = Set()\n-        self.assertEquals( set1, set2 )\n-        \n-    def test_setFromTuple_equals_instances(self):\n-        tuple = ( 1, "set1", "seq1", 1, 2 )\n-        obsSet = Set()\n-        obsSet.setFromTuple(tuple)\n-        expSet = Set( 1, "set1", "seq1", 1, 2 )\n-        self.assertEquals( expSet, obsSet )\n-   \n-    def test_setFromTuple_not_equals_instances(self):\n-        tuple = ( 1, "set1", "seq1", 1, 2 )\n-        obsSet = Set()\n-        obsSet.setFromTuple(tuple)\n-        expSet = Set( 2, "set1", "seq1", 1, 2 )\n-        self.assertNotEquals( expSet, obsSet )\n-        \n-    def test_read_empty_line_file(self):\n-        fileName = "dummyFile"\n-        \n-        os.system(" touch " + fileName)\n-        \n-        fileHandler = open(fileName, "r")\n-        \n-        obsSet = Set()\n-        \n-        obsRes = obsSet.read( fileHandler )\n-        expRes = 0\n-        \n-        fileHandler.close()\n-        os.remove(fileName)\n-        \n-        self.assertEquals( expRes, 
obsRes )\n-\n-    def test_read_one_line_file(self):\n-        line = ( "1\\tset1\\tseq1\\t1\\t2" )\n-        fileName = "dummyFile"\n-        \n-        fileHandler = open( fileName, "w" )\n-        fileHandler.write( line )\n-        fileHandler.close()\n-        \n-        fileHandler = open( fileName, "r" )\n-        \n-        tuple = line.split("\\t")\n-        expSet = Set()\n-        expSet.setFromTuple(tuple)\n-        \n-        obsSet = Set()\n-        \n-        expRes = 1\n-        obsRes = obsSet.read(fileHandler)\n-        \n-        fileHandler.close()\n-        os.remove(fileName)\n-  '..b'et\n-        \n-        self.assertEquals( expSet, obsSet)\n-        \n-    def test_merge_first_id_smaller_than_second_id(self):\n-        firstSet = Set( 1, "set1", "seq1", 10, 40 )\n-        secondSet = Set( 2, "set2", "seq1", 20, 60 )\n-        \n-        firstSet.merge( secondSet )\n-        \n-        expSet = Set( 1, "set1", "seq1", 10, 60)\n-        obsSet = firstSet\n-        \n-        self.assertEquals( expSet, obsSet)\n-\n-    def test_merge_first_id_equals_second_id(self):\n-        firstSet = Set( 1, "set1", "seq1", 10, 40 )\n-        secondSet = Set( 1, "set2", "seq1", 20, 60 )\n-        \n-        firstSet.merge( secondSet )\n-        \n-        expSet = Set( 1, "set1", "seq1", 10, 60)\n-        obsSet = firstSet\n-        \n-        self.assertEquals( expSet, obsSet)\n-    \n-    def test_merge_different_seqnames(self):\n-        firstSet = Set( 2, "set1", "seq1", 10, 40 )\n-        secondSet = Set( 1, "set1", "seq2", 20, 60 )\n-        expSet = Set( 2, "set1", "seq1", 10, 40 )\n-        firstSet.merge( secondSet )\n-        obsSet = firstSet\n-        self.assertEquals( expSet, obsSet )\n-        \n-    def test_diff_on_empty_sets(self):\n-        firstSet = Set()\n-        firstSet.seqname = "seq1"\n-        secondSet = Set()\n-        secondSet.seqname = "seq2"\n-        \n-        obsSet = firstSet.diff( secondSet )\n-        expSet = 
Set()\n-        \n-        self.assertEquals( expSet, obsSet )\n-    \n-    def test_diff(self):\n-        firstSet = Set( 2, "set1", "seq1", 10, 80 )\n-        secondSet = Set( 1, "set2", "seq1", 20, 60 )\n-        \n-        expSet1 = Set( 2, "set1", "seq1", 10, 19 )\n-        expSet2 = Set( 2, "set1", "seq1", 61, 80 )        \n-        \n-        obsSet2 = firstSet.diff( secondSet )\n-        obsSet1 = firstSet\n-                \n-        self.assertEquals( expSet1, obsSet1 ) \n-        self.assertEquals( expSet2, obsSet2 )\n-        \n-    def test_diff_reverse(self):\n-        firstSet = Set( 2, "set1", "seq1", 20, 60 )\n-        secondSet = Set( 1, "set2", "seq1", 10, 80 )\n-        \n-        expSet1 = Set( 2, "set1", "seq1", 0, 0 )\n-        expSet2 = Set( )        \n-        \n-        obsSet2 = firstSet.diff( secondSet )\n-        obsSet1 = firstSet\n-                \n-        self.assertEquals( expSet1, obsSet1 ) \n-        self.assertEquals( expSet2, obsSet2 )\n-        \n-    def test_diff_list1_overlap_end_list2(self):\n-        firstSet = Set( 2, "set1", "seq1", 20, 100 )\n-        secondSet = Set( 1, "set2", "seq1", 10, 80 )\n-        \n-        expSet1 = Set( 2, "set1", "seq1", 81, 100 )  \n-        expSet2 = Set( )             \n-        \n-        obsSet2 = firstSet.diff( secondSet )\n-        obsSet1 = firstSet\n-                \n-        self.assertEquals( expSet1, obsSet1 ) \n-        self.assertEquals( expSet2, obsSet2 )\n-        \n-    def test_diff_with_empty_set1(self):\n-        set2 = Set( 1, "set1", "seq1", 2, 45 )\n-        set1 = Set( )\n-        \n-        expSet1 = Set( )\n-        expSet2 = Set( )\n-        \n-        obsSet2 = set1.diff( set2 )\n-        obsSet1 = set1\n-        \n-        self.assertEquals( expSet1, obsSet1 ) \n-        self.assertEquals( expSet2, obsSet2 )\n-        \n-    def test_diff_list2_overlap_end_list1(self):\n-        firstSet = Set( 2, "set1", "seq1", 10, 70 )\n-        secondSet = Set( 1, "set2", 
"seq1", 40, 100 )\n-        \n-        expSet1 = Set( 2, "set1", "seq1", 10, 39 )\n-        expSet2 = Set( )        \n-        \n-        obsSet2 = firstSet.diff( secondSet )\n-        obsSet1 = firstSet\n-                \n-        self.assertEquals( expSet1, obsSet1 ) \n-        self.assertEquals( expSet2, obsSet2 )\n-        \n-    def test_set2map(self):\n-        set = Set( 1, "set", "seq", 1, 2 )\n-        \n-        expMap = Map( "set::1", "seq", 1, 2 )\n-        obsMap = set.set2map()\n-        \n-        self.assertEquals( expMap, obsMap )\n-    \n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_Set ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_SetUtils.py
--- a/commons/core/coord/test/Test_SetUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,1689 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import os\n-import time\n-from commons.core.coord.Set import Set\n-from commons.core.coord.Map import Map\n-from commons.core.coord.SetUtils import SetUtils\n-from commons.core.utils.FileUtils import FileUtils\n-\n-\n-class Test_SetUtils( unittest.TestCase ):\n-    \n-    def test_changeIdInList_on_empty_list(self):\n-        lSets = []\n-        SetUtils.changeIdInList( lSets , 1 )\n-        obsLSets = lSets\n-        expLSets = []\n-        self.assertEquals( expLSets , obsLSets )\n-        \n-    def test_changeIdInList_on_list_size_one(self):\n-        set1 = Set( 1, "set1", "seq1", 1, 2 )\n-        lSets = [ set1 ]\n-        SetUtils.changeIdInList( lSets , 9 )\n-        obsLSets = lSets\n-        set1 = Set( 9, "set1", "seq1", 1, 2 )\n-        expLSets = [ set1 ]\n-        self.assertEquals( expLSets , obsLSets )\n-        \n-    def test_changeIdInList(self):\n-        set1 = Set( 1, "set1", "seq1", 1, 2 )\n-        set2 = Set( 2, "set2", "seq2", 2, 3 )\n-        lSets = [ set1, set2 ]\n-        SetUtils.changeIdInList( lSets , 9 )\n-        obsLSets = lSets\n-        set1 = Set( 9, "set1", "seq1", 1, 2 )\n-        set2 = Set( 9, "set2", "seq2", 2, 3 )\n-        expLSets = [ set1, set2 ]\n-        \n-        self.assertEquals( expLSets , obsLSets )\n-        \n-    def test_getOverlapLengthBetweenLists_all_list_are_empty (self):\n-        lSets1 = []\n-        lSets2 = []\n-        \n-        expOverlapSize = 0\n-        obsOverlapSize = SetUtils.getOverlapLengthBetweenLists( lSets1, lSets2 )\n-        \n-        self.assertEquals( expOverlapSize, obsOverlapSize )\n-        \n-    def test_getOverlapLengthBetweenLists_list1_empty_list2_size_one (self):\n-        lSets1 = []\n-        lSets2 = [ Set( 9, "set1", "seq1", 1, 2 ) ]\n-        \n-        expOverlapSize = 0\n-        obsOverlapSize 
= SetUtils.getOverlapLengthBetweenLists( lSets1, lSets2 )\n-        \n-        self.assertEquals( expOverlapSize, obsOverlapSize )\n-        \n-    def test_getOverlapLengthBetweenLists_list1_empty_list2_size_two (self):\n-        lSets1 = []\n-        lSets2 = [ Set( 9, "set1", "seq1", 1, 2 ), Set( 9, "set2", "seq2", 2, 3 ) ]\n-        \n-        expOverlapSize = 0\n-        obsOverlapSize = SetUtils.getOverlapLengthBetweenLists( lSets1, lSets2 )\n-        \n-        self.assertEquals( expOverlapSize, obsOverlapSize )\n-        \n-    def test_getOverlapLengthBetweenLists_list1_si'..b'\n-        obsLSet = SetUtils.getSetListFromFile(file)\n-        os.remove(file)\n-        self.assertEqual( expLSet, obsLSet )\n-        \n-        \n-    def test_convertSetFileIntoMapFile( self ):\n-        setFile = "dummySetFile"\n-        setFileHandler = open( setFile, "w" )\n-        setFileHandler.write( "1\\tseq31\\tchr1\\t151\\t250\\n" )\n-        setFileHandler.write( "2\\tseq27\\tchr2\\t301\\t500\\n" )\n-        setFileHandler.close()\n-        \n-        expFile = "dummyExpFile"\n-        expFileHandler = open( expFile, "w" )\n-        expFileHandler.write( "seq31\\tchr1\\t151\\t250\\n" )\n-        expFileHandler.write( "seq27\\tchr2\\t301\\t500\\n" )\n-        expFileHandler.close()\n-        \n-        obsFile = "dummyObsFile"\n-        \n-        SetUtils.convertSetFileIntoMapFile( setFile, obsFile )\n-        \n-        self.assertTrue( FileUtils.are2FilesIdentical( expFile, obsFile ) )\n-        \n-        for f in [ setFile, expFile, obsFile ]:\n-            os.remove( f )\n-            \n-            \n-    def test_getDictOfListsWithSeqnameAsKey_empty( self ):\n-        lSets = []\n-        dExp = {}\n-        dObs = SetUtils.getDictOfListsWithSeqnameAsKey( lSets )\n-        self.assertEquals( dExp, dObs )\n-            \n-            \n-    def test_getDictOfListsWithSeqnameAsKey( self ):\n-        lSets = [ Set( 1, "TE3", "chr2", 10, 50 ),\n-                 
Set( 2, "gene74", "chr1", 31, 800 ),\n-                 Set( 3, "TE1", "chr1", 1, 30 ) ]\n-        dExp = { "chr1": [ Set( 2, "gene74", "chr1", 31, 800 ),\n-                          Set( 3, "TE1", "chr1", 1, 30 ) ],\n-                "chr2": [ Set( 1, "TE3", "chr2", 10, 50 ) ] }\n-        dObs = SetUtils.getDictOfListsWithSeqnameAsKey( lSets )\n-        self.assertEquals( dExp, dObs )\n-        \n-        \n-    def test_filterOnLength( self ):\n-        lSets = [ Set( 1, "TE3", "chr2", 10, 50 ),\n-                 Set( 2, "gene74", "chr1", 31, 800 ),\n-                 Set( 3, "TE1", "chr1", 1, 30 ) ]\n-        lExp = [ Set( 2, "gene74", "chr1", 31, 800 ) ]\n-        lObs = SetUtils.filterOnLength( lSets, 100 )\n-        self.assertEqual( lExp, lObs )\n-        \n-        \n-    def test_getListOfNames( self ):\n-        setFile = "dummySetFile"\n-        setFileHandler = open( setFile, "w" )\n-        setFileHandler.write( "1\\tseq31\\tchr1\\t151\\t250\\n" )\n-        setFileHandler.write( "2\\tseq27\\tchr2\\t301\\t500\\n" )\n-        setFileHandler.close()\n-        \n-        lExp = [ "seq31", "seq27" ]\n-        lObs = SetUtils.getListOfNames( setFile )\n-        \n-        self.assertEquals( lExp, lObs )\n-        \n-        os.remove( setFile )\n-        \n-        \n-    def test_getDictOfDictsWithNamesThenIdAsKeyFromFile( self ):\n-        setFile = "dummySetFile"\n-        setFileHandler = open( setFile, "w" )\n-        setFileHandler.write( "1\\tseq31\\tchr1\\t151\\t250\\n" )\n-        setFileHandler.write( "3\\tseq27\\tchr3\\t1\\t100\\n" )\n-        setFileHandler.write( "2\\tseq27\\tchr2\\t301\\t500\\n" )\n-        setFileHandler.write( "2\\tseq27\\tchr2\\t601\\t650\\n" )\n-        setFileHandler.close()\n-        \n-        dExp = { "seq31": { 1: [ Set( 1, "seq31", "chr1", 151, 250 ) ] },\n-                "seq27": { 2: [ Set( 2, "seq27", "chr2", 301, 500 ),\n-                               Set( 2, "seq27", "chr2", 601, 650 ) ],\n-                    
           3: [ Set( 3, "seq27", "chr3", 1, 100 ) ]\n-                               }\n-                }\n-        dObs = SetUtils.getDictOfDictsWithNamesThenIdAsKeyFromFile( setFile )\n-        \n-        self.assertEquals( dExp, dObs )\n-        \n-        os.remove( setFile )\n-        \n-        \n-    def _makeSetListFromTupleList (self, tupleList):\n-        setList = []\n-        for tuple in tupleList:\n-            set = Set()\n-            set.setFromTuple(tuple)\n-            setList.append(set)\n-        return setList\n-        \n-        \n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_SetUtils ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/coord/test/Test_SlidingWindow.py
--- a/commons/core/coord/test/Test_SlidingWindow.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,109 +0,0 @@
-import unittest
-from commons.core.coord.SlidingWindow import SlidingWindow
-from commons.core.coord.SlidingWindow import SlidingWindowToCountMatchingBases
-from commons.core.coord.Set import Set
-
-class Test_SlidingWindow( unittest.TestCase ):
-        
-    def test_slideWindowOnce( self ):
-        expStart = 91 
-        expEnd = 190
-        self.sw = SlidingWindow(100, 10)
-        self.sw.slideWindowOnce()
-        obsStart = self.sw._start
-        obsEnd = self.sw._end
-        
-        self.assertEqual(expStart, obsStart)
-        self.assertEqual(expEnd, obsEnd)
-        
-    def test_slideWindowOnceFourTime( self ):
-        expStart = 201 
-        expEnd = 300
-        self.sw = SlidingWindow(100, 50)
-        i = 0
-        for i in range(4):
-            self.sw.slideWindowOnce()
-            i += 1
-        obsStart = self.sw._start
-        obsEnd = self.sw._end
-        
-        self.assertEqual(expStart, obsStart)
-        self.assertEqual(expEnd, obsEnd)
-    
-        
-class Test_SlidingWindowToCountMatchingBases(unittest.TestCase):
-        
-    def test_getSetLengthOnWindow_featureIncluded( self ):
-        self.sw = SlidingWindowToCountMatchingBases(100, 1)
-        iSet = Set( 1, "TE3", "chr1", 21, 30 )
-        exp = 10
-        obs = self.sw.getSetLengthOnWindow( iSet)
-        self.assertEqual( exp, obs )
-        
-    def test_getSetLengthOnWindow_windowIncluded( self ):
-        self.sw = SlidingWindowToCountMatchingBases(100, 10)
-        self.sw.slideWindowOnce()
-        iSet = Set( 1, "TE3", "chr1", 21, 530 )
-        exp = 100
-        obs = self.sw.getSetLengthOnWindow( iSet)
-        self.assertEqual( exp, obs )
-        
-    def test_getSetLengthOnWindow_featureOverlapLeft( self ):
-        self.sw = SlidingWindowToCountMatchingBases(100, 10)
-        self.sw.slideWindowOnce()
-        iSet = Set( 1, "TE3", "chr1", 21, 130 )
-        exp = 40
-        obs = self.sw.getSetLengthOnWindow( iSet)
-        self.assertEqual( exp, obs )
-        
-    def test_getSetLengthOnWindow_featureOverlapRight( self ):
-        self.sw = SlidingWindowToCountMatchingBases(100, 10)
-        self.sw.slideWindowOnce()
-        iSet = Set( 1, "TE3", "chr1", 121, 230 )
-        exp = 70
-        obs = self.sw.getSetLengthOnWindow( iSet)
-        self.assertEqual( exp, obs )
-        
-    def test_getCoordSetOnWindow_featureIncluded( self ):
-        self.sw = SlidingWindowToCountMatchingBases(100, 1)
-        iSet = Set( 1, "TE3", "chr1", 21, 30 )
-        expStart = 21
-        expEnd = 30
-        obsStart,obsEnd = self.sw.getCoordSetOnWindow( iSet)
-        self.assertEqual( expStart, obsStart )
-        self.assertEqual( expEnd, obsEnd )
-        
-    def test_getCoordSetOnWindow_windowIncluded( self ):
-        self.sw = SlidingWindowToCountMatchingBases(100, 10)
-        self.sw.slideWindowOnce()
-        iSet = Set( 1, "TE3", "chr1", 21, 530 )
-        expStart = 91
-        expEnd = 190
-        obsStart,obsEnd = self.sw.getCoordSetOnWindow( iSet)
-        self.assertEqual( expStart, obsStart )
-        self.assertEqual( expEnd, obsEnd )
-        
-    def test_getCoordSetOnWindow_featureOverlapLeft( self ):
-        self.sw = SlidingWindowToCountMatchingBases(100, 10)
-        self.sw.slideWindowOnce()
-        iSet = Set( 1, "TE3", "chr1", 21, 130 )
-        expStart = 91
-        expEnd = 130
-        obsStart,obsEnd = self.sw.getCoordSetOnWindow( iSet)
-        self.assertEqual( expStart, obsStart )
-        self.assertEqual( expEnd, obsEnd )
-        
-    def test_getCoordSetOnWindow_featureOverlapRight( self ):
-        self.sw = SlidingWindowToCountMatchingBases(100, 10)
-        self.sw.slideWindowOnce()
-        iSet = Set( 1, "TE3", "chr1", 121, 230 )
-        expStart = 121
-        expEnd = 190
-        obsStart,obsEnd = self.sw.getCoordSetOnWindow( iSet)
-        self.assertEqual( expStart, obsStart )
-        self.assertEqual( expEnd, obsEnd )
-
-test_suite = unittest.TestSuite()
-test_suite.addTest( unittest.makeSuite( Test_SlidingWindow ) )
-if __name__ == "__main__":
-    unittest.TextTestRunner(verbosity=2).run( test_suite )
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/AxtParser.py
--- a/commons/core/parsing/AxtParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,154 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.Mapping import Mapping
-from SMART.Java.Python.structure.SubMapping import SubMapping
-from commons.core.parsing.MapperParser import MapperParser
-from SMART.Java.Python.misc import Utils
-from SMART.Java.Python.misc.Utils import getHammingDistance
-
-
-class AxtParser(MapperParser):
-    """A class that parses AXT (as given by Mosaik)"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(AxtParser, self).__init__(fileName, verbosity)
-        self.queryLine = None
-        self.subjectLine = None
-
-    def __del__(self):
-        super(AxtParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["axt"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        pass
-
-
-    def getInfos(self):
-        self.chromosomes = set()
-        self.nbMappings  = 0
-        self.size        = 0
-        cpt              = 0
-        self.reset()
-        for line in self.handle:
-            line = line.strip()
-            if line == "": continue
-            if cpt % 3 == 0:
-                line    = line.strip()
-                parts = line.split(" ")
-                self.chromosomes.add(parts[1])
-                self.size       += int(parts[6])
-                self.nbMappings += 1
-            cpt += 1
-            if self.verbosity >= 10 and self.nbMappings % 100000 == 0:
-                sys.stdout.write("    %d mappings read\r" % (self.nbMappings))
-                sys.stdout.flush()
-        self.reset()
-        if self.verbosity >= 10:
-            print "    %d mappings read" % (self.nbMappings)
-            print "Done."
-        
-
-    def parseLine(self, line):
-
-        if line.strip() == "":
-            for line in self.handle:
-                self.currentLineNb += 1
-                break
-        if line.strip() == "":
-            return None
-
-        m = re.search(r"^\s*\d+\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+([+-])\s+\d+\s*$", line)
-        if m != None:
-            #sys.exit("\nLine %d '%s' does not have an AXT format" % (self.currentLineNb, line))
-
-            mapping = Mapping()
-            subMapping = SubMapping()
-    
-            offset = -1 if m.group(7) == "-" else 0
-            subMapping.queryInterval.setName(m.group(4))
-            subMapping.queryInterval.setStart(min(int(m.group(5)), int(m.group(6)))-1)
-            subMapping.queryInterval.setEnd(max(int(m.group(5)), int(m.group(6)))-1)
-            subMapping.queryInterval.setDirection(m.group(7))
-    
-            subMapping.targetInterval.setChromosome(m.group(1))
-            subMapping.targetInterval.setStart(min(int(m.group(2)), int(m.group(3))) + offset)
-            subMapping.targetInterval.setEnd(max(int(m.group(2)), int(m.group(3))) + offset)
-            subMapping.targetInterval.setDirection(1)
-    
-            subMapping.setSize(min(subMapping.targetInterval.getSize(), subMapping.queryInterval.getSize()))
-            subMapping.setDirection(m.group(7))
-    
-            mapping.addSubMapping(subMapping)
-    
-            mapping.setDirection(m.group(7))
-            mapping.targetInterval.setChromosome(m.group(1))
-            mapping.targetInterval.setStart(min(int(m.group(2)), int(m.group(3))) + offset)
-            mapping.targetInterval.setEnd(max(int(m.group(2)), int(m.group(3))) + offset)
-    
-            mapping.queryInterval.setName(m.group(4))
-            mapping.queryInterval.setStart(min(int(m.group(5)), int(m.group(6)))-1)
-            mapping.queryInterval.setEnd(max(int(m.group(5)), int(m.group(6)))-1)
-    
-            mapping.setSize(min(mapping.targetInterval.getSize(), mapping.queryInterval.getSize()))
-    
-            for line in self.handle:
-                string1 = line.strip()
-                self.currentLineNb += 1
-                break
-            for line in self.handle:
-                string2 = line.strip()
-                self.currentLineNb += 1
-                break
-            mapping.setNbMismatches(Utils.getHammingDistance(string1, string2))
-            mapping.setNbGaps(0)
-    
-            self.currentMapping = mapping
-        else:
-            if self.queryLine == None:
-                self.queryLine = line
-            else:
-                self.subjectLine = line
-                seqLen = float(len(self.subjectLine))
-                dist = float(getHammingDistance(self.queryLine, self.subjectLine))
-                identity = ((seqLen-dist)/seqLen) *100
-                self.currentMapping.setIdentity(identity)
-                self.queryLine = None
-                self.subjectLine = None
-                return self.currentMapping
-            
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/AxtParser.pyc
b
Binary file commons/core/parsing/AxtParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BamParser.py
--- a/commons/core/parsing/BamParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,483 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2012\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import re, sys, gzip, struct\n-from commons.core.parsing.MapperParser import MapperParser\n-from SMART.Java.Python.structure.Mapping import Mapping\n-from SMART.Java.Python.structure.SubMapping import SubMapping\n-from SMART.Java.Python.structure.Interval import Interval\n-\n-\n-BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN"\n-\n-BAM_CIGAR_LOOKUP = "MIDNSHP=X"\n-BAM_CIGAR_SHIFT = 4\n-BAM_CIGAR_MASK = ((1 << BAM_CIGAR_SHIFT) - 1)\n-\n-\n-\n-def pack_int32(x):\n-\treturn struct.pack(\'<i\', x)\n-\n-def pack_uint32(x):\n-\treturn struct.pack(\'<I\', x)\n-\n-def unpack_int8(x):\n-\treturn struct.unpack(\'<b\', x)[0]\n-\n-def unpack_int16(x):\n-\treturn struct.unpack(\'<h\', x)[0]\n-\n-def unpack_int32(x):\n-\treturn struct.unpack(\'<i\', x)[0]\n-\n-def unpack_int64(x):\n-\treturn struct.unpack(\'<q\', x)[0]\n-\n-def unpack_uint8(x):\n-\treturn struct.unpack(\'<B\', x)[0]\n-\n-def unpack_uint16(x):\n-\treturn struct.unpack(\'<H\', x)[0]\n-\n-def unpack_uint32(x):\n-\treturn struct.unpack(\'<I\', x)[0]\n-\n-def unpack_uint64(x):\n-\treturn struct.unpack(\'<Q\', x)[0]\n-\n-def unpack_float(x):\n-\treturn struct.unpack(\'<f\', x)[0]\n-\n-def unpack_string(x):\n-\tlength = len(x)\n-\tformat_string = "<{0}s".format(length)\n-\tstring = struct.unpack(format_string, x)[0]\n-\tif string[-1] == \'\\0\':\n-\t\treturn string[:-1]\n-\telse:\n-\t\treturn string\n-\n-\n-BAM_TAG_CODE = {"c": unpack_int8, \\\n-\t\t\t\t"C": unpack_uint8, \\\n-\t\t\t\t"s": unpack_int16, \\\n-\t\t\t\t"S": unpack_uint16, \\\n-\t\t\t\t"i": unpack_int32, \\\n-\t\t\t\t"I": unpack_uint32, 
\\\n-\t\t\t\t"f": unpack_float, \\\n-\t\t\t\t#"A": unpack_int8, \\\n-\t\t\t\t"A": lambda x: x, \\\n-\t\t\t\t"Z": unpack_int8, \\\n-\t\t\t\t"H": unpack_int8}\n-\n-BAM_TAG_VALUE = {"c": int, \\\n-\t\t\t\t "C": int, \\\n-\t\t\t\t "s": int, \\\n-\t\t\t\t "S": int, \\\n-\t\t\t\t "i": int, \\\n-\t\t\t\t "I": int, \\\n-\t\t\t\t "f": float, \\\n-\t\t\t\t "A": lambda x: x}\n-\n-BAM_TAG_SIZE = {"c": 1, \\\n-\t\t\t\t"C": 1, \\\n-\t\t\t\t"s": 2, \\\n-\t\t\t\t"S": 2, \\\n-\t\t\t\t"i": 4, \\\n-\t\t\t\t"I": 4, \\\n-\t\t\t\t"f": 4, \\\n-\t\t\t\t"A": 1}\n-\n-\n-class CigarOp(object):\n-\tdef __init__(self, data):\n-\t\tself._length = data >> BAM_CIGAR_SHIFT\n-\t\tself._type   = BAM_CIGAR_LOOKUP[ data & BAM_CIGAR_MASK ]\n-\n-\n-class CigarData(object):\n-\tdef __init__(self, data, num_ops):\n-\t\tself._ops = []\n-\t\tfor i in range(num_ops):\n-\t\t\tcigar_data = unpack_uint32(data[i*4: (i+1)*4])\n-\t\t\tself._ops.append(CigarOp(cigar_data))\t\t\n-\n-\tdef getCigarData(self):\n-\t\treturn self._ops\n-\t\n-\tdef __str__(self):\n-\t\treturn "".join(["%d%s" % (op._length, op._type) for op in self._ops])\n-\n-\n-class TagsData(object):\n-\tdef __init__(self):\n-\t\tself._tags = {}\n-\n-\tdef add(self, tag):\n-\t\tself._tags[tag._ta'..b'nbGaps\t\t  = 0\n-\tsubMapping\t  = None\n-\tqueryOffset   = 0\n-\ttargetOffset  = 0\n-\treadStart\t  = None\n-\n-\tfor tag, value in read._tags.iteritems():\n-\t\tif tag == "X0":\n-\t\t\tnbOccurrences = value._value\n-\t\telif tag == "X1":\n-\t\t\tnbOccurrences += value._value\n-\t\telif tag == "XM":\n-\t\t\tnbMismatches = value._value\n-\tmapping.setTagValue("nbOccurrences", nbOccurrences)\n-\tmapping.setTagValue("quality", read._mappingQuality)\n-\n-\tfor operation in read._cigar:\n-\t\tif operation._type == "M":\n-\t\t\tif readStart == None:\n-\t\t\t\treadStart = queryOffset\n-\t\t\tif subMapping == None:\n-\t\t\t\tsubMapping = 
SubMapping()\n-\t\t\t\tsubMapping.setSize(operation._length)\n-\t\t\t\tsubMapping.setDirection(direction)\n-\t\t\t\tsubMapping.queryInterval.setName(read._name)\n-\t\t\t\tsubMapping.queryInterval.setStart(queryOffset)\n-\t\t\t\tsubMapping.queryInterval.setDirection(direction)\n-\t\t\t\tsubMapping.targetInterval.setChromosome(read._chromosome)\n-\t\t\t\tsubMapping.targetInterval.setStart(genomeStart + targetOffset)\n-\t\t\t\tsubMapping.targetInterval.setDirection(1)\n-\t\t\tnbMatches\t += operation._length\n-\t\t\ttargetOffset += operation._length\n-\t\t\tqueryOffset  += operation._length\n-\t\t\tcurrentNumber = 0\n-\t\t\tcontinue\n-\t\tif operation._type == "I":\n-\t\t\tnbGaps\t   += 1\n-\t\t\tqueryOffset  += operation._length\n-\t\t\tcurrentNumber = 0\n-\t\t\tcontinue\n-\t\tif operation._type == "D":\n-\t\t\tif subMapping != None:\n-\t\t\t\tsubMapping.queryInterval.setEnd(queryOffset - 1)\n-\t\t\t\tsubMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n-\t\t\t\tmapping.addSubMapping(subMapping)\n-\t\t\tsubMapping\t  = None\n-\t\t\tnbGaps\t     += 1\n-\t\t\ttargetOffset += operation._length\n-\t\t\tcurrentNumber = 0\n-\t\t\tcontinue\n-\t\tif operation._type == "N":\n-\t\t\tif subMapping != None:\n-\t\t\t\tsubMapping.queryInterval.setEnd(queryOffset - 1)\n-\t\t\t\tsubMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n-\t\t\t\tmapping.addSubMapping(subMapping)\n-\t\t\tsubMapping\t= None\n-\t\t\ttargetOffset += operation._length\n-\t\t\tcurrentNumber = 0\n-\t\t\tcontinue\n-\t\tif operation._type == "S":\n-\t\t\tnbMismatches += operation._length\n-\t\t\ttargetOffset += operation._length\n-\t\t\tqueryOffset  += operation._length\n-\t\t\tcurrentNumber = 0\n-\t\t\tcontinue\n-\t\tif operation._type == "H":\n-\t\t\ttargetOffset += operation._length\n-\t\t\tqueryOffset  += operation._length\n-\t\t\tcurrentNumber = 0\n-\t\t\tcontinue\n-\t\tif operation._type == "P":\n-\t\t\tcontinue\n-\t\traise Exception("Do not understand parameter \'%s\'" % 
(operation._type))\n-\n-\tif subMapping != None:\n-\t\tsubMapping.queryInterval.setEnd(queryOffset - 1)\n-\t\tsubMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n-\t\tmapping.addSubMapping(subMapping)\n-\tmapping.queryInterval.setStart(readStart)\n-\tmapping.queryInterval.setEnd(queryOffset - 1)\n-\tmapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n-\tmapping.setNbMismatches(nbMismatches)\n-\tmapping.setNbGaps(nbGaps)\n-\tmapping.queryInterval.setName(read._name)\n-\tmapping.queryInterval.setDirection(direction)\n-\tmapping.targetInterval.setChromosome(read._chromosome)\n-\tmapping.targetInterval.setStart(genomeStart)\n-\tmapping.targetInterval.setDirection(direction)\n-\tmapping.setSize(len(read._sequence))\n-\tmapping.setDirection(direction)\n-\treturn mapping\n-\n-\t\n-class BamParser(MapperParser):\n-\t"""A class that parses BAM format"""\n-\n-\tdef __init__(self, fileName, verbosity = 0):\n-\t\tself.verbosity = verbosity\n-\t\tself.handle = gzip.open(fileName, "rb")\n-\t\tself.reader = FileReader(self.handle)\n-\t\tself.nbMappings = None\n-\t\tself.fileName   = fileName\n-\n-\n-\tdef __del__(self):\n-\t\tself.handle.close()\n-\n-\n-\tdef getFileFormats():\n-\t\treturn ["bam"]\n-\tgetFileFormats = staticmethod(getFileFormats)\n-\n-\n-\tdef reset(self):\n-\t\tself.reader.reset()\n-\n-\n-\tdef getNextMapping(self):\n-\t\tself.currentMapping = None\n-\t\twhile self.currentMapping == None:\n-\t\t\tread = self.reader.getNextAlignment()\n-\t\t\tif not read:\n-\t\t\t\tself.currentMapping = False\n-\t\t\t\treturn False\n-\t\t\tread.parse()\n-\t\t\tself.currentMapping = parseAlignedRead(read)\n-\t\treturn self.currentMapping\n-\t\t\n-\t\t\n-\tdef setDefaultTagValue(self, name, value):\n-\t\tpass\n-\n-\n-\tdef skipFirstLines(self):\n-\t\tpass\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BamParser.pyc
b
Binary file commons/core/parsing/BamParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BedParser.py
--- a/commons/core/parsing/BedParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,139 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.Interval import Interval
-from commons.core.parsing.TranscriptListParser import TranscriptListParser
-from SMART.Java.Python.structure.Transcript import Transcript
-
-
-class BedParser(TranscriptListParser):
-    """A class that parses a BED file and create a transcript list"""
-
-
-    def __init__(self, fileName, verbosity = 0):
-        self.title = None
-        TranscriptListParser.__init__(self, fileName, verbosity)
-
-
-#    def __del__(self):
-#        super(BedParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["bed"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        mark = self.handle.tell()
-        line = self.handle.readline()
-        line = line.strip()
-        m = re.search(r"^\s*track\s+name\s*=\s*(\S+)\s+", line)
-        if m != None:
-            self.title = m.group(1)
-            self.currentLineNb += 1
-        else:
-            self.handle.seek(mark)
-        return
-            
-
-
-
-    def parseLine(self, line):
-        m = re.search(r"^\s*(\S+)\t+(\d+)\t+(\d+)\s*$", line)
-        if m != None:
-            transcript = Transcript()
-            transcript.setChromosome(m.group(1))
-            transcript.setStart(min(int(m.group(2)), int(m.group(3))-1))
-            transcript.setEnd(max(int(m.group(2)), int(m.group(3))-1))
-            transcript.setName("Unnamed")
-            transcript.setDirection(1)
-            return transcript
-
-        m = re.search(r"^\s*(\S+)\t+(\d+)\t+(\d+)\t+([^\t]+)\s*$", line)
-        if m != None:
-            transcript = Transcript()
-            transcript.setChromosome(m.group(1))
-            transcript.setStart(min(int(m.group(2)), int(m.group(3))-1))
-            transcript.setEnd(max(int(m.group(2)), int(m.group(3))-1))
-            transcript.setName(m.group(4))
-            transcript.setDirection(1)
-            return transcript
-
-        m = re.search(r"^\s*(\S+)\t+(\d+)\t+(\d+)\t+([^\t]+)\t+\d+\.?\d*\s*$", line)
-        if m != None:
-            transcript = Transcript()
-            transcript.setChromosome(m.group(1))
-            transcript.setStart(min(int(m.group(2)), int(m.group(3))-1))
-            transcript.setEnd(max(int(m.group(2)), int(m.group(3))-1))
-            transcript.setName(m.group(4))
-            transcript.setDirection(1)
-            return transcript
-
-        m = re.search(r"^\s*(\S+)\t+(\d+)\t+(\d+)\t+([^\t]+)\t+\d+\t+([+-])\t+\d+\t+\d+\t+0\t+(\d+)\t+(\S+)\t+(\S+)\s*$", line)
-        if m == None:
-            raise Exception("\nLine %d '%s' does not has a BED format." % (self.currentLineNb, line))
-        transcript = Transcript()
-        transcript.setChromosome(m.group(1))
-        transcript.setStart(min(int(m.group(2)), int(m.group(3))-1))
-        transcript.setEnd(max(int(m.group(2)), int(m.group(3))-1))
-        transcript.setName(m.group(4))
-        transcript.setDirection(m.group(5))
-        nbExons = int(m.group(6))
-        sizes = m.group(7).split(",")
-        starts = m.group(8).split(",")
-
-        # check for comment in name
-        m = re.search(r"^([^\(]*)\((\S+)\)$", transcript.getName())
-        if m != None:
-            transcript.setName(m.group(1))
-            transcript.setTagValues(m.group(2), ";", "=")
-        
-        # check for nb occurrences in name
-        m = re.search(r"(.*)-(\d+)$", transcript.getName())
-        if m != None:
-            transcript.setName(m.group(1))
-            transcript.setOccurrence(int(m.group(2)))
-
-        for i in range(nbExons):
-            exon = Interval(transcript)
-            exon.setStart(int(starts[i])+transcript.getStart())
-            exon.setEnd(transcript.getStart()+int(starts[i])+int(sizes[i])-1)
-            exon.setSize(int(sizes[i]))
-            transcript.addExon(exon)
-            
-        if transcript.exons[0].getStart() != transcript.getStart():
-            sys.exit("There is something wrong with the start of transcript line '%s': transcript starts at %d whereas first exon starts at %d" % (line.strip(), transcript.start, transcript.exons[0].start))
-        if transcript.exons[-1].getEnd() != transcript.getEnd():
-            sys.exit("There is something wrong with the end of transcript line '%s': transcript ends at %d whereas last exon ends at %d" % (line.strip(), transcript.end, transcript.exons[-1].end))
-
-        return transcript
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BedParser.pyc
b
Binary file commons/core/parsing/BedParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BlastParser.py
--- a/commons/core/parsing/BlastParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,88 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from commons.core.parsing.MapperParser import MapperParser
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.structure.SubMapping import SubMapping
-from SMART.Java.Python.structure.Mapping import Mapping
-
-
-class BlastParser(MapperParser):
-    """A class that parses the output of Blast (-m 8 format)"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(BlastParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(BlastParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["blast"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        pass
-
-
-    def parseLine(self, line):
-        m = re.search(r"^(\S+)\s+(\S+)\s+(\d+\.?\d*)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+([-+]?\d+\.?\d*[eE]?[-+]?\d*)\s+(\d+\.?\d*)\s*$", line)
-        if m == None:
-            sys.exit("\nLine %d '%s' does not have an Blast format" % (self.currentLineNb, line))
-
-        mapping = Mapping()
-
-        queryInterval = Interval()
-        queryInterval.setName(m.group(1))
-        queryInterval.setStart(min(int(m.group(7)), int(m.group(8))))
-        queryInterval.setEnd(max(int(m.group(7)), int(m.group(8))))
-
-        targetInterval = Interval()
-        targetInterval.setChromosome(m.group(2))
-        targetInterval.setStart(min(int(m.group(9)), int(m.group(10))))
-        targetInterval.setEnd(max(int(m.group(9)), int(m.group(10))))
-
-        subMapping = SubMapping()
-        subMapping.setQueryInterval(queryInterval)
-        subMapping.setTargetInterval(targetInterval)
-
-        mapping.addSubMapping(subMapping)
-
-        mapping.setIdentity(round(float(m.group(3))))
-        mapping.setSize(int(m.group(4)))
-        mapping.setNbMismatches(int(m.group(5)))
-        mapping.setNbGaps(int(m.group(6)))
-        mapping.setDirection((int(m.group(8)) - int(m.group(7))) * (int(m.group(10)) - int(m.group(9))))
-        mapping.setEvalue(float(m.group(11)))
-
-        return mapping
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BlastParser.pyc
b
Binary file commons/core/parsing/BlastParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BlatFileParser.py
--- a/commons/core/parsing/BlatFileParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,63 +0,0 @@
-from commons.core.parsing.BlatParser import BlatParser
-import os
-
-class BlatFileParser(object):
-
-    def __init__(self, blatFileName = None):
-        self._blatFileName = blatFileName
-        self._lBlatHits = []
-        self._dBlatHitsByQueries = {}
-        self._dQueries = {}
-        
-    def getDictOfQueries(self):
-        return self._dQueries
-    
-    def getResultLinesOfOneQuery(self, queryName):
-        return self._dBlatHitsByQueries[queryName]
-    
-    def getDictOfBlatHitsByQueries(self):
-        return self._dBlatHitsByQueries
-    
-    def getListsOfHits(self):
-        return self._lBlatHits
-    
-    def parseBlatFile(self):
-        blatFile = open(self._blatFileName, 'r')
-        line = blatFile.readline()
-        n = 1
-        while line != "":
-            if self._isInteger(line.split("\t")[0]):
-                iBlatParser = BlatParser()
-                iBlatParser.setAttributesFromString(line, n)
-                queryHeader = iBlatParser.getQName()
-                self._dQueries[queryHeader] = 1
-                self._lBlatHits.append(iBlatParser)
-            line = blatFile.readline()
-            n += 1
-        return self._lBlatHits
-    
-    def parseBlatFileByQueries(self):
-        blatFile = open(self._blatFileName, 'r')
-        line = blatFile.readline()
-        n = 1
-        while line != "":
-            if self._isInteger(line.split("\t")[0]):
-                iBlatParser = BlatParser()
-                iBlatParser.setAttributesFromString(line, n)
-                queryHeader = iBlatParser.getQName()
-                self._dQueries[queryHeader] = 1
-                if self._dBlatHitsByQueries.has_key(queryHeader):
-                    self._dBlatHitsByQueries[queryHeader].append(iBlatParser)
-                else:
-                    self._dBlatHitsByQueries[queryHeader] = [iBlatParser]
-            line = blatFile.readline()
-            n += 1
-        blatFile.close()
-        return self._dBlatHitsByQueries
-        
-    def _isInteger(self, string):
-        try:
-            int(string)
-            return True
-        except ValueError:
-            return False
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BlatParser.py
--- a/commons/core/parsing/BlatParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,351 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-import sys\n-\n-## this class can parse a Blat results output file\n-#\n-class BlatParser(object):\n-\n-\n-    def __init__(self, match=\'\', mismatch=\'\', repMatch=\'\', N=\'\', QGapCount=\'\', QGapBases=\'\', TGapCount=\'\', TGapBases=\'\', strand=\'\', QName=\'\', QSize=\'\', QStart=\'\', QEnd=\'\', TName=\'\', TSize=\'\', TStart=\'\', TEnd=\'\', blockCount=\'\', blockSizes=\'\', qStarts=\'\', tStarts=\'\'):\n-        self._match = match\n-        self._mismatch = mismatch\n-        self._repMatch = repMatch\n-        self._N = N\n-        self._QGapCount = QGapCount\n-        self._QGapBases = QGapBases\n-        self._TGapCount = TGapCount\n-        self._TGapBases = TGapBases\n-        self._strand = strand\n-        self._QName = QName\n-        self._QSize = QSize\n-        self._QStart = QStart\n-        self._QEnd = QEnd\n-        self._TName = TName\n-        self._TSize = TSize\n-        self._TStart = TStart\n-        self._TEnd = TEnd\n-        self._blockCount = blockCount\n-        self._blockSizes = blockSizes\n-        self._qStarts = qStarts\n-        self._tStarts = tStarts\n-        \n-    def __eq__(self, o):\n-        return self._TName == o._TName and self._TSize == o._TSize and self._TStart == o._TStart and self._TEnd == o._TEnd\n-    \n-    def setMatch(self, match):\n-        self._match = match\n-        \n-    def setMismatch(self, mismatch):\n-        self._mismatch = mismatch\n-        \n-    def setRepMatch(self, repMatch):\n-        self._repMatch = repMatch\n-        \n-    def setN(self, N):\n-        self._N = N\n-        \n-    def setQGapCount(self, QGapCount):\n-        self._QGapCount = QGapCount\n-        \n-    def setQGapBases(self, QGapBases):\n-        self._QGapBases = QGapBases\n-        \n-    def setTGapCount(self, TGapCount):\n-        self._TGapCount = TGapCount\n-   
     \n-    def setTGapBases(self, TGapBases):\n-        self._TGapBases = TGapBases\n-        \n-    def setStrand(self, strand):\n-        self._strand = strand\n-        \n-    def setQName(self, QName):\n-        self._QName = QName\n-        \n-    def setQSize(self, QSize):\n-        self._QSize = QSize\n-        \n-    def setQStart(self, QStart):\n-        self._QStart = QStart\n-        \n-    def setQEnd(self, QEnd):\n-        self._QEnd = QEnd\n-        \n-    def setTName(self, TName):\n-        self._TName = TName\n-        \n-    def setTSize(self, TSize):\n-        self._TSize = TSize\n-        \n-    def setTStart(self'..b'e:\n-            sys.stderr.write("WARNING: The field QName is empty in blat file in line %s\\n" % iCurrentLineNumber)\n-            error = True\n-        \n-        if lResults[10] != \'\':\n-            self.setQSize(lResults[10])\n-        else:\n-            sys.stderr.write("WARNING: The field QSize is empty in blat file in line %s\\n" % iCurrentLineNumber)\n-            error = True\n-        \n-        if lResults[11] != \'\':\n-            self.setQStart(lResults[11])\n-        else:\n-            sys.stderr.write("WARNING: The field QStart is empty in blat file in line %s\\n" % iCurrentLineNumber)\n-            error = True\n-        \n-        if lResults[12] != \'\':\n-            self.setQEnd(lResults[12])\n-        else:\n-            sys.stderr.write("WARNING: The field QEnd is empty in blat file in line %s\\n" % iCurrentLineNumber)\n-            error = True\n-        \n-        if lResults[13] != \'\':\n-            self.setTName(lResults[13])\n-        else:\n-            sys.stderr.write("WARNING: The field TName is empty in blat file in line %s\\n" % iCurrentLineNumber)\n-            error = True\n-        \n-        if lResults[14] != \'\':\n-            self.setTSize(lResults[14])\n-        else:\n-            sys.stderr.write("WARNING: The field TSize is empty in blat file in line %s\\n" % 
iCurrentLineNumber)\n-            error = True\n-        \n-        if lResults[15] != \'\':\n-            self.setTStart(lResults[15])\n-        else:\n-            sys.stderr.write("WARNING: The field TStart is empty in blat file in line %s\\n" % iCurrentLineNumber)\n-            error = True\n-        \n-        if lResults[16] != \'\':\n-            self.setTEnd(lResults[16])\n-        else:\n-            sys.stderr.write("WARNING: The field TEnd is empty in blat file in line %s\\n" % iCurrentLineNumber)\n-            error = True\n-        \n-        if lResults[17] != \'\':\n-            self.setBlockCount(lResults[17])\n-        else:\n-            sys.stderr.write("WARNING: The field BlockCount is empty in blat file in line %s\\n" % iCurrentLineNumber)\n-            error = True\n-        \n-        if lResults[18] != \'\':\n-            self.setBlockSizes(lResults[18])\n-        else:\n-            sys.stderr.write("WARNING: The field BlockSizes is empty in blat file in line %s\\n" % iCurrentLineNumber)\n-            error = True\n-        \n-        if lResults[19] != \'\':\n-            self.setQStarts(lResults[19])\n-        else:\n-            sys.stderr.write("WARNING: The field QStarts is empty in blat file in line %s\\n" % iCurrentLineNumber)\n-            error = True\n-        \n-        if lResults[20] != \'\':\n-            self.setTStarts(lResults[20])\n-        else:\n-            sys.stderr.write("WARNING: The field TStarts is empty in blat file in line %s\\n" % iCurrentLineNumber)\n-            error = True\n-            \n-        if error == True:\n-            self._setAllToNull()\n-            \n-    def setAttributesFromString(self, blatLine, iCurrentLineNumber ="", fieldSeparator ="\\t"):\n-        blatLine = blatLine.rstrip()\n-        lBlatLineItem = blatLine.split(fieldSeparator)\n-        if not len(lBlatLineItem) == 21:\n-            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns 
needed)\\n" % (iCurrentLineNumber, len(lBlatLineItem)))\n-        else:\n-            self.setAttributes(lBlatLineItem, iCurrentLineNumber)\n-            \n-    def _setAllToNull(self):\n-        self._match = \'\'\n-        self._mismatch = \'\'\n-        self._repMatch = \'\'\n-        self._N = \'\'\n-        self._QGapCount = \'\'\n-        self._QGapBases = \'\'\n-        self._TGapCount = \'\'\n-        self._TGapBases = \'\'\n-        self._strand = \'\'\n-        self._QName = \'\'\n-        self._QSize = \'\'\n-        self._QStart = \'\'\n-        self._QEnd = \'\'\n-        self._TName = \'\'\n-        self._TSize = \'\'\n-        self._TStart = \'\'\n-        self._TEnd = \'\'\n-        self._blockCount = \'\'\n-        self._blockSizes = \'\'\n-        self._qStarts = \'\'\n-        self._tStarts = \'\'\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BlatToGff.py
--- a/commons/core/parsing/BlatToGff.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,116 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-import optparse
-import os
-from commons.core.parsing.BlatParser import BlatParser
-
-class BlatToGff(object):
-
-
-    def __init__(self):
-        pass
-    
-    def setAttributesFromCmdLine(self):
-        help = '\
-        \nThis Script Launch BlatToGff.\n\n\
-        Example 1: python BlatToGff.py -i blatResultsFile.tab -o outputFile.gff3\n\n'
-        parser = optparse.OptionParser(usage= help, version="CovertSamToFastq.py v1.0")
-        parser.add_option( '-i', '--input', dest='inputBLAT', help='Blat Input File Name [Format: tabular]', default= None )
-        parser.add_option( '-o', '--output', dest='output', help='Output File Name [Format: GFF3]', default= None )
-        parser.add_option( '-n', '--methodname', dest='methodName', help='Method name in col. 3 [Default: None]', default= None )
-        ( options, args ) = parser.parse_args()
-        self._options = options
-    
-    def checkOptions(self):
-        if self._options.inputBLAT == '':
-            raise Exception("ERROR: No Blat file specified for -i !")
-        elif not os.path.exists(self._options.inputBLAT):
-            raise Exception("ERROR: Blat Input File doesn't exist !")
-        else:
-            self._inputFileBlat = self._options.inputBLAT
-            
-        if self._options.output == '':
-            raise Exception("ERROR: No Output file specified for -o !")
-        else:
-            self._outputFileGFF = self._options.output
-            
-        self._methodName = self._options.methodName
-            
-    def run(self):
-        self.checkOptions()
-        self._createGFFOutputFile()
-        BLATFile = open(self._inputFileBlat, 'r')
-        
-        headerBlatLine = BLATFile.readline()
-        headerBlatLine = BLATFile.readline()
-        headerBlatLine = BLATFile.readline()
-        headerBlatLine = BLATFile.readline()
-        headerBlatLine = BLATFile.readline()
-        blatLine = BLATFile.readline()
-        numberLine = 6
-        while blatLine != '':
-            gffLine = self.convertBlatObjectToGffLine(blatLine, numberLine)
-            self._printGFFLinesToOutputFile(gffLine)
-            blatLine = BLATFile.readline()
-            numberLine = numberLine + 1
-            
-    def convertBlatObjectToGffLine(self, blatLine, numberLine):
-        iBlatHit = BlatParser()
-        iBlatHit.setAttributesFromString(blatLine, numberLine)
-        col1 = iBlatHit.getTName()
-        col2 = 'BlatToGff'
-        if self._methodName == '' or self._methodName == None:
-            col3 = 'BES'
-        else:
-            col3 = '%s:BES' % self._methodName
-        col4 = iBlatHit.getTStart()
-        col5 = iBlatHit.getTEnd()
-        col6 = '.'
-        col7 = '+'
-        col8 = '.'
-        col9 = 'ID=%s;Name=%s;bes_start=%s;bes_end=%s;bes_size=%s' % (iBlatHit.getQName(), iBlatHit.getQName(), iBlatHit.getTStart(), iBlatHit.getTEnd(), iBlatHit.getTSize())
-        gffLine = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (col1, col2, col3, col4, col5, col6, col7, col8, col9)
-        return gffLine
-    
-    def _createGFFOutputFile(self):
-        GFFfile = open(self._outputFileGFF, 'w')
-        GFFfile.write("##gff-version 3\n")
-        GFFfile.close()
-        
-    def _printGFFLinesToOutputFile(self, line):
-        GFFfile = open(self._outputFileGFF, 'a')
-        GFFfile.write(line)
-        GFFfile.close()
-
-if __name__ == '__main__':
-    iBlatToGff = BlatToGff()
-    iBlatToGff.setAttributesFromCmdLine()
-    iBlatToGff.run()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BlatToGffForBesPaired.py
--- a/commons/core/parsing/BlatToGffForBesPaired.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,266 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-import optparse\n-import os\n-import sys\n-import re\n-import datetime\n-from commons.core.parsing.BlatParser import BlatParser\n-from commons.core.seq.FastaUtils import FastaUtils \n-\n-class BlatToGffForBesPaired(object):\n-\n-\n-    def __init__(self):\n-        pass\n-    \n-    def setAttributesFromCmdLine(self):\n-        help = \'\\\n-        \\nThis Script Launch BlatToGffForBesPaired.\\n\\n\\\n-        Example 1: python BlatToGffForBesPaired.py -i blatResultsFile.tab -f besSequences.fasta -o outputFile.gff3\\n\\\n-        Example 2: python BlatToGffForBesPaired.py -i blatResultsFile.tab -f besSequences.fasta -o outputFile.gff3 -n muscadine:filtre1\\n\\n\\\n-        Note 1: In blat input file, all BAC-Ends must be paired. In addition, they must be one above the other.\\nFor example, if you have the BES MRRE1H032F08FM1 (forward), we must have the BES MRRE1H032F08RM1 (reverse) just after, like:\\n\\\n-        554\\t26\\t0\\t0\\t1\\t16\\t1\\t17\\t+\\tMRRE1H032F08FM1\\t606\\t10\\t606\\tchr11\\t19818926\\t3725876\\t3726473\\t2\\t553,27,\\t10,579,\\t3725876,3726446,\\n\\\n-        620\\t23\\t0\\t0\\t0\\t0\\t0\\t0\\t-\\tMRRE1H032F08RM1\\t643\\t0\\t643\\tchr11\\t19818926\\t3794984\\t3795627\\t1\\t643,\\t0,\\t3794984,\\n\\\n-        Note 2: the header in Blat results output file must be present (5 lines).\\n\\n\'\n-                \n-        parser = optparse.OptionParser(usage= help, version="CovertSamToFastq.py v1.0")\n-        parser.add_option( \'-i\', \'--input\', dest=\'inputBLAT\', help=\'Blat Input File Name, with BES paired (1 Forward and 1 Reverse) [Format: tabular]\', default= None )\n-        parser.add_option( \'-f\', \'--fasta\', dest=\'inputFASTA\', help=\'Fasta Input File Name, with all sequences of BES [Format: fasta]\', default= None )\n-        parser.add_option( \'-o\', \'--output\', 
dest=\'output\', help=\'Output File Name [Format: GFF3]\', default= None )\n-        parser.add_option( \'-n\', \'--methodname\', dest=\'methodName\', help=\'Method name in col. 3 [Default: None]\', default= None )\n-        ( options, args ) = parser.parse_args()\n-        self._options = options\n-    \n-    def checkOptions(self):\n-        if self._options.inputBLAT == \'\':\n-            raise Exception("ERROR: No Blat file specified for -i !")\n-        elif not os.path.exists(self._options.inputBLAT):\n-            raise Exception("ERROR: Blat Input File doesn\'t exist !")\n-        else:\n-            self._inputFileBlat = self._options.inputBLAT\n-        '..b'         col9 = \'ID=%s;Name=%s;bac_start=%s;bac_end=%s;bac_size=%s;besFM_name=%s;muscadine_besFM_seq=%s;besRM_name=%s;muscadine_besRM_seq=%s\' % (bacName, bacName, startBacPos, endBacPos, sizeBacPos, nameBesFM, seqBesFM, nameBesRM, seqBesRM)\n-            gffLine = \'%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\n\' % (col1, col2, col3, col4, col5, col6, col7, col8, col9)\n-            return gffLine\n-        return None\n-    \n-    def getBesFmAndRmNamesAndSequences(self, besName1, seqBes1, typeBes1, besName2, seqBes2, typeBes2):\n-        if typeBes1 == \'FM\' and typeBes2 == \'RM\':\n-            return besName1, seqBes1, besName2, seqBes2\n-        elif typeBes1== \'RM\' and typeBes2 == \'FM\':\n-            return besName2, seqBes2, besName1, seqBes1\n-\n-    def getBesName(self, col9):\n-        lCol9 = col9.split(\';\')\n-        ID = lCol9[0]\n-        besName = ID[3:]\n-        return besName\n-    \n-    def getBacName(self, besName):\n-        bacName = besName[:-3]\n-        return bacName\n-\n-    def checkBesNames(self, besName1, besName2, line):\n-        bacName1 = besName1[:-3]\n-        bacName2 = besName2[:-3]\n-        if bacName1 == bacName2:\n-            return True\n-        else:\n-            sys.stderr.write("WARNING: Lines %s and %s the two Bes (%s AND %s) do not belong 
to the same BAC !!!\\n  -> you have to filter this Blat file...\\n" % (int(line)-1, line, besName1, besName2))\n-            return False\n-    \n-    def checkBesPositions(self, tBes1, tBes2):\n-        if tBes1[0] == tBes2[0]:\n-            minBes1 = min(tBes1[1], tBes1[2])\n-            maxBes1 = max(tBes1[1], tBes1[2])\n-            minBes2 = min(tBes2[1], tBes2[2])\n-            maxBes2 = max(tBes2[1], tBes2[2])\n-            if (minBes1 < minBes2 and maxBes1 < minBes2) or (minBes2 < minBes1 and maxBes2 < minBes1):\n-                return True\n-        return False\n-    \n-    def getBacPositions(self, tBes1, tBes2):\n-        startBacPos = 0\n-        endBacPos = 0\n-        minBes1 = min(tBes1[1], tBes1[2])\n-        maxBes1 = max(tBes1[1], tBes1[2])\n-        minBes2 = min(tBes2[1], tBes2[2])\n-        maxBes2 = max(tBes2[1], tBes2[2])\n-        if minBes1 < minBes2:\n-            startBacPos = minBes1\n-            endBacPos = maxBes2\n-        else:\n-            startBacPos = minBes2\n-            endBacPos = maxBes1\n-        return startBacPos, endBacPos\n-    \n-    def extractBesSequenceFromFastaFile(self, besName, numberLine):\n-        seq = \'\'\n-        date = datetime.datetime.now()\n-        date = date.strftime("%d%m%Y_%H%M%S")\n-        tmpFileName = \'tmp_BlatToGffForBesPaired_%s.fasta\' % date\n-        iFastaUtils = FastaUtils()\n-        iFastaUtils.dbExtractByPattern(besName, self._inputFileFasta, tmpFileName)\n-        \n-        if os.path.exists(tmpFileName):\n-            newFastaFile = open(tmpFileName, \'r\')\n-            line = newFastaFile.readline()\n-            if line != \'\':\n-                while line != \'\':\n-                    if line[0] != \'>\':\n-                        line = line.replace(\'\\n\', \'\')\n-                        seq += line\n-                    line = newFastaFile.readline()\n-                newFastaFile.close()\n-                os.remove(tmpFileName)\n-                return seq\n-       
     os.remove(tmpFileName)\n-        \n-        sys.stderr.write("WARNING: At line %s, the BAC-Ends (%s) hasn\'t got sequence in fasta file (%s) !!\\n" % (numberLine, besName, os.path.basename(self._inputFileFasta)))\n-        return \'NA\'\n-    \n-    def _createGFFOutputFile(self):\n-        GFFfile = open(self._outputFileGFF, \'w\')\n-        GFFfile.write("##gff-version 3\\n")\n-        GFFfile.close()\n-        \n-    def _printGFFLinesToOutputFile(self, lLines):\n-        GFFfile = open(self._outputFileGFF, \'a\')\n-        for line in lLines:\n-            GFFfile.write(line)\n-        GFFfile.close()\n-\n-if __name__ == \'__main__\':\n-    iBlatToGffForBesPaired = BlatToGffForBesPaired()\n-    iBlatToGffForBesPaired.setAttributesFromCmdLine()\n-    iBlatToGffForBesPaired.run()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BowtieParser.py
--- a/commons/core/parsing/BowtieParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,91 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2011
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from commons.core.parsing.MapperParser import MapperParser
-from SMART.Java.Python.structure.Mapping import Mapping
-from SMART.Java.Python.structure.SubMapping import SubMapping
-from SMART.Java.Python.structure.Interval import Interval
-
-class BowtieParser(MapperParser):
-    """A class that parses BowTie format"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(BowtieParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(BowtieParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["bowtie"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        pass
-
-
-    def parseLine(self, line):
-        line   = line.strip()
-        fields = line.split("\t")
-        if len(fields) not in (7, 8):
-            raise Exception("Line %d '%s' does not look like a BowTie line (number of fields is %d instead of 7 or 8)" % (self.currentLineNb, line, len(fields)))
-        name         = fields[0]
-        direction    = 1 if fields[1] == "+" else -1
-        chromosome   = fields[2]
-        genomeStart  = int(fields[3]) + 1
-        sequence     = fields[4]
-        quality      = fields[5]
-        number       = int(fields[6])
-        nbMismatches = 0
-        if len(fields) == 8:
-            tags         = fields[7]
-            nbMismatches = len(tags.split(","))
-
-        mapping = Mapping()
-        queryInterval = Interval()
-        queryInterval.setName(name)
-        queryInterval.setStart(1)
-        queryInterval.setEnd(len(sequence) + 1)
-        targetInterval = Interval()
-        targetInterval.setChromosome(chromosome)
-        targetInterval.setStart(genomeStart)
-        targetInterval.setEnd(genomeStart + len(sequence) - 1)
-        subMapping = SubMapping()
-        subMapping.setQueryInterval(queryInterval)
-        subMapping.setTargetInterval(targetInterval)
-        mapping.addSubMapping(subMapping)
-        mapping.setSize(len(sequence))
-        mapping.setNbMismatches(nbMismatches)
-        mapping.setDirection(direction)
-        return mapping
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/BowtieParser.pyc
b
Binary file commons/core/parsing/BowtieParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/CoordsParser.py
--- a/commons/core/parsing/CoordsParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,137 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.Mapping import Mapping
-from commons.core.parsing.MapperParser import MapperParser
-from SMART.Java.Python.structure.SubMapping import SubMapping
-from SMART.Java.Python.misc import Utils
-
-class CoordsParser(MapperParser):
-    """A class that parses the .coords output of Nucmer"""
-
-    def __init__(self, fileName, verbosity = 0):
-        self._lineParseRe = re.compile(r"^\s*(?P<tStart>\d+)\s+(?P<tEnd>\d+)\s+\|\s+(?P<qStart>\d+)\s+(?P<qEnd>\d+)\s+\|\s+(?P<tLength>\d+)\s+(?P<qLength>\d+)\s+\|\s+(?P<identity>\d+\.?\d*)\s+\|\s+(?P<tName>[\w\|\:\-]+)\s+(?P<qName>.*)\s*$")
-        self._lineParseRe2 = re.compile(r"^\s*(?P<tStart>\d+)\s+(?P<tEnd>\d+)\s+(?P<qStart>\d+)\s+(?P<qEnd>\d+)\s+(?P<tLength>\d+)\s+(?P<qLength>\d+)\s+(?P<identity>\d+\.?\d*)\s+(?P<rlen>\d+\.?\d*)\s+(?P<qlen>\d+\.?\d*)\s+(?P<rcov>\d+\.?\d*)\s+(?P<qcov>\d+\.?\d*)\s+(?P<rframe>[-]?\d+\.?\d*)\s+(?P<qframe>[-]?\d+\.?\d*)\s+(?P<tName>[\w\|\:\-]+)\s+(?P<qName>.*)\s*$")
-        self._lineParseRe3 = re.compile(r"^\s*(?P<tStart>\d+)\s+(?P<tEnd>\d+)\s+\|\s+(?P<qStart>\d+)\s+(?P<qEnd>\d+)\s+\|\s+(?P<tLength>\d+)\s+(?P<qLength>\d+)\s+\|\s+(?P<identity>\d+\.?\d*)\s+(?P<sim>\d+\.?\d*)\s+(?P<stp>\d+\.?\d*)\s+\|\s+(?P<rframe>[-]?\d+\.?\d*)\s+(?P<qframe>[-]?\d+\.?\d*)\s+(?P<tName>[\w\|\:\-]+)\s+(?P<qName>.*)\s*$")
-        self._lineParseRe4 = re.compile(r"^\s*(?P<tStart>\d+)\s+(?P<tEnd>\d+)\s+(?P<qStart>\d+)\s+(?P<qEnd>\d+)\s+(?P<tLength>\d+)\s+(?P<qLength>\d+)\s+(?P<identity>\d+\.?\d*)\s+(?P<sim>\d+\.?\d*)\s+(?P<stp>\d+\.?\d*)\s+(?P<rlen>\d+\.?\d*)\s+(?P<qlen>\d+\.?\d*)\s+(?P<rcov>\d+\.?\d*)\s+(?P<qcov>\d+\.?\d*)\s+(?P<rframe>[-]?\d+\.?\d*)\s+(?P<qframe>[-]?\d+\.?\d*)\s+(?P<tName>[\w\|\:\-]+)\s+(?P<qName>.*)\s*$")
-        self.lineType = 1
-        MapperParser.__init__(self, fileName, verbosity)
-        
-    def getFileFormats():
-        return ["coords"]
-    getFileFormats = staticmethod(getFileFormats)
-
-    def skipFirstLines(self):    
-        while True: 
-            line = self.handle.readline()
-            self.currentLineNb += 1
-            if line == "":
-                break
-            if "=====" in line:
-                break
-            if "[S1]\t[E1]\t[S2]\t[E2]\t[LEN 1]\t[LEN 2]\t[% IDY]\t[LEN R]\t[LEN Q]\t[COV R]\t[COV Q]\t[FRM]\t[TAGS]" in line:
-                self.lineType = 2
-                break
-            if "[S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [% IDY]  [% SIM]  [% STP]  | [FRM]  [TAGS]" in line:
-                self.lineType = 3
-           
-            if "[% IDY]\t[% SIM]\t[% STP]" in line and "[LEN Q]"in line:
-                self.lineType = 4 
-                break     
-        
-    def parseLine(self, line):
-        
-        if self.lineType == 1 : 
-            m = self._lineParseRe.search(line)
-        elif self.lineType == 2:
-            m = self._lineParseRe2.search(line)
-        elif self.lineType == 3:
-            m = self._lineParseRe3.search(line)
-        elif self.lineType == 4:
-            m = self._lineParseRe4.search(line)
-        if m == None:
-            sys.exit("\nLine %d '%s' does not have a NucMer format" % (self.currentLineNb, line))
-  
-        mapping = Mapping()
-        
-        subMapping = SubMapping()
-        subMapping.queryInterval.setName(m.group("qName"))
-        subMapping.queryInterval.setStart(min(int(m.group("qStart")), int(m.group("qEnd"))))
-        subMapping.queryInterval.setEnd(max(int(m.group("qStart")), int(m.group("qEnd"))))
-        subMapping.queryInterval.setSize(int(m.group("qLength")))
-        subMapping.queryInterval.setDirection(int(m.group("qEnd")) - int(m.group("qStart")))
-        
-        subMapping.targetInterval.setChromosome(m.group("tName"))
-        subMapping.targetInterval.setStart(min(int(m.group("tStart")), int(m.group("tEnd"))))
-        subMapping.targetInterval.setEnd(max(int(m.group("tStart")), int(m.group("tEnd"))))
-        subMapping.targetInterval.setSize(int(m.group("tLength")))
-        subMapping.targetInterval.setDirection(int(m.group("tEnd")) - int(m.group("tStart")))
-       
-        subMapping.setDirection(int(m.group("qEnd")) - int(m.group("qStart")))
-        subMapping.setSize(min(int(m.group("qLength")), int(m.group("tLength"))))
-        subMapping.setIdentity(float(m.group("identity")))
-        
-        mapping.addSubMapping(subMapping)
-        mapping.targetInterval.setStart(min(int(m.group("tStart")), int(m.group("tEnd"))))
-        mapping.targetInterval.setEnd(max(int(m.group("tStart")), int(m.group("tEnd"))))
-        mapping.targetInterval.setSize(int(m.group("tLength")))
-        mapping.targetInterval.setChromosome(m.group("tName"))
-         
-        mapping.queryInterval.setStart(min(int(m.group("qStart")), int(m.group("qEnd"))))
-        mapping.queryInterval.setEnd(max(int(m.group("qStart")), int(m.group("qEnd"))))
-        mapping.queryInterval.setSize(int(m.group("qLength")))
-        mapping.queryInterval.setName(m.group("qName"))
-        mapping.setDirection(int(m.group("qEnd")) - int(m.group("qStart")))
-        mapping.setSize(min(int(m.group("qLength")), int(m.group("tLength"))))
-        mapping.setIdentity(float(m.group("identity")))
-        mapping.setTagValue("feature", "match")
-        mapping.setTagValue("Target", "%s %d %d" % (m.group("qName"), int(m.group("qStart")), int(m.group("qEnd"))))
-                    
-        if self.lineType ==2 or self.lineType ==4:
-            mapping.setTagValue("target_pident", float(m.group("identity")))
-            mapping.setTagValue("target_pcover", float(m.group("qcov")))
-            mapping.setTagValue("target_length", int(m.group("qlen")))
-            
-        
-# Specific to Mark Work. Commented lines because of possible slowdown.                 
-#        for line in self.handle:
-#            string1 = line.strip()
-#            self.currentLineNb += 1
-#            break
-#        for line in self.handle:
-#            string2 = line.strip()
-#            self.currentLineNb += 1
-#            break
-#        print(len(string1),len(string2))
-#        mapping.setNbMismatches(Utils.getHammingDistance(string1, string2))
-        mapping.setNbGaps(0)
-
-        return mapping
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/CoordsParser.pyc
b
Binary file commons/core/parsing/CoordsParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/CrossSsrAndBesMappedByBlatToGff.py
--- a/commons/core/parsing/CrossSsrAndBesMappedByBlatToGff.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,197 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import os\n-import optparse\n-from commons.core.parsing.SsrParser import SsrParser\n-from commons.core.parsing.BlatParser import BlatParser\n-\n-class CrossSsrAndBesMappedByBlatToGff(object):\n-\n-\n-    def __init__(self):\n-        self._inputFileSSR = \'\'\n-        self._inputFileBlat = \'\'\n-        self._outputFileGFF = \'\'\n-    \n-    def setAttributesFromCmdLine(self):\n-        help = \'\\\n-        \\nThis Script Launch CrossSsrAndBesMappedByBlatToGff.\\n\\n\\\n-        Example 1: python CrossSsrAndBesMappedByBlatToGff.py -s ssrResultsFile.tab -b blatResultsFile.tab -o outputFile.gff3\\n\\\n-        Example 2: python CrossSsrAndBesMappedByBlatToGff.py -s ssrResultsFile.tab -b blatResultsFile.tab -o outputFile.gff3 -n muscadine:filtre1\\n\\n\'\n-        \n-        parser = optparse.OptionParser(usage= help, version="CovertSamToFastq.py v1.0")\n-        parser.add_option( \'-s\', \'--ssr\', dest=\'inputSSR\', help=\'SSR Input File Name [Format: tabular]\', default= None )\n-        parser.add_option( \'-b\', \'--blat\', dest=\'inputBLAT\', help=\'Blat Input File Name [Format: tabular]\', default= None )\n-        parser.add_option( \'-o\', \'--output\', dest=\'output\', help=\'Output File Name [Format: GFF3]\', default= None )\n-        parser.add_option( \'-n\', \'--methodName\', dest=\'methodName\', help=\'Method name in col. 
3 [Default: None]\', default= None )\n-        ( options, args ) = parser.parse_args()\n-        self.options = options\n-    \n-    def checkOptions(self):\n-        if self.options.inputSSR == \'\':\n-            raise Exception("ERROR: No SSR file specified for -s !")\n-        elif not os.path.exists(self.options.inputSSR):\n-            raise Exception("ERROR: SSR Input File doesn\'t exist !")\n-        else:\n-            self._inputFileSSR = self.options.inputSSR\n-        \n-        if self.options.inputBLAT == \'\':\n-            raise Exception("ERROR: No Blat file specified for -b !")\n-        elif not os.path.exists(self.options.inputBLAT):\n-            raise Exception("ERROR: Blat Input File doesn\'t exist !")\n-        else:\n-            self._inputFileBlat = self.options.inputBLAT\n-            \n-        if self.options.output == \'\':\n-            raise Exception("ERROR: No Output file specified for -o !")\n-        else:\n-            self._outputFileGFF = self.options.output\n-            \n-        self._methodName = self.options.methodName\n-    \n-    def run(self):\n-     '..b'\n-        besNameToKeep =  BlatHitObject.getQName()\n-        lOfSSRHitObject = dictSsrParser[besNameToKeep]\n-        \n-        for SSRHitObject in  lOfSSRHitObject:\n-            posSSRStart = self.convertSSRPositionsToChromPositions(SSRHitObject.getSsrStart(), BlatHitObject.getTStart(), BlatHitObject.getTEnd(), BlatHitObject.getStrand())\n-            posSSREnd = self.convertSSRPositionsToChromPositions(SSRHitObject.getSsrEnd(), BlatHitObject.getTStart(), BlatHitObject.getTEnd(), BlatHitObject.getStrand())\n-            ssrSeq = self.getSsrSeq(SSRHitObject.getSsrMotif(), SSRHitObject.getSsrMotifNumber())\n-            \n-            col1 = BlatHitObject.getTName()\n-            col2 = \'CrossSsrAndBesAlignedByBlat\'\n-            if self._methodName != \'\' and self._methodName != None:\n-                col3 = \'%s:SSR\' %self._methodName\n-            
else:\n-                col3 = \'SSR\'\n-            col4 = posSSRStart\n-            col5 = posSSREnd\n-            col6 = \'.\'\n-            col7 = BlatHitObject.getStrand()\n-            col8 = \'.\'\n-            col9 = \'ID=SSR_%s_%s;Name=SSR_%s_%s;bes_name=%s;bes_size=%s;bes_matchstart=%s;bes_matchend=%s;bes_redundancy=%s;ssr_type=%s;ssr_motif=%s;ssr_motif_number=%s;ssr_start=%s;ssr_end=%s;muscadine_seq=%s\' % (besNameToKeep, SSRHitObject.getBesRedundancy(), \n-                                                                                                                                                                                           besNameToKeep, SSRHitObject.getBesRedundancy(),\n-                                                                                                                                                                                           besNameToKeep, BlatHitObject.getQSize(),\n-                                                                                                                                                                                           BlatHitObject.getQStart(), BlatHitObject.getQEnd(), \n-                                                                                                                                                                                           SSRHitObject.getBesRedundancy(), SSRHitObject.getSsrNbNucleotides(),\n-                                                                                                                                                                                           SSRHitObject.getSsrMotif(), SSRHitObject.getSsrMotifNumber(),\n-                                                                                                                                                                                           SSRHitObject.getSsrStart(), SSRHitObject.getSsrEnd(), ssrSeq)\n-            gffLine = 
\'%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\n\' % (col1, col2, col3, col4, col5, col6, col7, col8, col9)\n-            listGffLines.append(gffLine)\n-            \n-        return listGffLines\n-    \n-    def convertSSRPositionsToChromPositions(self, ssrPos, chromPosStart, chromPosEnd, strand):\n-        if strand == \'+\':\n-            newPos =  int(chromPosStart) + int(ssrPos) - 1\n-        elif strand == \'-\':\n-            newPos =  int(chromPosEnd) - int(ssrPos) + 1\n-        return newPos\n-    \n-    def getSsrSeq(self, motif, nbMotif):\n-        ssrSeq = motif * int(nbMotif)\n-        return ssrSeq\n-    \n-    def _createGFFOutputFile(self):\n-        GFFfile = open(self._outputFileGFF, \'w\')\n-        GFFfile.write("##gff-version 3\\n")\n-        GFFfile.close()\n-        \n-    def _printGFFLinesToOutputFile(self, lLinesToPrint):\n-        GFFfile = open(self._outputFileGFF, \'a\')\n-        for line in lLinesToPrint:\n-            GFFfile.write(line)\n-        GFFfile.close()\n-\n-if __name__ == \'__main__\':\n-    iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()\n-    iCrossSsrAndBesMappedByBlatToGff.setAttributesFromCmdLine()\n-    iCrossSsrAndBesMappedByBlatToGff.run()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/ElandParser.py
--- a/commons/core/parsing/ElandParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,126 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-from commons.core.parsing.MapperParser import MapperParser
-from SMART.Java.Python.structure import Mapping
-
-class ElandParser(MapperParser):
-    """A class that parses ELAND format"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(ElandParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(ElandParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["eland"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        pass
-
-
-    def getInfos(self):
-        super(ElandParser, self).getInfos()
-        
-
-    def parseLine(self, line):
-
-        line = line.strip()
-
-        fields = line.split("\t")
-        if len(fields) < 22:
-            sys.exit("Line %d '%s' does not look like a ELAND line (number of fields is %d instead of 22)" % (self.currentLineNb, line, len(fields)))
-
-        flowCell = fields[0]
-        run = fields[1]
-        lane = fields[2]
-        tile = fields[3]
-        xcoord = fields[4]
-        ycoord = fields[5]
-        index = fields[6]
-        number = fields[7]
-        read = fields[8]
-        quality = fields[9]
-        chromosome = fields[10]
-        contig = fields[11]
-        position = fields[12]
-        strand = fields[13]
-        description = fields[14]
-        singleScore = fields[15]
-        pairScore = fields[16]
-        partnerChromosome = fields[17]
-        partnerContig = fields[18]
-        partnerOffset = fields[19]
-        partnerStrand = fields[20]
-        filtering = fields[21]
-
-        if number != "1":
-            sys.exit("S-MART cannot handle pair-end reads yet!")
-
-        # nothing found
-        if position == "":
-            return None
-
-        name = "%s_%s:%s:%s:%s:%s#0/1" % (flowCell, run, lane, tile, xcoord, ycoord)
-        direction = 1 if strand == "F" else -1
-        nbMismatches = 0
-        for char in description:
-            if ord("A") <= ord(char) and ord(char) <= ord("Z"):
-                nbMismatches += 1
-
-        mapping = Mapping()
-        mapping.setTagValue("qualityString", quality)
-        
-        mapping.queryInterval.setName(name)
-        mapping.queryInterval.setDirection(direction)
-        mapping.queryInterval.setStart(1)
-        mapping.queryInterval.setEnd(len(read))
-
-        mapping.targetInterval.setChromosome(chromosome)
-        mapping.targetInterval.setStart(int(position))
-        mapping.targetInterval.setEnd(int(position) + len(read))
-        mapping.targetInterval.setDirection(1)
-
-        mapping.setSize(len(read))
-        mapping.setDirection(direction)
-
-        mapping.setNbGaps(0)
-        mapping.setNbMismatches(nbMismatches)
-        mapping.setTagValue("score", int(singleScore))
-
-        if filtering == "Y":
-            return mapping
-        # mapping filtered out
-        return None
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/ElandParser.pyc
b
Binary file commons/core/parsing/ElandParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/ExoParser.py
--- a/commons/core/parsing/ExoParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,137 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from commons.core.parsing.MapperParser import MapperParser
-from SMART.Java.Python.structure.Mapping import Mapping
-from SMART.Java.Python.structure.SubMapping import SubMapping
-
-class ExoParser(MapperParser):
-    """A class that parses the output of Exonerate - roll your own format"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(ExoParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(ExoParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["exo", "exonerate"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        while "Hostname" not in self.handle.readline():
-            self.currentLineNb += 1
-            pass
-
-
-    def parseLine(self, line):
-        
-        if line == "-- completed exonerate analysis\n":
-            return None
-        
-        m = re.search(r"^\s*(\S+)\s+(\d+)\s+(\d+)\s+[+-]\s+(\S+)\s+(\d+)\s+(\d+)\s+([+-])\s+\d+\s+(\d+)\s+(\S.*)$", line)
-        if m == None:
-            sys.exit("\nLine %d '%s' does not have a RYO format" % (self.currentLineNb, line))
-
-        mapping = Mapping()
-        name = m.group(1)
-        queryStart = min(int(m.group(2)), int(m.group(3)))
-        queryEnd = max(int(m.group(2)), int(m.group(3)))-1
-        chromosome = m.group(4)
-        targetStart = min(int(m.group(5)), int(m.group(6)))
-        targetEnd = max(int(m.group(5)), int(m.group(6)))-1
-        direction = m.group(7)
-        nbMismatches = int(m.group(8))
-        rest = m.group(9).strip()
-        
-        nbGaps = 0
-        queryOffset = 0
-        targetOffset = 0
-        
-        subMapping = None
-        m = re.search(r"^(\w)\s+(\d+)\s+(\d+)", rest)
-        while m != None:
-            queryDistance    = int(m.group(2))
-            targetDistance = int(m.group(3))
-            if m.group(1) == "M":
-                if subMapping == None:
-                    subMapping = SubMapping()
-    
-                    subMapping.setSize(queryDistance)
-                    subMapping.setDirection(direction)
-        
-                    subMapping.queryInterval.setName(name)
-                    subMapping.queryInterval.setStart(queryStart + queryOffset)
-                    subMapping.queryInterval.setDirection(direction)
-        
-                    subMapping.targetInterval.setChromosome(chromosome)
-                    subMapping.targetInterval.setStart(targetStart + targetOffset)
-                    subMapping.targetInterval.setDirection(1)
-    
-            elif m.group(1) == "G":
-                nbGaps += max(queryDistance, targetDistance)
-                
-            elif m.group(1) == "I" or m.group(1) == "5" or m.group(1) == "3":
-                if subMapping != None:
-                    subMapping.queryInterval.setEnd(queryStart + queryOffset - 1)
-                    subMapping.targetInterval.setEnd(targetStart + targetOffset - 1)
-                    mapping.addSubMapping(subMapping)
-                    subMapping = None
-            else:
-                sys.exit("Cannot understand sign '%s' in line %s" % (m.group(1), line))
-            
-            queryOffset += queryDistance
-            targetOffset += targetDistance
-            rest = rest[m.end():].strip()
-            m = re.search(r"^(\w)\s+(\d+)\s+(\d+)", rest)
-            
-        if subMapping != None:
-            subMapping.queryInterval.setEnd(queryStart + queryOffset - 1)
-            subMapping.targetInterval.setEnd(targetStart + targetOffset - 1)
-            mapping.addSubMapping(subMapping)
-                        
-        mapping.setNbMismatches(nbMismatches)
-        mapping.setNbGaps(nbGaps)
-        mapping.setDirection(direction)
-
-        mapping.queryInterval.setName(name)
-        mapping.queryInterval.setStart(queryStart)
-        mapping.queryInterval.setEnd(queryEnd)
-
-        mapping.targetInterval.setChromosome(chromosome)
-        mapping.targetInterval.setStart(targetStart)
-        mapping.targetInterval.setEnd(targetEnd)
-
-        return mapping
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/ExoParser.pyc
b
Binary file commons/core/parsing/ExoParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/FastaParser.py
--- a/commons/core/parsing/FastaParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,173 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-from commons.core.parsing.SequenceListParser import SequenceListParser
-from SMART.Java.Python.structure.Sequence import Sequence
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-
-class FastaParser(SequenceListParser):
- """A class that reads a list of sequences in FASTA"""
-
- def __init__(self, fileName, verbosity = 0):
- super(FastaParser, self).__init__(fileName, verbosity)
- self.tags = {}
-
-
- def getTags(self):
- return self.tags
-
-
- def getFileFormats():
- return ["fasta", "mfa", "fas"]
- getFileFormats = staticmethod(getFileFormats)
-
-
- def getInfos(self):
- """
- Get some generic information about the sequences
- """
- self.nbSequences = 0
- self.size = 0
- self.reset()
- progress = UnlimitedProgress(100000, "Reading input file", self.verbosity - 9)
- for line in self.handle:
- line = line.strip()
- if line == "":
- continue
- if line[0] == ">":
- self.nbSequences += 1
- else:
- self.size += len(line)
- progress.inc()
- progress.done()
- self.reset()
-
-
- def parseOne(self):
- """
- Parse only one element in the file
- """
- name  = None
- string = ""
-
- if self.currentLine != None:
- if self.currentLine[0] != ">":
- raise Exception("First line is weird: %s" % (self.currentLine))
- name = self.currentLine[1:].split()[0]
- self.currentLine = None
-
- for line in self.handle:
- line = line.strip()
- if line == "":
- pass
- elif line[0] == ">":
- if name == None:
- name = line[1:].split()[0]
- else:
- self.currentLine = line
- return Sequence(name, string)
- else:
- string += line
-
- if name == None:
- return None
- return Sequence(name, string)
-
-
- def setTags(self):
- mark = self.handle.tell()
- thisTag = mark
-
- line = self.handle.readline()
- while line != "":
- if line[0] == ">":
- line = line.strip()
- self.tags[line[1:].split()[0]] = thisTag
- thisTag = self.handle.tell()
- line = self.handle.readline()
-
- self.handle.seek(mark)
-
-
- def getSubSequence(self, chromosome, start, end, direction, name = None):
- if not self.tags:
- self.setTags()
-
- if chromosome not in self.tags:
- raise Exception("Cannot find " + chromosome)
-
- if name == None:
- name = "%s:%d-%d (%d)" % (chromosome, start, end, direction)
- sequence = Sequence(name)
-
- # switch from 0-based to 1-based coordinates
- start -= 1
- end   -= 1
-
- self.handle.seek(self.tags[chromosome])
- line = self.handle.readline().strip()
- if line != ">" + chromosome:
- raise Exception("Arrived in a wrong place (got %s)" % (line))
-
- position1 = self.handle.tell()
- line   = self.handle.readline().strip()
- position2 = self.handle.tell()
- size   = len(line)
- address   = position1 + ((start - (start % size)) / size) * (position2 - position1);
-
- count  = max(0, start - (start % size));
- self.handle.seek(address)
-
- newSequence = ""
- for line in self.handle:
- line = line.strip()
-
- if line[0] == ">":
- break
-
- subStart = start - count
- if subStart < 0:
- subStart = 0
- subEnd  = end - count
- subSize = subEnd - subStart + 1
- if subSize + subStart > len(line):
- subSize = len(line) - subStart
- if subEnd < 0:
- break
- if subStart <= len(line):
- newSequence += line[subStart:subStart+subSize]
- count += len(line)
-
- if newSequence == "":
- raise Exception("Error, sequence %s is empty" % (name))
- sequence.sequence = newSequence
- if direction == -1:
- sequence.reverseComplement()
- return sequence
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/FastaParser.pyc
b
Binary file commons/core/parsing/FastaParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/FastqParser.py
--- a/commons/core/parsing/FastqParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,104 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-from commons.core.parsing.SequenceListParser import SequenceListParser
-from SMART.Java.Python.structure.Sequence import Sequence
-
-class FastqParser(SequenceListParser):
-    """A class that reads a list of sequences in FASTQ format"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(FastqParser, self).__init__(fileName, verbosity)
-
-
-    def getFileFormats():
-        return ["fastq", "mfq"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def getInfos(self):
-        """
-        Get some generic information about the sequences
-        """
-        self.nbSequences = 0
-        self.reset()
-        if self.verbosity >= 10:
-            print "Getting information on %s." % (self.fileName)
-
-        nbLines = 0
-        for line in self.handle:
-            line = line.strip()
-            if line == "":
-                continue
-            nbLines += 1
-            if self.verbosity >= 10 and nbLines % 400000 == 0:
-                sys.stdout.write("    %d sequences read\r" % (nbLines / 4))
-                sys.stdout.flush()
-        self.reset()
-        self.nbSequences = nbLines / 4
-        if self.verbosity >= 10:
-            print "    %d sequences read" % (self.nbSequences)
-            print "Done."
-
-
-    def parseOne(self):
-        """
-        Parse only one element in the file
-        """
-        string = ""
-        quality = ""
-        lineType = 0
-
-        for line in self.handle:
-            line = line.strip()
-            if lineType == 0:
-                if line[0] != "@":
-                    raise Exception("Line '%s' should start with '@'!" % (line))
-                name = line[1:]
-                inSequence = True
-                inQuality = False
-            elif lineType == 1:
-                string = line
-            elif lineType == 2:
-                if line[0] != "+":
-                    sys.exit("Line '%s' should start with '+'!" % (line))
-                if line[1:] != name and line != "+":
-                    sys.exit("Weird difference in sequence and quality names (%s and %s) while parsing FASTQ file %s." % (name, line[1:], self.fileName))
-                inQuality = True
-                inSequence = False
-            elif lineType == 3:
-                quality = line
-            lineType += 1
-            if lineType == 4:
-                sequence = Sequence(name, string)
-                sequence.setQuality(quality)
-                return sequence
-                
-        return None
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/FastqParser.pyc
b
Binary file commons/core/parsing/FastqParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/FindRep.py
--- a/commons/core/parsing/FindRep.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,113 +0,0 @@
-import re
-from xml.sax.handler import ContentHandler
-
-class FindRep( ContentHandler ):
-    def __init__(self,outfileName, filter=0,count=0):
-        self.inWindowContent = 0
-        self.inSeqNameContent = 0
-        self.inStartContent = 0
-        self.inEndContent = 0
-        self.inPeriodContent = 0
-        self.inUnitContent = 0
-        self.inScoreContent = 0
-        self.count = count
-        self._outfileName = outfileName
-        self.filter=filter
-    
-    def startDocument(self):
-        self._fileout = open(self._outfileName,"w")
-        
-    def startElement(self,name,attrs):
-        if name=="window":
-            self.inWindowContent=1
-        elif name=="sequence-name":
-            self.inSeqNameContent=1
-            self.seqname=""
-        elif name=="repeat":
-            self.inRepContent=1
-            self.start=""
-            self.end=""
-            self.period=""
-            self.type={}
-        elif name=="start":
-            self.inStartContent=1
-        elif name=="end":
-            self.inEndContent=1
-        elif name=="period":
-            self.inPeriodContent=1
-        elif name=="unit":
-            self.inUnitContent=1
-            self.unit=""
-        elif name=="score":
-            self.inScoreContent=1
-            self.score=""
-
-    def characters(self,ch):
-        if self.inSeqNameContent:
-            self.seqname+=ch
-        elif self.inStartContent:
-            self.start+=ch
-        elif self.inEndContent:
-            self.end+=ch
-        elif self.inPeriodContent:
-            self.period+=ch            
-        elif self.inUnitContent:
-            self.unit+=ch            
-        elif self.inScoreContent:
-            self.score+=ch            
-
-    def endElement(self,name):
-        if name=="window":
-            self.inWindowContent=0
-        elif name=="sequence-name":
-            self.inSeqNameContent=0
-        elif name=="repeat":
-            self.inRepContent=0
-            start=int(self.start)
-            end=int(self.end)
-            period=int(self.period)
-            score=float(self.score)
-            if score>self.filter:
-                return
-            max = 0
-            self.count+=1
-            for k,n in self.type.items():
-                if n>max:
-                    max = n
-                    k_max = k
-
-            m=re.match("^[0-9]+.+\{Cut\}",self.seqname)
-            if m!=None:
-                seqname=self.seqname[m.start(0):m.end(0)-5].rstrip()
-                seqname=re.sub("^[0-9]+ ","",seqname).lstrip()
-                tok=self.seqname[m.end(0):].split("..")
-                astart=start+int(tok[0])-1
-                aend=end+int(tok[0])-1
-            else:
-                astart=start
-                aend=end
-                seqname=self.seqname
-            if len(k_max) > 100:
-                k_max=k_max[:48]+"..."+k_max[-51:]
-            strout="%d\t(%s)%d\t%s\t%d\t%d"%\
-                               (self.count,k_max,(abs(start-end)+1)/period,\
-                                seqname,astart,aend)
-            self._fileout.write("%s\n"%(strout))
-
-        elif name=="start":
-            self.inStartContent=0
-        elif name=="end":
-            self.inEndContent=0
-        elif name=="period":
-            self.inPeriodContent=0
-        elif name=="score":
-            self.inScoreContent=0
-        elif name=="unit":
-            self.inUnitContent=0
-            if self.type.has_key(self.unit):
-                self.type[self.unit]+=1
-            else:
-                self.type[self.unit]=1
-                
-    def endDocument(self):  
-        self._fileout.close()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/GbParser.py
--- a/commons/core/parsing/GbParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,111 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.structure.Transcript import Transcript
-from commons.core.parsing.TranscriptListParser import TranscriptListParser
-
-
-class GbParser(TranscriptListParser):
-    """A class that parses a GBrowse file and create a transcript list"""
-
-
-    def __init__(self, fileName, verbosity = 0):
-        self.reference = None
-        self.color         = None
-        super(GbParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(GbParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["gb", "gbrowse"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        for line in self.handle:
-            self.currentLineNb += 1
-            line = line.strip()
-            m = re.search(r"^\s*bgcolor\s*=\s*(\S+)\s*$", line)
-            if m != None:
-                self.color = m.group(1)
-            if line == "":
-                return
-
-
-    def parseLine(self, line):
-        transcript = Transcript()
-        # first line (reference)
-        m = re.search(r"^\s*reference\s*=\s*(\S+)\s*$", line)
-        if m != None:
-            self.reference = m.group(1)
-            for line in self.handle:
-                line = line.strip()
-                self.currentLineNb += 1
-                break
-        # second line (genomic coordinates)
-        m = re.search(r"^\s*READS\s+(\S+)\s+(\S+)\s+\"([^\"]*)\"\s*$", line)
-        if m == None:
-            sys.exit("\nLine %d '%s' does not have a GBrowse format" % (self.currentLineNb, line))
-        if self.reference == None:
-            sys.exit("Cannot get reference of GBrowse line %d '%s'" % (self.currentLineNb, line))
-        transcript.setChromosome(self.reference)
-        transcript.setName(m.group(1))
-        transcript.setComment(m.group(3))
-        # exons
-        exons = m.group(2).split(",")
-        transcriptStart = 1000000000
-        transcriptEnd = 0
-        direction = 0
-        for exon in exons:
-            m = re.search(r"^(\d+)-(\d+)$", exon)
-            if m == None:
-                sys.exit("\nCannot read GBrowse exon line %d '%s'" % (self.currentLineNb, exon))
-            interval = Interval()
-            interval.setChromosome(transcript.chromosome)
-            direction += int(m.group(2)) - int(m.group(1))
-            start = min(int(m.group(1)), int(m.group(2)))
-            end     = max(int(m.group(1)), int(m.group(2)))
-            interval.setStart(start)
-            interval.setEnd(end)
-            transcriptStart = min(transcriptStart, start)
-            transcriptEnd     = max(transcriptEnd, end)
-            transcript.addExon(interval)
-        transcript.setStart(transcriptStart)
-        transcript.setEnd(transcriptEnd)
-        transcript.setDirection(direction)
-        for exon in transcript.getExons():
-            exon.setDirection(direction)
-        return transcript
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/GffParser.py
--- a/commons/core/parsing/GffParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,149 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.structure.Transcript import Transcript
-from commons.core.parsing.TranscriptListParser import TranscriptListParser
-
-
-class GffParser(TranscriptListParser):
- """A class that parses a GFF file and create a transcript list"""
-
-
- def __init__(self, fileName, verbosity = 0):
- super(GffParser, self).__init__(fileName, verbosity)
-
-
- def __del__(self):
- super(GffParser, self).__del__()
-
-
- def getFileFormats():
- return ["gff", "gff2", "gff3"]
- getFileFormats = staticmethod(getFileFormats)
-
-
- def skipFirstLines(self):
- pass
-
-
- def getInfos(self):
- self.chromosomes = set()
- self.nbTranscripts = 0
- self.size = 0
- self.reset()
- if self.verbosity >= 10:
- print "Getting information on %s." % (self.fileName)
- self.reset()
- for line in self.handle:
- line = line.strip()
- if line == "" or line[0] == "#":
- continue
- parts = line.split("\t")
- if len(parts) != 9:
- raise Exception("Error! Line '%s' has %d tab-separated fields instead of 9!" % (line, len(parts)))
- self.chromosomes.add(parts[0])
- if parts[8].find("Parent") == -1:
- self.nbTranscripts += 1
- else:
- self.size += max(int(parts[3]), int(parts[4])) - min(int(parts[3]), int(parts[4])) + 1
- if self.verbosity >= 10 and self.nbTranscripts % 100000 == 0:
- sys.stdout.write(" %d transcripts read\r" % (self.nbTranscripts))
- sys.stdout.flush()
- self.reset()
- if self.verbosity >= 10:
- print " %d transcripts read" % (self.nbTranscripts)
- print "Done."
-
-
- def parseLine(self, line):
- if not line or line[0] == "#":
- return None
- m = re.search(r"^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+([+-.])\s+(\S+)\s+(\S.*)$", line)
- if m == None:
- raise Exception("\nLine %d '%s' does not have a GFF format\n" % (self.currentLineNb, line))
- interval = Interval()
- interval.setChromosome(m.group(1))
- interval.setName("unnamed transcript")
- interval.setStart(min(int(m.group(4)), int(m.group(5))))
- interval.setEnd(max(int(m.group(4)), int(m.group(5))))
- if m.group(7) == ".":
- interval.setDirection("+")
- else:
- interval.setDirection(m.group(7))
- interval.setTagValue("feature", m.group(3))
- if m.group(6).isdigit():
- interval.setTagValue("score", m.group(6))
-
- remainings = m.group(9).split(";")
- for remaining in remainings:
- remaining = remaining.strip()
- if remaining == "":
- continue
- posSpace = remaining.find(" ")
- posEqual = remaining.find("=")
- if posEqual != -1 and (posEqual < posSpace or posSpace == -1):
- parts = remaining.split("=")
- else:
- parts = remaining.split()
- field = parts[0].strip()
- value = " ".join(parts[1:]).strip(" \"")
- if field in ("Name", "name", "Sequence", "TE", "SAT"):
- interval.setName(value)
- else:
- try:
- intValue = int(value)
- interval.setTagValue(field, intValue)
- except ValueError:
- interval.setTagValue(field, value)
-
- self.currentTranscriptAddress = self.previousTranscriptAddress
- if "Parent" in interval.getTagNames():
- if self.currentTranscript == None:
- raise Exception("GFF file does not start with a transcript! First line is '%s'." % (line.strip()))
- if interval.getTagValue("Parent") != self.currentTranscript.getTagValue("ID"):
- raise Exception("Exon '%s' is not right after its transcript in GFF file!" % (interval))
- self.currentTranscript.addExon(interval)
- if interval.name == None:
- interval.name = self.currentTranscript.name
- return None
-
- transcript = self.currentTranscript
- self.currentTranscript = Transcript()
- self.currentTranscript.copy(interval)
- self.previousTranscriptAddress = self.currentAddress
-
- if transcript != None and transcript.name.startswith("unnamed"):
- if "ID" in transcript.getTagNames():
- transcript.name = transcript.getTagValue("ID")
- else:
- transcript.name = "unnamed transcript %s" % (self.currentLineNb)
- return transcript
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/GffParser.pyc
b
Binary file commons/core/parsing/GffParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/GtfParser.py
--- a/commons/core/parsing/GtfParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,113 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.structure.Transcript import Transcript
-from commons.core.parsing.TranscriptListParser import TranscriptListParser
-
-
-class GtfParser(TranscriptListParser):
-    """A class that parses a GTF file and create a transcript list"""
-
-
-    def __init__(self, fileName, verbosity = 0):
-        super(GtfParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(GtfParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["gtf", "gtf2"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        pass
-
-
-    def parseLine(self, line):
-        if line[0] == "#":
-            return None
-        m = re.search(r"^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+([+-.])\s+(\S+)\s+(\S.*)$", line)
-        if m == None:
-            raise Exception("\nLine %d '%s' does not have a GTF format\n" % (self.currentLineNb, line))
-        interval = Interval()
-        interval.setChromosome(m.group(1))
-        interval.setName("unnamed transcript")
-        interval.setStart(min(int(m.group(4)), int(m.group(5))))
-        interval.setEnd(max(int(m.group(4)), int(m.group(5))))
-        if m.group(7) == ".":
-            interval.setDirection("+")
-        else:
-            interval.setDirection(m.group(7))
-        if m.group(6).isdigit():
-            interval.setTagValue("score", m.group(6))
-        type = m.group(3)
-
-        if type not in ("transcript", "exon"):
-            return None
-
-        remainings = m.group(9).split(";")
-        for remaining in remainings:
-            remaining = remaining.strip()
-            if remaining == "":
-                continue
-            parts = remaining.split(" ", 1)
-            field = parts[0].strip()
-            value = " ".join(parts[1:]).strip(" \"")
-            if field == "transcript_id":
-                interval.setTagValue("ID", value)
-            elif field == "gene_name":
-                interval.setName(value)
-            elif field == "transcript_name":
-                interval.setName(value)
-            elif field == "exon_number":
-                continue
-            else:
-                try:
-                    intValue = int(value)
-                    interval.setTagValue(field, intValue)
-                except ValueError:
-                    interval.setTagValue(field, value)
-
-        self.currentTranscriptAddress = self.previousTranscriptAddress
-        if self.currentTranscript == None or interval.getTagValue("ID") != self.currentTranscript.getTagValue("ID"):
-            transcript = self.currentTranscript
-            self.currentTranscript = Transcript()
-            self.currentTranscript.copy(interval)
-            self.currentTranscript.setTagValue("feature", "transcript")
-            self.previousTranscriptAddress = self.currentAddress
-            return transcript
-        if type == "exon":
-            self.currentTranscript.addExon(interval)
-        return None
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/GtfParser.pyc
b
Binary file commons/core/parsing/GtfParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/MapParser.py
--- a/commons/core/parsing/MapParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,67 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.Mapping import Mapping
-from commons.core.parsing.MapperParser import MapperParser
-from SMART.Java.Python.structure.SubMapping import SubMapping
-from SMART.Java.Python.misc import Utils
-from SMART.Java.Python.structure.Transcript import Transcript
-from commons.core.parsing.TranscriptListParser import TranscriptListParser
-
-
-class MapParser(TranscriptListParser):
-    """A class that parses the repet .map files"""
-
-    def __init__(self, fileName, verbosity = 0):
-        self._lineParseRe = re.compile(r"(?P<seqName>\w+)\s(?P<chrName>\w+)\s(?P<sStart>\d+)\s(?P<sEnd>\d+)")
-        TranscriptListParser.__init__(self, fileName, verbosity)
-
-    def getFileFormats():
-        return ["map"]
-    getFileFormats = staticmethod(getFileFormats)
-
-    def skipFirstLines(self):    
-        return
-        
-    def parseLine(self, line):
-        m = self._lineParseRe.search(line)
-        
-        if m == None:
-            sys.exit("\nLine %d '%s' does not have a map format" % (self.currentLineNb, line))
-            
-        transcript = Transcript()
-        transcript.setChromosome(m.group("chrName"))
-        transcript.setStart(min(int(m.group("sStart")), int(m.group("sEnd"))))
-        transcript.setEnd(max(int(m.group("sStart")), int(m.group("sEnd"))))
-        transcript.setName(m.group("seqName"))
-        transcript.setDirection(1)
-
-        return transcript
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/MapParser.pyc
b
Binary file commons/core/parsing/MapParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/MapperParser.py
--- a/commons/core/parsing/MapperParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,129 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-from SMART.Java.Python.structure.Mapping import Mapping
-
-
-class MapperParser(object):
-    """An interface that parses the output of a generic mapper"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(MapperParser, self).__init__()
-        self.verbosity = verbosity
-        self.nbMappings = None
-        self.chromosomes = None
-        self.size = None
-        self.currentMapping = Mapping()
-        self.handle = open(fileName)
-        self.currentLineNb = 0
-        self.skipFirstLines()
-        self.fileName = fileName
-        self.startingPoint = self.handle.tell()
-
-
-    def __del__(self):
-        self.handle.close()
-        
-
-    def reset(self):
-        self.handle.seek(self.startingPoint)
-        self.currentLineNb = 0
-
-
-    def getNextMapping(self):
-        for line in self.handle:
-            mapping = self.parseLine(line)
-            self.currentLineNb += 1
-            if mapping != None:
-                return mapping
-        return False
-        
-        
-    def getIterator(self):
-        self.reset()
-        mapping = self.getNextMapping()
-        while mapping:
-            yield mapping
-            mapping = self.getNextMapping()
-                
-                
-    def getInfos(self):
-        self.chromosomes = set()
-        self.nbMappings = 0
-        self.size = 0
-        self.reset()
-        if self.verbosity >= 10:
-            print "Getting information."
-        for mapping in self.getIterator():
-            transcript = mapping.getTranscript()
-            self.chromosomes.add(transcript.getChromosome())
-            self.nbMappings += 1
-            self.size += transcript.getSize()
-            if self.verbosity >= 10 and self.nbMappings % 100000 == 0:
-                sys.stdout.write("    %d mappings read\r" % (self.nbMappings))
-                sys.stdout.flush()
-        self.reset()
-        if self.verbosity >= 10:
-            print "    %d mappings read" % (self.nbMappings)
-            print "Done."
-
-
-    def getNbMappings(self):
-        if self.nbMappings != None:
-            return self.nbMappings
-        self.getInfos()
-        return self.nbMappings
-
-
-    def getNbItems(self):
-        return self.getNbMappings()
-
-
-    def getChromosomes(self):
-        if self.chromosomes != None:
-            return self.chromosomes
-        self.getInfos()
-        return self.chromosomes
-    
-    
-    def getSize(self):
-        if self.size != None:
-            return self.size
-        self.getInfos()
-        return self.size
-    
-    
-    def getNbNucleotides(self):
-        return self.getSize()
-
-
-    def setDefaultTagValue(self, name, value):
-        for mapping in self.getIterator():
-            mapping.setTagValue(name, value)
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/MapperParser.pyc
b
Binary file commons/core/parsing/MapperParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/MaqParser.py
--- a/commons/core/parsing/MaqParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,77 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.Mapping import Mapping
-from commons.core.parsing.MapperParser import MapperParser
-
-
-class MaqParser(MapperParser):
-    """A class that parses the output of Maq"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(MaqParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(MaqParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["maq"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        pass
-
-
-    def parseLine(self, line):
-        m = re.search(r"^\s*(\S+)\s+(\S+)\s+(\d+)\s+([+-])\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s*$", line)
-        if m == None:
-            sys.exit("\nLine %d '%s' does not have a MAQ format" % (self.currentLineNb, line))
-
-        mapping = Mapping()
-
-        mapping.targetInterval.setStart(int(m.group(3)))
-        mapping.targetInterval.setSize(int(m.group(14)))
-        mapping.targetInterval.setChromosome(m.group(2))
-
-        mapping.queryInterval.setStart(1)
-        mapping.queryInterval.setSize(int(m.group(14)))
-        mapping.queryInterval.setName(m.group(1))
-
-        mapping.setDirection(m.group(4))
-        mapping.setSize(int(m.group(14)))
-        mapping.setNbMismatches(int(m.group(10)))
-        mapping.setRank(1)
-        mapping.setNbOccurrences(int(m.group(12)) + int(m.group(13)))
-
-        return mapping
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/MaqParser.pyc
b
Binary file commons/core/parsing/MaqParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/MrepsToSet.py
--- a/commons/core/parsing/MrepsToSet.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,31 +0,0 @@
-from commons.core.parsing.FindRep import FindRep
-from xml.sax import make_parser
-from xml.sax.handler import feature_namespaces
-import os
-
-
-class MrepsToSet(object):
-
-    def __init__(self, mrepsInputFileName="", mrepsOuputFileName="", outputFileName=None, errorFilter=0):
-        self._mrepsInputFileName = mrepsInputFileName
-        self._mrepsOuputFileName = mrepsOuputFileName
-        self._outputFileName = outputFileName or "%s.Mreps.set" % mrepsOuputFileName  
-        self._errorFilter = errorFilter
-        
-    def run(self):
-        xmlParser = make_parser()
-        xmlParser.setFeature( feature_namespaces, 0 )
-        xmlParser.setContentHandler( FindRep( self._outputFileName, self._errorFilter, 0 ) )
-        xmlParser.parse( self._mrepsOuputFileName )
-
-    def clean( self ):
-        """
-        Remove the output file (xml) from Mreps to keep only the 'set' file.
-        """
-        if os.path.exists(self._mrepsOuputFileName):
-            os.remove(self._mrepsOuputFileName)
-        
-        
-        
-
-        
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/Multifasta2SNPFile.py
--- a/commons/core/parsing/Multifasta2SNPFile.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,846 +0,0 @@\n-import re\n-import os\n-import logging\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.seq.BioseqDB import BioseqDB\n-from commons.core.seq.Bioseq import Bioseq\n-from commons.core.LoggerFactory import LoggerFactory\n-\n-DNA_ALPHABET_WITH_N_AND_DELS = set ([\'A\',\'T\',\'G\',\'C\',\'N\',\'-\'])\n-IUPAC = set([\'A\',\'T\',\'G\',\'C\',\'U\',\'R\',\'Y\',\'M\',\'K\',\'W\',\'S\',\'B\',\'D\',\'H\',\'V\',\'N\', \'-\', \'a\',\'t\',\'g\',\'c\',\'u\',\'r\',\'y\',\'m\',\'k\',\'w\',\'s\',\'b\',\'d\',\'h\',\'v\',\'n\'])\n-\n-class Multifasta2SNPFile( object ):\n-\n-    POLYM_TYPE_4_SNP = "SNP"\n-    POLYM_TYPE_4_INSERTION = "INSERTION"\n-    POLYM_TYPE_4_DELETION = "DELETION"\n-    POLYM_DEFAULT_CONFIDENCE_VALUE = "A"\n-    SNP_LENGTH = 1\n-    FLANK_LENGTH = 250\n-    \n-    def __init__(self, taxon, batchName="", geneName=""):\n-        \n-        if(batchName):\n-            self._batchName = batchName\n-            \n-        if(geneName):\n-            self._geneName = geneName\n-\n-        self._taxon = taxon\n-        self._outSubSNPFileName = "SubSNP.csv"\n-        self._outAlleleFileName = "Allele.csv"\n-        self._outIndividualFileName = "Individual.csv"\n-        self._outSequenceFSAFileName = "Sequences.fsa"\n-        self._outSequenceCSVFileName = "Sequences.csv"\n-        self._outBatchFileName = "Batch.txt"\n-        self._outBatchLineFileName = "BatchLine.csv"\n-        self._logFileName = "multifasta2SNP.log"\n-        \n-        self._lBatchFileResults = []\n-        self._lSubSNPFileResults = []\n-        self._lRefSequences = []\n-        self._lIndividualFileResults = []\n-        self._lBatchLineFileResults = []\n-        self._dIndividualNumbers4SubSNPResults = {}\n-        self._dAlleleFileResults = {}\n-        \n-        \n-        self.dcurrentIndel = {}\n-        self.lIndelsOfTheCurrentLine = []\n-        self.lIndelsOverAllLines = []\n-        self.dSNPsPositions = {}\n-        \n-        
self._iCurrentLineNumber = 0\n-        self._currentBatchNumber = 1\n-        self.currentLineName = ""\n-        self.currentNucleotide = ""\n-        self.currentPosition = 0\n-        self._sPolymConfidenceValue = Multifasta2SNPFile.POLYM_DEFAULT_CONFIDENCE_VALUE        \n-        self._sPolymType = Multifasta2SNPFile.POLYM_TYPE_4_SNP\n-        self._iPolymLength = Multifasta2SNPFile.SNP_LENGTH\n-        self._fileUtils = FileUtils()\n-        \n-        if self._fileUtils.isRessourceExists(self._logFileName):\n-            os.remove(self._logFileName)\n-        self._logFile = LoggerFactory.createLogger(self._logFileName, logging.INFO, "%(asctime)s %(levelname)s: %(message)s")\n-   \n-    def runOneBatch( self, inFileName):\n-        self._currentFileName = inFileName\n-        #TODO: methode a virer; n\'utiliser au final que runOneBatchWithoutWriting\n-        self._wrapper = self.createWrapperFromFile(inFileName)\n-        self._lBatchFileResults = self.completeBatchList()\n-        self.detectSNPsAndIndels(self._wrapper) \n-        self._writeAllOutputFiles()\n-        self._currentBatchNumber += 1\n-        \n-    def runOneBatchWithoutWriting( self, inFileName):\n-        self.lIndelsOverAllLines = []\n-        self._currentFileName = inFileName\n-        self._wrapper = self.createWrapperFromFile(inFileName)\n-        self._lBatchFileResults = self.completeBatchList()\n-        self.detectSNPsAndIndels(self._wrapper) \n-        self._currentBatchNumber += 1\n-    \n-\n-    def _cleanOutputsInTheCurrentDir(self):\n-        #TODO: create a list of files to be deleted\n-        FileUtils.removeFilesByPattern("*.csv")\n-        if (FileUtils.isRessourceExists(self._outBatchFileName)):\n-            os.remove(self._outBatchFileName)\n-        if (FileUtils.isRessourceExists(self._outSequenceFSAFileName)):\n-            os.remove(self._outSequenceFSAFileName)\n-\n-\n-    def _createOutputObjectsIteratingOnCurrentDir(self):\n-        #TODO: gerer les extensions 
multiples\n-        extList = [".fasta", ".fsa"]\n-        for dirname, dirnames, filenames in os.walk("."):\n-            filenames.sort()\n-            for filename in filenames:\n-         '..b'elf, batchLineFileName, lBatchLineResults):\n-        outF = open(batchLineFileName, "w")\n-        self._writeBatchLineFileHeader(outF)\n-        for dResult in lBatchLineResults:\n-            self._writeBatchLineFileLine(outF, dResult)\n-        outF.close()\n-        \n-    def _writeSNPFileHeader(self, outF):\n-        for head in Multifasta2SNPFileWriter.SUB_SNP_FILE_HEADER[:-1]:\n-            outF.write(head + self._csvFieldSeparator)\n-        outF.write(Multifasta2SNPFileWriter.SUB_SNP_FILE_HEADER[-1] + self._csvLineSeparator)\n- \n-    def _writeAlleleFileHeader(self, outF):\n-        for head in Multifasta2SNPFileWriter.ALLELE_FILE_HEADER[:-1]:\n-            outF.write(head + self._csvFieldSeparator)\n-        outF.write(Multifasta2SNPFileWriter.ALLELE_FILE_HEADER[-1] + self._csvLineSeparator)\n-        \n-    def _writeIndividualFileHeader(self, outF):\n-        for head in Multifasta2SNPFileWriter.INDIVIDUAL_FILE_HEADER[:-1]:\n-            outF.write(head + self._csvFieldSeparator)\n-        outF.write(Multifasta2SNPFileWriter.INDIVIDUAL_FILE_HEADER[-1] + self._csvLineSeparator)\n-        \n-    def _writeSequenceCSVHeader(self, outF):\n-        for head in Multifasta2SNPFileWriter.SEQUENCE_CSV_FILE_HEADER[:-1]:\n-            outF.write(head + self._csvFieldSeparator)\n-        outF.write(Multifasta2SNPFileWriter.SEQUENCE_CSV_FILE_HEADER[-1] + self._csvLineSeparator)\n-       \n-    def _writeBatchLineFileHeader(self, outF):\n-        for head in Multifasta2SNPFileWriter.BATCH_LINE_FILE_HEADER[:-1]:\n-            outF.write(head + self._csvFieldSeparator)\n-        outF.write(Multifasta2SNPFileWriter.BATCH_LINE_FILE_HEADER[-1] + self._csvLineSeparator)        \n-               \n-    def _writeSNPFileLine(self, outF, dSNP):\n-        
outF.write(dSNP[\'subSNPName\'] + self._csvFieldSeparator)\n-        outF.write(dSNP[\'confidenceValue\'] + self._csvFieldSeparator + dSNP[\'type\'] + self._csvFieldSeparator)\n-        outF.write(str(dSNP[\'position\']) + self._csvFieldSeparator + dSNP[\'5flank\'] + self._csvFieldSeparator + dSNP[\'3flank\'] + self._csvFieldSeparator)\n-        outF.write(str(dSNP[\'length\']) + self._csvFieldSeparator + str(dSNP[\'batchNumber\']) + self._csvFieldSeparator)\n-        outF.write(str(dSNP[\'lineName\']) + self._csvFieldSeparator)\n-        outF.write(self._primerType + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + str(dSNP[\'allele\']) + self._csvLineSeparator)\n-\n-    def _writeAlleleFileLine(self, outF, sAllele2Write, iAlleleNumber):\n-        outF.write(str(iAlleleNumber) + self._csvFieldSeparator)\n-        outF.write(sAllele2Write + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + self._csvLineSeparator)\n-    \n-    def _writeIndividualFileLine(self, outF, dIndividual):\n-        outF.write(str(dIndividual[\'individualNumber\']) + self._csvFieldSeparator)\n-        outF.write(dIndividual[\'individualName\'] + self._csvFieldSeparator + self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator)\n-        outF.write(dIndividual[\'scientificName\'] + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator+ self._csvFieldSeparator + self._csvFieldSeparator + self._csvLineSeparator)\n-    \n-    def _writeSequenceCSVLine(self, outF, refSeq, taxon):\n-        outF.write(refSeq.header + self._csvFieldSeparator)\n-        outF.write("Reference" + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator)\n-        outF.write(taxon + self._csvLineSeparator)  
      \n-    \n-    def _writeBatchLineFileLine(self, outF, dResult):\n-        outF.write(str(dResult[\'IndividualNumber\']) + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator)\n-        outF.write(str(dResult[\'BatchNumber\']) + self._csvFieldSeparator + self._csvLineSeparator)\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/MummerParser.py
--- a/commons/core/parsing/MummerParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,93 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from commons.core.parsing.MapperParser import MapperParser
-from SMART.Java.Python.structure.Mapping import Mapping
-from SMART.Java.Python.structure.SubMapping import SubMapping
-
-class MummerParser(MapperParser):
-    """A class that parses the output of Mummer format"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(MummerParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(MummerParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["mummer"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        pass
-
-
-    def parseLine(self, line):
-        mapping = Mapping()
-
-        subMapping = SubMapping()
-
-        # handle header
-        m = re.search(r"^>\s+(\S+)\s+Reverse\s+Len\s+=\s+(\d+)$", line)
-        if m != None:
-            subMapping.queryInterval.setName(m.group(1))
-            subMapping.queryInterval.setSize(int(m.group(2)))
-            subMapping.queryInterval.setDirection(-1)
-        else:
-            m = re.search(r"^>\s+(\S+)\s+Len\s+=\s+(\d+)$", line)
-            if m != None:
-                subMapping.queryInterval.setName(m.group(1))
-                subMapping.queryInterval.setSize(int(m.group(2)))
-                subMapping.queryInterval.setDirection(1)
-            else :
-                sys.exit("Header line %d '%s' is strange in Mummer file" % (self.currentLineNb, line))
-
-        for line in self.handle:
-            self.currentLineNb += 1
-            break
-        line = line.strip()
-
-        # handle line
-        m = re.search(r"^(\w+)\s+(\d+)\s+(\d+)\s+(\d+)$", line)
-        if m != None:
-            subMapping.targetInterval.setName(m.group(1))
-            subMapping.targetInterval.setStart(int(m.group(2)))
-            subMapping.queryInterval.setStart(int(m.group(3)))
-            subMapping.targetInterval.setSize(int(m.group(4)))
-        else:
-            sys.exit("Line %d '%s' is strange in Mummer file" % (self.currentLineNb, line))
-
-        mapping.addSubMapping(subMapping)
-
-        return mapping
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/NCListParser.py
--- a/commons/core/parsing/NCListParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,125 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2012
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from commons.core.parsing.TranscriptListParser import TranscriptListParser
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.ncList.NCList import NCList
-from SMART.Java.Python.ncList.NCListCursor import NCListCursor
-from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-try:
-   import cPickle as pickle
-except:
-   import pickle
-
-
-class NCListParser(TranscriptListParser):
-
-
- def __init__(self, fileName, verbosity = 0):
- self.title = None
- TranscriptListParser.__init__(self, fileName, verbosity)
- self.parse()
-
- def getFileFormats():
- return ["nclist"]
- getFileFormats = staticmethod(getFileFormats)
-
- def skipFirstLines(self):
- return
-
- def parse(self):
- handle                       = open(self.fileName)
- self.sortedFileNames         = pickle.load(handle)
- self.nbElements              = pickle.load(handle)
- self.nbElementsPerChromosome = pickle.load(handle)
- self.ncLists                 = pickle.load(handle)
- for ncList in self.ncLists.values():
- ncList._reopenFiles()
- handle.close()
- self.chromosomes     = sorted(self.nbElementsPerChromosome.keys())
- self.fileNames       = dict([chromosome, self.ncLists[chromosome]._transcriptFileName] for chromosome in self.chromosomes)
- self.currentReader   = None
- self.currentChrIndex = 0
-
- def getSortedFileNames(self):
- return self._sortedFileNames
-
- def getNbElements(self):
- return self._nbElements
-
- def getNbElementsPerChromosome(self):
- return self._nbElementsPerChromosome
-
- def getNCLists(self):
- return self._ncLists
-
- def reset(self):
- self.currentChrIndex = 0
- self.currentReader   = None
-
- def gotoAddress(self, address):
- self.currentReader.gotoAddress(address)
-
- def getCurrentAddress(self):
- return self.getCurrentTranscriptAddress()
-
- def getCurrentTranscriptAddress(self):
- if self.currentReader == None:
- return 0
- return self.currentReader.getCurrentTranscriptAddress()
-
- def getNextTranscript(self):
- if self.currentReader == None:
- self.currentReader = NCListFileUnpickle(self.fileNames[self.chromosomes[0]])
- transcript = self.currentReader.getNextTranscript()
- if transcript == False:
- self.currentChrIndex += 1
- if self.currentChrIndex >= len(self.chromosomes):
- return None
- self.currentReader = NCListFileUnpickle(self.fileNames[self.chromosomes[self.currentChrIndex]])
- transcript = self.currentReader.getNextTranscript()
- return transcript
-
- def getInfos(self):
- self.size = 0
- self.reset()
- progress = UnlimitedProgress(100000, "Getting information on %s." % (self.fileName), self.verbosity-9)
- transcript = self.getNextTranscript()
- for transcript in self.getIterator():
- self.size += transcript.getSize()
- progress.inc()
- progress.done()
- self.reset()
-
- def getNbTranscripts(self):
- return self.nbElements
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/NCListParser.pyc
b
Binary file commons/core/parsing/NCListParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/NucmerParser.py
--- a/commons/core/parsing/NucmerParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,88 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.SubMapping import SubMapping
-from SMART.Java.Python.structure.Mapping import Mapping
-from SMART.Java.Python.structure.Interval import Interval
-from commons.core.parsing.MapperParser import MapperParser
-
-
-class NucmerParser(MapperParser):
-    """A class that parses the output of Nucmer"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(NucmerParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(NucmerParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["nucmer"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        pass
-
-
-    def parseLine(self, line):
-        if not line:
-            return None
-        if line[0] == ">":
-            self.currentChromosome = line[1:].split()[0]
-            return None
-        splittedLine = line.strip().split()
-        if len(splittedLine) != 8:
-            raise Exception("Line %d '%s' does not have a NucMer format" % (self.currentLineNb, line))
-
-        subMapping = SubMapping()
-
-        subMapping.targetInterval.setChromosome(self.currentChromosome)
-        subMapping.targetInterval.setName(self.currentChromosome)
-        subMapping.targetInterval.setStart(min(int(splittedLine[0]), int(splittedLine[1])))
-        subMapping.targetInterval.setEnd(max(int(splittedLine[0]), int(splittedLine[1])))
-        subMapping.targetInterval.setDirection(splittedLine[6])
-
-        subMapping.queryInterval.setChromosome(splittedLine[7])
-        subMapping.queryInterval.setName(splittedLine[7])
-        subMapping.queryInterval.setStart(1)
-        subMapping.queryInterval.setEnd(int(splittedLine[3]))
-        subMapping.queryInterval.setDirection("+")
-
-        mapping = Mapping()
-        mapping.addSubMapping(subMapping)
-        mapping.setDirection(splittedLine[6])
-        mapping.setIdentity(float(splittedLine[5]))
-        mapping.setSize(int(splittedLine[3]))
-
-        return mapping
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/PalsToAlign.py
--- a/commons/core/parsing/PalsToAlign.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,66 +0,0 @@
-import time
-import os
-
-class PalsToAlign(object):
-    """
-    Convert the output from PALS (GFF2 format) into the 'align' format.
-    """
-    def __init__(self,inputPalsFileName="" , outputAlignFileName="", removeSameSequences=False):
-        self._removeSameSequences = removeSameSequences
-        self._inputPalsFileName = inputPalsFileName
-        self._outputAlignFileName = outputAlignFileName
-
-    def run (self):
-        file = open(self._inputPalsFileName, "r")
-        tmpFileName = "PalsToAlign%s"%str(os.getpid() ) 
-        tmpFile = open(tmpFileName, "w")
-        
-        for line in file.readlines():
-    
-            if line == "":
-                break
-    
-            data = line.split("\t")
-    
-            qryName = data[0]
-            source = data[1]
-            feature = data[2]
-            qryStart = data[3]
-            qryEnd = data[4]
-            score = data[5]
-            strand = data[6]
-            frame = data[7]
-            attributes = data[8][:-1].split()
-    
-            sbjName = attributes[1]
-            sbjStart = attributes[2]
-            sbjEnd = attributes[3][:-1]
-            percId = (1 - float(attributes[-1])) * 100.0
-    
-            if strand != "+":
-                tmp = sbjStart
-                sbjStart = sbjEnd
-                sbjEnd = tmp
-    
-            if self._removeSameSequences \
-            and "chunk" in qryName and "chunk" in sbjName \
-            and min(int(qryStart), int(qryEnd)) == 1 \
-            and min(int(sbjStart), int(sbjEnd)) == 1 \
-            and percId == 100.0:
-                line = self.inFile.readline()
-                continue
-    
-            if qryStart < qryEnd:
-                alignLine = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, "0.0", score, percId)
-            else:
-                alignLine = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (qryName, qryEnd, qryStart, sbjName, sbjEnd, sbjStart, "0.0", score, percId)
-    
-            tmpFile.write(alignLine)
-
-        file.close()
-        tmpFile.close()
-    
-        os.system("sort -k 1,1 -k 4,4 -k 2,2n -k 3,3n -k 5,5n -k 6,6n -k 8,8n %s > %s" % (tmpFileName, self._outputAlignFileName))
-        os.remove(tmpFileName)
-
-        
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/ParserChooser.py
--- a/commons/core/parsing/ParserChooser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,129 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-from commons.core.parsing.TranscriptListParser import TranscriptListParser
-from commons.core.parsing.MapperParser import MapperParser
-from commons.core.parsing.SequenceListParser import SequenceListParser
-from commons.core.parsing.BedParser import BedParser
-from commons.core.parsing.GffParser import GffParser
-from commons.core.parsing.MapperParser import MapperParser
-from commons.core.parsing.CoordsParser import CoordsParser
-from commons.core.parsing.SeqmapParser import SeqmapParser
-from commons.core.parsing.SoapParser import SoapParser
-from commons.core.parsing.Soap2Parser import Soap2Parser
-from commons.core.parsing.BlastParser import BlastParser
-from commons.core.parsing.PslParser import PslParser
-from commons.core.parsing.RmapParser import RmapParser
-from commons.core.parsing.ShrimpParser import ShrimpParser
-from commons.core.parsing.AxtParser import AxtParser
-from commons.core.parsing.ExoParser import ExoParser
-from commons.core.parsing.MaqParser import MaqParser
-from commons.core.parsing.SamParser import SamParser
-from commons.core.parsing.BamParser import BamParser
-from commons.core.parsing.BowtieParser import BowtieParser
-from commons.core.parsing.ElandParser import ElandParser
-from commons.core.parsing.GtfParser import GtfParser
-from commons.core.parsing.FastaParser import FastaParser
-from commons.core.parsing.FastqParser import FastqParser
-from commons.core.parsing.MapParser import MapParser
-from commons.core.parsing.WigParser import WigParser
-from commons.core.parsing.NCListParser import NCListParser
-from commons.core.parsing.PklParser import PklParser
-
-#Attention!! Do not delete the imports!! They are used to know the type of file format!!!
-
-class ParserChooser(object):
-    """
-    A class that finds the correct parser
-    @ivar format: the format
-    @type format: string
-    @ivar type: transcript / mapping / sequence parser
-    @type type: string
-    @ivar parser: the parser
-    @type parser: object
-    @ivar verbosity: verbosity
-    @type verbosity: int        
-    """
-
-    def __init__(self, verbosity = 0):
-        """
-        Constructor
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        self.type = None
-        self.parserClass = None
-        self.verbosity = verbosity
-    
-
-    def findFormat(self, format, type = None):
-        """
-        Find the correct parser
-        @ivar format: the format
-        @type format: string
-        @ivar type: transcript / mapping / sequence parser (None is all)
-        @type type: string
-        @return: a parser
-        """
-        classes = {}
-        if (type == "transcript"):
-            classes = {TranscriptListParser: "transcript"}
-        elif (type == "mapping"):
-            classes = {MapperParser: "mapping"}
-        elif (type == "sequence"):
-            classes = {SequenceListParser: "sequence"}
-        elif (type == None):
-            classes = {TranscriptListParser: "transcript", MapperParser: "mapping", SequenceListParser: "sequence"}
-        else:
-            raise Exception("Do not understand format type '%s'" % (type))
-
-        for classType in classes:
-            for parserClass in classType.__subclasses__():
-                if format in parserClass.getFileFormats():
-                    self.parserClass = parserClass
-                    self.type = classes[classType]
-                    return
-        raise Exception("Cannot get parser for format '%s'" % (format))
-
-
-    def getParser(self, fileName):
-        """
-        Get the parser previously found
-        @return: the parser
-        """
-        return self.parserClass(fileName, self.verbosity)
-
-
-    def getType(self):
-        """
-        Get the type of parser previously found
-        @return: the type of parser
-        """
-        return self.type
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/ParserChooser.pyc
b
Binary file commons/core/parsing/ParserChooser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/PathNum2Id.py
--- a/commons/core/parsing/PathNum2Id.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,47 +0,0 @@
-class PathNum2Id( object ):
-
-    def __init__(self):
-        self._inFileName = None
-        self._outFileName = None
-
-    def setInFileName(self, fileName):  
-        self._inFileName = fileName  
-        
-    def setOutFileName(self, fileName):  
-        self._outFileName = fileName        
-        
-    def run( self ):
-        """
-        Adapt the path IDs as the input file is the concatenation of several 'path' files.
-        """
-        self._inFile = open( self._inFileName, "r" )
-        self._outFile = open( self._outFileName, "w" )
-        lines = self._inFile.readlines()
-        dID2count = {}
-        count = 1
-        for line in lines:
-            if line == "":
-                break
-            strippedLine = line.strip('\n')
-            data = strippedLine.split("\t")
-            path = data[0]
-            qryName = data[1]
-            qryStart = int(data[2])
-            qryEnd = int(data[3])
-            sbjName = data[4]
-            sbjStart = int(data[5])
-            sbjEnd = int(data[6])
-            BLAST_Eval = data[7]
-            BLAST_score = data[8]
-            percId = data[9]
-            key_id = path + "-" + qryName + "-" + sbjName
-            if key_id not in dID2count.keys():
-                newPath = count
-                count += 1
-                dID2count[ key_id ] = newPath
-            else:
-                newPath = dID2count[ key_id ]
-            cmd = "%i\t%s\t%i\t%i\t%s\t%i\t%i\t%s\t%s\t%s\n" % ( newPath, qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, BLAST_Eval, BLAST_score, percId )
-            self._outFile.write( cmd )
-        self._inFile.close()
-        self._outFile.close()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/PilerTAToGrouperMap.py
--- a/commons/core/parsing/PilerTAToGrouperMap.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,85 +0,0 @@
-import time
-import os
-
-class PilerTAToGrouperMap(object):
-    """
-   Convert the output file from Piler into grouper format.
-    """
-    def __init__(self, inputGffFileName, inputPYRFileName, inputMOTIFFileName, outputFileName):
-        self._inputGffFileName = inputGffFileName
-        self._inputPYRFileName = inputPYRFileName
-        self._inputMOTIFFileName = inputMOTIFFileName
-        self._outFileName = outputFileName
-
-    def run (self):
-        inFileGff = open( self._inputGffFileName, "r" )  
-        inFilePyr = open( self._inputPYRFileName, "r" )   
-        outFile = open(self._outFileName,"w") 
-        
-        #step 0 : get pile Info and write out an info file
-        for pyrLine in inFilePyr.readlines():#-tan_pyr.gff
-            if pyrLine == "":
-                break
-            pileIndex = ""
-            pyrIndex = pyrLine.split('\t')[8].replace ('PyramidIndex', 'Pyramid')
-            for gffLine in inFileGff.readlines(): #-tan.gff
-                if gffLine == "":
-                    break
-                if pyrIndex in gffLine:
-                    pileIndex = gffLine.split(';')[1].strip()
-                    break    
-            line = "%s\t%s" % (pileIndex, pyrIndex)
-            outFile.write(line)
-           
-        inFilePyr.close()
-        inFileGff.close()
-        outFile.close()    
-                
-        #Step 1 : Add pile info to motif file and write out two files one with grouperID and one in map format
-        outFileMotifGrpFileName = self._inputMOTIFFileName + ".grp"
-        outFileMotifGrpMapFileName = self._inputMOTIFFileName + ".grp.map"
-        
-        inFileInfo = open(self._outFileName,"r") 
-        inFileMotif = open(self._inputMOTIFFileName, "r" )
-        outFileMotifGrp = open(outFileMotifGrpFileName, "w" )
-        outFileMotifGrpMap = open(outFileMotifGrpMapFileName, "w" )
-         
-        inFileInfos = inFileInfo.readlines()
-        lineInfoIndex = 0
-        
-        for countMotif,lineMotif in enumerate(inFileMotif.readlines()):
-            if lineMotif == "":
-                    break
-            dataMotif = lineMotif.split(';')
-            motif, pyrNameMotif  = dataMotif[:2]
-            pyrNameMotif = pyrNameMotif.strip()
-            pileNameMotif = ""
-            
-            while lineInfoIndex < len(inFileInfos):
-                lineInfo = inFileInfos[lineInfoIndex]
-                if lineInfo == "":
-                    break
-                if pyrNameMotif in lineInfo:          
-                    pileNameMotif = lineInfo.split('\t')[0]
-                    break
-                lineInfoIndex +=1
-                
-            #translate to Grouper IdFormat
-            pyrID = pyrNameMotif.split(' ')[1]
-            pileID = pileNameMotif.split(' ')[1]
-            dataMotif = motif.split ('\t')
-            chrm = dataMotif [0]
-            start,end = dataMotif [3:5]
-            countMotif += 1
-            memberID = "MbS%sGr" % (countMotif) + pyrID + "Cl" + pileID
-            
-            stringMotif = "%s\t%s\t%s\t%s\n" % ( memberID, motif, pileNameMotif, pyrNameMotif)
-            outFileMotifGrp.write( stringMotif)
-    
-            stringGrpMap = "%s\t%s\t%s\t%s\n" % ( memberID, chrm, start, end )
-            outFileMotifGrpMap.write( stringGrpMap )    
-          
-        inFileMotif.close()
-        inFileInfo.close()
-        outFileMotifGrp.close()
-        outFileMotifGrpMap.close()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/PklParser.py
--- a/commons/core/parsing/PklParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,112 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-try:
-  import cPickle as pickle
-except:
-  import pickle
-from SMART.Java.Python.structure.Interval import Interval
-from commons.core.parsing.TranscriptListParser import TranscriptListParser
-from SMART.Java.Python.structure.Transcript import Transcript
-
-
-class PklParser(TranscriptListParser):
- """A class that parses the intern PKL file and create a transcript list"""
-
- def __init__(self, fileName, verbosity = 1):
- self.title = None
- super(PklParser, self).__init__(fileName, verbosity)
- self.handle    = open(fileName, "rb")
- self.verbosity    = verbosity
- self.initAddress   = 0
- self.address    = self.initAddress
- self.over    = False
- self.chromosome = None
-
- def __del__(self):
- super(PklParser, self).__del__()
-
- def getFileFormats():
- return ["pkl"]
- getFileFormats = staticmethod(getFileFormats)
-
-
- def skipFirstLines(self):
- return
-
-
- def reset(self):
- self.handle.seek(0)
- self.initAddress = 0
-
-
- def setChromosome(self, chromosome):
- self.chromosome = chromosome
-
-
- def gotoAddress(self, address):
- self.handle.seek(address)
- self.address = address
-
-
- def getNextTranscript(self):
- self.address = self.handle.tell()
- try:
- transcript = pickle.load(self.handle)
- if self.chromosome != None and transcript.getChromosome() != self.chromosome:
- self.over = True
- return False
- return transcript
- except EOFError:
- self.over = True
- return False
-
-
- def getIterator(self):
- self.gotoAddress(self.initAddress)
- while True:
- transcript = self.getNextTranscript()
- if not transcript:
- self.over = True
- return
- yield transcript
-
-
- def setInitAddress(self, address):
- self.initAddress = address
-
-
- def getCurrentTranscriptAddress(self):
- return self.address
-
-
- def isOver(self):
- return self.over
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/PklParser.pyc
b
Binary file commons/core/parsing/PklParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/PslParser.py
--- a/commons/core/parsing/PslParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,155 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from commons.core.parsing.MapperParser import MapperParser
-from SMART.Java.Python.structure.Mapping import Mapping
-from SMART.Java.Python.structure.SubMapping import SubMapping
-from SMART.Java.Python.structure.Interval import Interval
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-
-class PslParser(MapperParser):
-    """A class that parses the output of PSL format (of SSAHA and BLAT)"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(PslParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(PslParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["psl"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def getInfos(self):
-        self.chromosomes = set()
-        self.nbMappings  = 0
-        self.size        = 0
-        self.reset()
-        progress = UnlimitedProgress(100000, "Getting info on PSL file, # mappings read:", self.verbosity)
-        for line in self.handle:
-            progress.inc()
-            line = line.strip()
-            if line == "":
-                continue
-            parts      = line.split("\t")
-            chromosome = parts[13]
-            self.chromosomes.add(chromosome)
-            self.nbMappings += 1
-            self.size += len(parts[0])
-        self.reset()
-        progress.done()
-
-
-    def skipFirstLines(self):
-        while "------" not in self.handle.readline():
-            self.currentLineNb += 1
-            pass
-
-    def _computeStarts(self,seqSize,blockSize,start,targetStrand):
-        if targetStrand == "+":
-            pass
-        else:
-            start = seqSize-blockSize-start
-        return start
-            
-
-
-    def parseLine(self, line):
-        m = re.search(r"^\s*(psl:\s+)?(\d+)\s+(\d+)\s+(\d+)\s+\d+\s+\d+\s+(\d+)\s+\d+\s+(\d+)\s+([+-]{1,2})\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s*$", line)
-        if m == None:
-            raise Exception("\nLine %d '%s' does not have a PSL format" % (self.currentLineNb, line))
-
-        mapping = Mapping()
-
-        queryStrand = m.group(7)[0]
-
-        if len(m.group(7)) == 1:
-            targetStrand = "+"
-        else:
-            targetStrand = m.group(7)[1]
-
-
-        for i in range(0, int(m.group(16))):
-            size        = int(m.group(17).split(",")[i])
-            queryStart  = int(m.group(18).split(",")[i])
-            targetStart = int(m.group(19).split(",")[i])
-            querySize   = int(m.group(9))
-            targetSize  = int(m.group(13))
-            
-            subMapping = SubMapping()
-            subMapping.setSize(size)
-            subMapping.setDirection(m.group(7)[0])
-
-            queryInterval  = Interval()
-            targetInterval = Interval()
-
-            queryInterval.setName(m.group(8))
-            queryStart = self._computeStarts(querySize,size,queryStart,targetStrand)
-            queryInterval.setStart(queryStart + 1)
-            queryInterval.setEnd(queryStart + size)
-            queryInterval.setDirection(queryStrand)
-
-            targetInterval.setChromosome(m.group(12))
-            targetStart = self._computeStarts(targetSize,size,targetStart,targetStrand)
-            targetInterval.setStart(targetStart + 1)
-            targetInterval.setEnd(targetStart + size)
-            targetInterval.setDirection(targetStrand)
-
-            subMapping.setQueryInterval(queryInterval)
-            subMapping.setTargetInterval(targetInterval)
-            mapping.addSubMapping(subMapping)
-
-        mapping.setSize(int(m.group(2)) + int(m.group(3)) + int(m.group(4)))
-        mapping.setNbMismatches(int(m.group(3)) + int(m.group(4)))
-        mapping.setNbGaps(int(m.group(5)))
-        mapping.setDirection(queryStrand)
-
-        queryInterval  = Interval()
-        targetInterval = Interval()
-
-        queryInterval.setName(m.group(8))
-        queryInterval.setStart(min(int(m.group(10)), int(m.group(11))))
-        queryInterval.setEnd(  max(int(m.group(10)), int(m.group(11))))
-        queryInterval.setDirection(queryStrand)
-
-        targetInterval.setChromosome(m.group(12))
-        targetInterval.setStart(min(int(m.group(14))+1, int(m.group(15))))
-        targetInterval.setEnd(  max(int(m.group(14))+1, int(m.group(15))))
-        targetInterval.setDirection(targetStrand)
-
-        mapping.setQueryInterval(queryInterval)
-        mapping.setTargetInterval(targetInterval)
-
-        return mapping
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/PslParser.pyc
b
Binary file commons/core/parsing/PslParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/README_MultiFasta2SNPFile
--- a/commons/core/parsing/README_MultiFasta2SNPFile Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,66 +0,0 @@
-*** DESCRIPTION: ***
-This program takes as input a multifasta file (with sequences already aligned together formated in fasta in the same file), considers the first sequence as the reference sequence, infers polymorphims and generates output files in GnpSNP exchange format.
-
-
-*** INSTALLATION: ***
-Dependancies: 
-- First you need Python installed in your system.
-- Repet libraries are also required.
-
-*** OPTIONS OF THE LAUNCHER: ***
-
-    -h: this help
-
-Mandatory options:    
-         -b: Name of the batch of submitted sequences
-         -g: Name of the gene
-         -t: Scientific name of the taxon concerned
-
-Exclusive options (use either the first or the second)
-         -f: Name of the multifasta input file (for one input file)
-         -d: Name of the directory containing multifasta input file(s) (for several input files)
-
-
-
-*** COMMAND LINE EXAMPLE (for package use): ***
-- First, you need to set up the environment variable PYTHONPATH (lo link with the dependancies).
-
-- Then for one input file (here our example), run:
-
-python multifastaParserLauncher.py -b Batch_test -g GeneX -t "Arabidopsis thaliana" -f Exemple_multifasta_input.fasta
-
-
-- For several input files, create a directory in the root of the uncompressed package and put your input files in it. Then use this type of command line:
-
-python multifastaParserLauncher.py -b Batch_test -g GeneX -t "Arabidopsis thaliana" -d <Name_of_the_directory>
-
-Each one of the input files will generate a directory with his set of output files.
-
-
-*** SIMPLE USE (for package use): ***
-Two executables (one for windows, the other for linux/unix) are in the package.
-They show the command lines to use in order to set up environment variables and then to run the parser on our sample input file (Example_multifasta_input.fasta).
-You can edit the executable and custom the command line to use it with your own input file.
-
-
-*** BACKLOG (next version) ***
-When the launcher is called for several input files (with -d option), the parser should be able to generate only one set of files describing all the batches (one batch per input file).
-So below are listed the tasks of the backlog dedicated to this feature:
-
-- in Multifasta2SNPFile class: 
-  # CONSTRUCTOR: Modify the constructor to add a "several batches" mode called without BatchName and GeneName
-  # RUNNING METHOD: Add the run_several_batches(directory) method that will browse the input files and iterate over them to run each of them successively (see runSeveralInputFile() method of the launcher)
-  => 2 days
-  
-  # BATCH MANAGEMENT: Modify createBatchDict() to create one batch per file in the dictionary and add a class variable to point toward the current batch (ex: self._iCurrentLineNumber)
-  # BATCH-LINE MANAGEMENT: Modify _completeBatchLineListWithCurrentIndividual method to allow several batch and link lines to batches (for the moment hard coded batch no1)
-  # SUBSNP MANAGEMENT: check that all elements (dSUbSNP) added in SubSNP list (lSubSNPFileResults) is linked to the current batch (for the moment hard coded batch no1)
-    Impacted methods: manageSNPs(), createSubSNPFromAMissingPolym(), addMissingAllelesAndSubSNPsForOnePolym(), mergeAllelesAndSubSNPsFromOverlappingIndels()
-  => + 2 days
-  
-- in Multifasta2SNPFileWriter class:
-  # Modify all the method _write<X>File (ex: _writeSubSNPFile) to write in append mode and externalize all open and close file 
-  # Create one method to open all the output files and call it in Multifasta2SNPFile run_several_batches method
-  # Create one method to close all the output files and call it in Multifasta2SNPFile run_several_batches method
-  
-  => + 2 days
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/RmapParser.py
--- a/commons/core/parsing/RmapParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,76 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from commons.core.parsing.MapperParser import MapperParser
-from SMART.Java.Python.structure.Mapping import Mapping
-
-class RmapParser(MapperParser):
-    """A class that parses the output of Rmap format"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(RmapParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(RmapParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["rmap"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        pass
-
-
-    def parseLine(self, line):
-        m = re.search(r"^\s*(\S+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+([+-])\s*$", line)
-        if m == None:
-            sys.exit("\nLine %d '%s' does not have a RMAP format" % (self.currentLineNb, line))
-
-        mapping = Mapping()
-
-        mapping.targetInterval.setChromosome(m.group(1))
-        mapping.targetInterval.setStart(min(int(m.group(2)), int(m.group(3))))
-        mapping.targetInterval.setEnd(max(int(m.group(2)), int(m.group(3))))
-
-        mapping.queryInterval.setName(m.group(4))
-        mapping.queryInterval.setStart(1)
-        mapping.queryInterval.setSize(mapping.targetInterval.getEnd() - mapping.targetInterval.getStart())
-
-        mapping.setSize(mapping.targetInterval.getEnd() - mapping.targetInterval.getStart())
-        mapping.setNbMismatches(int(m.group(5)))
-        mapping.setDirection(m.group(6))
-
-        return mapping
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/RmapParser.pyc
b
Binary file commons/core/parsing/RmapParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/SamParser.py
--- a/commons/core/parsing/SamParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,234 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import re\n-import sys\n-from commons.core.parsing.MapperParser import MapperParser\n-from SMART.Java.Python.structure.Mapping import Mapping\n-from SMART.Java.Python.structure.SubMapping import SubMapping\n-from SMART.Java.Python.structure.Interval import Interval\n-\n-class SamParser(MapperParser):\n-    """A class that parses SAM format (as given by BWA)"""\n-\n-    def __init__(self, fileName, verbosity = 0):\n-        super(SamParser, self).__init__(fileName, verbosity)\n-\n-\n-    def __del__(self):\n-        super(SamParser, self).__del__()\n-\n-\n-    def getFileFormats():\n-        return ["sam"]\n-    getFileFormats = staticmethod(getFileFormats)\n-\n-\n-    def skipFirstLines(self):\n-        pass\n-\n-\n-    def getInfos(self):\n-        self.chromosomes = set()\n-        self.nbMappings  = 0\n-        self.size        = 0\n-        self.reset()\n-        if self.verbosity >= 10:\n-            print "Getting information on SAM file"\n-        self.reset()\n-        for line in self.handle:\n-            line = line.strip()\n-            if line == "" or line[0] == "@":\n-                continue\n-            parts      = line.split("\\t")\n-            chromosome = parts[2]\n-            if chromosome != "*":\n-                self.chromosomes.add(chromosome)\n-            self.nbMappings += 1\n-            self.size += len(parts[8])\n-            if self.verbosity >= 10 and self.nbMappings % 100000 == 0:\n-                sys.stdout.write("    %d mappings read\\r" % (self.nbMappings))\n-                
sys.stdout.flush()\n-        self.reset()\n-        if self.verbosity >= 10:\n-            print "    %d mappings read" % (self.nbMappings)\n-            print "Done."\n-\n-\n-    def parseLine(self, line):\n-\n-        line = line.strip()\n-        if line[0] == "@":\n-            return\n-\n-        fields = line.split("\\t")\n-        if len(fields) < 11:\n-            raise Exception("Line %d \'%s\' does not look like a SAM line (number of fields is %d instead of 11)" % (self.currentLineNb, line, len(fields)))\n-\n-        name = fields[0]\n-        flag = int(fields[1])\n-\n-        if (flag & 0x4) == 0x4:\n-            return None\n-\n-        direction       = 1 if (flag & 0x10) == 0x0 else -1\n-        chromosome      = fields[2]\n-        genomeStart     = int(fields[3])\n-        quality         = fields[4]\n-        cigar           = fields[5]\n-        mate            = fields[6]\n-        mateGenomeStart = fields[7]\n-        gapSize         = fields[8]\n-        sequence        = fields[9]'..b'e:\n-                currentNumber = currentNumber * 10 + (ord(char) - ord("0"))\n-                continue\n-            # match\n-            m = re.match(r"[M]", char)\n-            if m != None:\n-                if readStart == None:\n-                    readStart = queryOffset\n-                if subMapping == None:\n-                    subMapping = SubMapping()\n-                    subMapping.setSize(currentNumber)\n-                    subMapping.setDirection(direction)\n-                    subMapping.queryInterval.setName(name)\n-                    subMapping.queryInterval.setStart(queryOffset)\n-                    subMapping.queryInterval.setDirection(direction)\n-                    subMapping.targetInterval.setChromosome(chromosome)\n-                    subMapping.targetInterval.setStart(genomeStart + targetOffset)\n-                    subMapping.targetInterval.setDirection(1)\n-                nbMatches    += currentNumber\n-                
targetOffset += currentNumber\n-                queryOffset  += currentNumber\n-                currentNumber = 0\n-                continue\n-            # insertion on the read\n-            m = re.match(r"[I]", char)\n-            if m != None:\n-                nbGaps       += 1\n-                queryOffset  += currentNumber\n-                currentNumber = 0\n-                continue\n-            # insertion on the genome\n-            m = re.match(r"[D]", char)\n-            if m != None:\n-                if subMapping != None:\n-                    subMapping.queryInterval.setEnd(queryOffset - 1)\n-                    subMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n-                    mapping.addSubMapping(subMapping)\n-                subMapping    = None\n-                nbGaps       += 1\n-                targetOffset += currentNumber\n-                currentNumber = 0\n-                continue\n-            # intron\n-            m = re.match(r"[N]", char)\n-            if m != None:\n-                if subMapping != None:\n-                    subMapping.queryInterval.setEnd(queryOffset - 1)\n-                    subMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n-                    mapping.addSubMapping(subMapping)\n-                subMapping    = None\n-                targetOffset += currentNumber\n-                currentNumber = 0\n-                continue\n-            # soft clipping (substitution)\n-            m = re.match(r"[S]", char)\n-            if m != None:\n-                nbMismatches += currentNumber\n-                targetOffset += currentNumber\n-                queryOffset  += currentNumber\n-                currentNumber = 0\n-                continue\n-            # hard clipping\n-            m = re.match(r"[H]", char)\n-            if m != None:\n-                targetOffset += currentNumber\n-                queryOffset  += currentNumber\n-                currentNumber = 0\n-    
            continue\n-            # padding\n-            m = re.match(r"[P]", char)\n-            if m != None:\n-                continue\n-            raise Exception("Do not understand paramer \'%s\' in line %s" % (char, line))\n-\n-        if subMapping != None:\n-            subMapping.queryInterval.setEnd(queryOffset - 1)\n-            subMapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n-            mapping.addSubMapping(subMapping)\n-\n-        mapping.queryInterval.setStart(readStart)\n-        mapping.queryInterval.setEnd(queryOffset - 1)\n-        mapping.targetInterval.setEnd(genomeStart + targetOffset - 1)\n-        mapping.setNbMismatches(nbMismatches)\n-        mapping.setNbGaps(nbGaps)\n-\n-        mapping.queryInterval.setName(name)\n-        mapping.queryInterval.setDirection(direction)\n-        mapping.targetInterval.setChromosome(chromosome)\n-        mapping.targetInterval.setStart(genomeStart)\n-        mapping.targetInterval.setDirection(direction)\n-        mapping.setSize(len(sequence))\n-        mapping.setDirection(direction)\n-\n-        return mapping\n-\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/SamParser.pyc
b
Binary file commons/core/parsing/SamParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/SeqmapParser.py
--- a/commons/core/parsing/SeqmapParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,81 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from commons.core.parsing.MapperParser import MapperParser
-from SMART.Java.Python.structure.Mapping import Mapping
-
-
-class SeqmapParser(MapperParser):
-    """A class that parses the output of SeqMap"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(SeqmapParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(SeqmapParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["seqmap"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        self.startingPoint = self.handle.tell()
-        self.currentLineNb += 1
-        if "trans_id" not in self.handle.readline():
-            self.currentLineNb -= 1
-            self.handle.seek(self.startingPoint)
-        self.startingPoint = self.handle.tell()
-
-
-    def parseLine(self, line):
-        m = re.search(r"^\s*(\S+)\t+(\d+)\t+(\w+)\t+([^\t]+)\t+(\w+)\t+(\d+)\t+([+-])\s*$", line)
-        if m == None:
-            sys.exit("\nLine %d '%s' does not have a SeqMap format" % (self.currentLineNb, line))
-
-        mapping = Mapping()
-
-        mapping.targetInterval.setChromosome(m.group(1))
-        mapping.targetInterval.setStart(int(m.group(2)))
-        mapping.targetInterval.setSize(len(m.group(3)))
-
-        mapping.queryInterval.setName(m.group(4))
-        mapping.queryInterval.setStart(1)
-        mapping.queryInterval.setSize(len(m.group(3)))
-
-        mapping.setSize(len(m.group(3)))
-        mapping.setNbMismatches(int(m.group(6)))
-        mapping.setDirection(m.group(7))
-
-        return mapping
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/SeqmapParser.pyc
b
Binary file commons/core/parsing/SeqmapParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/SequenceListParser.py
--- a/commons/core/parsing/SequenceListParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,228 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-from SMART.Java.Python.structure.SequenceList import SequenceList
-from SMART.Java.Python.misc.Progress import Progress
-
-class SequenceListParser(object):
- """
- A virtual class that reads a list of sequences
- @ivar verbosity:    verbosity
- @type verbosity:    int
- @ivar fileName:    name of the file to parse
- @type fileName:    string
- @ivar handle:    file to parse
- @type handle:    file
- @ivar nbSequences:    number of sequences in the file
- @type nbSequences:    int
- @ivar nbReadSequences: number of sequences read
- @type nbReadSequences: int
- @ivar currentLine:    line currently read
- @type currentLine:    string
- @ivar size:    total number of nucleotides in the sequences
- @type size:    int
- @ivar sizes:    number of nucleotides per sequences
- @type sizes:    dict of string to int
- """
-
- def __init__(self, fileName, verbosity = 0):
- """
- Constructor
- @param verbosity:  verbosity
- @type verbosity: int
- @param fileName:   name of the file to parse
- @type fileName:  string
- """
- self.verbosity = verbosity
- self.fileName = fileName
- self.nbSequences = None
- self.nbReadSequences = 0
- self.currentLine = None
- self.size = None
- self.sizes = None
- try:
- self.handle = open(self.fileName, "rb")
- except IOError:
- raise Exception("Error! Sequence file '%s' does not exist! Exiting..." % (self.fileName))
-
-
- def __del__(self):
- """
- Destructor
- """
- if not self.handle.closed:
- self.handle.close()
-
-
- def close(self):
- """
- Close file handle
- """
- self.handle.close()
-
-
- def reset(self):
- """
- Prepare the file to be read again from start
- """
- self.handle.seek(0)
- self.currentLine = None
- self.nbReadSequences = 0
-
-
- def getFileFormats(self):
- pass
- getFileFormats = staticmethod(getFileFormats)
-
-
- def parse(self):
- """
- Parse the whole file in one shot
- @return: a list of sequence
- """
- sequenceList = SequenceList()
- progress = Progress(self.getNbSequences(), "Reading %s" % (self.fileName), self.verbosity)
- for sequence in self.getIterator():
- sequenceList.addSequence(sequence)
- progress.inc()
- progress.done()
- return sequenceList
-
-
- def getIterator(self):
- """
- Iterate on the file, sequence by sequence
- @return: an iterator to sequences
- """
- self.reset()
- sequence = self.parseOne()
- while sequence != None:
- self.nbReadSequences += 1
- yield sequence
- sequence = self.parseOne()
-
-
- def getInfos(self):
- """
- Get some generic information about the sequences
- """
- self.nbSequences = 0
- self.size = 0
- self.reset()
- if self.verbosity >= 10:
- print "Getting information on %s." % (self.fileName)
- for sequence in self.getIterator():
- self.nbSequences += 1
- self.size += sequence.getSize()
- if self.verbosity >= 10 and self.nbSequences % 100000 == 0:
- sys.stdout.write(" %d sequences read\r" % (self.nbSequences))
- sys.stdout.flush()
- self.reset()
- if self.verbosity >= 10:
- print " %d sequences read" % (self.nbSequences)
- print "Done."
-
-
- def getNbSequences(self):
- """
- Get the number of sequences in the file
- @return: the number of sequences
- """
- if self.nbSequences != None:
- return self.nbSequences
- self.getInfos()
- return self.nbSequences
-
-
- def getNbItems(self):
- """
- Get the number of sequences in the file
- @return: the number of sequences
- """
- return self.getNbSequences()
-
-
- def getSize(self):
- """
- Get the size of all the sequences
- @return: the size
- """
- if self.size != None:
- return self.size
- self.getInfos()
- return self.size
-
-
- def getRegions(self):
- """
- Get the names of the sequences
- @return: the names
- """
- if self.sizes != None:
- return self.sizes.keys()
-
- self.sizes = {}
- self.reset()
- if self.verbosity >= 10:
- print "Getting information on %s." % (self.fileName)
- self.nbSequences = 0
- for sequence in self.getIterator():
- self.sizes[sequence.name] = sequence.getSize()
- self.nbSequences += 1
- if self.verbosity >= 10 and self.nbSequences % 100000 == 0:
- sys.stdout.write(" %d sequences read\r" % (self.nbSequences))
- sys.stdout.flush()
- self.reset()
- if self.verbosity >= 10:
- print " %d sequences read" % (self.nbSequences)
- print "Done."
- return self.sizes.keys()
-
-
- def getSizeOfRegion(self, region):
- """
- Get the size of a sequence
- @param region: the name of the sequence
- @type region: string
- @return: the size of the sequence
- """
- if self.sizes != None:
- if region not in self.sizes:
- raise Exception("Region %s is not found" % region)
- return self.sizes[region]
-
- self.getRegions()
- if region not in self.sizes:
- raise Exception("Region %s is not found" % region)
-
- def __eq__(self, o):
- if o == None:
- return False
- return self.fileName == o.fileName and self.nbSequences == o.nbSequences
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/SequenceListParser.pyc
b
Binary file commons/core/parsing/SequenceListParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/ShrimpParser.py
--- a/commons/core/parsing/ShrimpParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,107 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.Mapping import Mapping
-from commons.core.parsing.MapperParser import MapperParser
-
-
-class ShrimpParser(MapperParser):
-    """A class that parses the output of Shrimp"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(ShrimpParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(ShrimpParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["shrimp"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        self.handle.readline()
-        self.currentLineNb += 1
-
-
-    def parseLine(self, line):
-        m = re.search(r"^\s*>([^\t]+)\t+(\S+)\s+([+-])\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s*$", line)
-        if m == None:
-            sys.exit("\nLine %d '%s' does not have a Shrimp format" % (self.currentLineNb, line))
-
-        mapping = Mapping()
-
-        mapping.queryInterval.setName(m.group(1))
-        mapping.queryInterval.setStart(min(int(m.group(6)), int(m.group(7))))
-        mapping.queryInterval.setEnd(max(int(m.group(6)), int(m.group(7))))
-
-        mapping.targetInterval.setChromosome(m.group(2))
-        mapping.targetInterval.setStart(min(int(m.group(4)), int(m.group(5))))
-        mapping.targetInterval.setEnd(max(int(m.group(4)), int(m.group(5))))
-
-        mapping.setSize(int(m.group(8)))
-        mapping.setDirection(m.group(3))
-
-        editString = m.group(10)
-        nbMismatches = 0
-        nbGaps = 0
-        while editString != "":
-            m = re.search(r"^(\d+)(\D.*)$", editString)
-            if m != None:
-                editString = m.group(2)
-            else:
-                m = re.search(r"^(\d+)$", editString)
-                if m != None:
-                    editString = ""
-                else:
-                    m = re.search(r"^([A-Z])(.*)$", editString)
-                    if m != None:
-                        nbMismatches += 1
-                        editString = m.group(2)
-                    else:
-                        m = re.search(r"^\((\w+)\)(.*)$", editString)
-                        if m != None:
-                            nbGaps += len(m.group(1))
-                            editString = m.group(2)
-                        else:
-                            m = re.search(r"^-(.*)$", editString)
-                            if m != None:
-                                nbGaps += 1
-                                editString = m.group(1)
-                            else:
-                                sys.exit("Cannot understand edit string %s from line %s" % (editString, line))
-
-        mapping.setNbMismatches(nbMismatches)
-        mapping.setNbGaps(nbGaps)
-
-        return mapping
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/ShrimpParser.pyc
b
Binary file commons/core/parsing/ShrimpParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/Soap2Parser.py
--- a/commons/core/parsing/Soap2Parser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,148 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.Mapping import Mapping
-from SMART.Java.Python.structure.SubMapping import SubMapping
-from commons.core.parsing.MapperParser import MapperParser
-
-
-def mappingToSubMapping(mapping):
-    subMapping = SubMapping()
-    subMapping.targetInterval.copy(mapping.targetInterval)
-    subMapping.queryInterval.copy(mapping.queryInterval)
-    subMapping.setDirection(mapping.getDirection())
-    subMapping.size = mapping.size
-    subMapping.tags = mapping.tags
-    return subMapping
-
-
-
-class Soap2Parser(MapperParser):
-    """A class that parses the output of SOAP2"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(Soap2Parser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(Soap2Parser, self).__del__()
-
-
-    def getFileFormats():
-        return ["soap2"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        pass
-
-
-    def getIterator(self):
-        self.reset()
-        currentName = None
-        currentMappings = []
-        for line in self.handle:
-            mapping = self.parseLine(line)
-            name = mapping.queryInterval.name
-            if name == currentName:
-                if mapping.getTagValue("end") == "a":
-                    currentMappings.append(mapping)
-                else:
-                    otherEndMapping = currentMappings.pop(0)
-
-                    newMapping = Mapping()
-                    subMappingA = mappingToSubMapping(otherEndMapping)
-                    subMappingB = mappingToSubMapping(mapping)
-                    subMappingB.queryInterval.setDirection(subMappingA.queryInterval.getDirection())
-
-                    newMapping.addSubMapping(subMappingA)
-                    newMapping.addSubMapping(subMappingB)
-
-                    newMapping.tags = otherEndMapping.tags
-                    newMapping.setSize(otherEndMapping.size + mapping.size)
-                    newMapping.setNbMismatches(otherEndMapping.getTagValue("nbMismatches") + mapping.getTagValue("nbMismatches"))
-                    print otherEndMapping.getTagValue("nbMismatches")
-                    print mapping.getTagValue("nbMismatches")
-                    print newMapping.getTagValue("nbMismatches")
-                    sys.exit()
-                    newMapping.setTagValue("qualityString", otherEndMapping.getTagValue("qualityString") + mapping.getTagValue("qualityString"))
-                    newMapping.setTagValue("occurrence", "%d" % (newMapping.getTagValue("nbOccurrences") - len(currentMappings)))
-                    newMapping.setTagValue("ID", "%s-%s" % (name, newMapping.getTagValue("occurrence")))
-                    del newMapping.tags["end"]
-                    yield newMapping
-            else:
-                currentName = mapping.queryInterval.name
-                for currentMapping in currentMappings:
-                    yield currentMapping
-                currentMappings = [mapping]
-            self.currentLineNb += 1
-                
-                
-    def parseLine(self, line):
-        m = re.search(r"^\s*(\S+)\s+(\w+)\s+(\S+)\s+(\d+)\s+([ab])\s+(\d+)\s+([+-])\s+(\w+)\s+(\d+)\s+(\d+)\s+", line)
-        if m == None:
-            sys.exit("\nLine %d '%s' does not have a SOAP2 format" % (self.currentLineNb, line))
-
-        name          = m.group(1)
-        read          = m.group(2)
-        qualityString = m.group(3)
-        nbOccurrences = int(m.group(4))
-        end           = m.group(5)
-        size          = int(m.group(6))
-        direction     = m.group(7)
-        chromosome    = m.group(8)
-        genomeStart   = int(m.group(9))
-        nbMismatches  = int(m.group(10))
-
-        mapping = Mapping()
-        if name.endswith("/1") or name.endswith("/2"):
-            name = name[:-2]
-
-        mapping.queryInterval.name = name
-        mapping.queryInterval.setDirection(direction)
-        mapping.queryInterval.setStart(1)
-        mapping.queryInterval.setEnd(size)
-
-        mapping.targetInterval.setChromosome(chromosome)
-        mapping.targetInterval.setStart(genomeStart)
-        mapping.targetInterval.setSize(size)
-
-        mapping.setDirection(direction)
-        mapping.setSize(size)
-
-        mapping.setNbMismatches(nbMismatches)
-        mapping.setNbGaps(0)
-        mapping.setTagValue("qualityString", qualityString)
-        mapping.setTagValue("nbOccurrences", nbOccurrences)
-        mapping.setTagValue("end", end)
-
-        return mapping
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/Soap2Parser.pyc
b
Binary file commons/core/parsing/Soap2Parser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/SoapParser.py
--- a/commons/core/parsing/SoapParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,75 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import re
-import sys
-from SMART.Java.Python.structure.Mapping import Mapping
-from commons.core.parsing.MapperParser import MapperParser
-
-
-class SoapParser(MapperParser):
-    """A class that parses the output of SOAP"""
-
-    def __init__(self, fileName, verbosity = 0):
-        super(SoapParser, self).__init__(fileName, verbosity)
-
-
-    def __del__(self):
-        super(SoapParser, self).__del__()
-
-
-    def getFileFormats():
-        return ["soap"]
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def skipFirstLines(self):
-        pass
-
-
-    def parseLine(self, line):
-        m = re.search(r"^\s*(\S+)\s+(\w+)\s+(\w+)\s+(\d+)\s+(a)\s+(\d+)\s+([+-])\s+(\w+)\s+(\d+)\s+(\d+)", line)
-        if m == None:
-            sys.exit("\nLine %d '%s' does not have a SOAP format" % (self.currentLineNb, line))
-
-        mapping = Mapping()
-
-        mapping.queryInterval.setName(m.group(1))
-        mapping.queryInterval.setStart(1)
-        mapping.queryInterval.setSize(len(m.group(2)))
-
-        mapping.targetInterval.setChromosome(m.group(8))
-        mapping.targetInterval.setStart(int(m.group(9)))
-        mapping.targetInterval.setSize(len(m.group(2)))
-
-        mapping.setDirection(m.group(7))
-        mapping.setSize(len(m.group(2)))
-        mapping.setNbMismatches(int(m.group(10)))
-
-        return mapping
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/SoapParser.pyc
b
Binary file commons/core/parsing/SoapParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/SsrParser.py
--- a/commons/core/parsing/SsrParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,170 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-import sys
-
-## this class can parse a Ssr results output file. SSR.pl is developped by S.Cartinhour. (5/2000)
-#
-class SsrParser(object):
-
-
-    def __init__(self, BES_name='', BES_redundancy='', SSR_nbNucleotides='', SSR_Motif='', SSR_Motif_number='', SSR_start='', SSR_end='', BES_size=''):
-        self._BesName = BES_name
-        self._BesRedundancy = BES_redundancy
-        self._SsrNbNucleotides = SSR_nbNucleotides
-        self._SsrMotif = SSR_Motif
-        self._SsrMotifNumber = SSR_Motif_number
-        self._SsrStart = SSR_start
-        self._SsrEnd = SSR_end
-        self._BesSize = BES_size
-        
-    def __eq__(self, o):
-        return self._BesName == o._BesName and self._BesRedundancy == o._BesRedundancy and self._SsrNbNucleotides == o._SsrNbNucleotides and self._SsrMotif == o._SsrMotif and self._SsrMotifNumber == o._SsrMotifNumber and self._SsrStart == o._SsrStart and self._SsrEnd == o._SsrEnd and self._BesSize == o._BesSize
-        
-    def setBesName(self, BES_Name):
-        self._BesName = BES_Name
-        
-    def setBesRedundancy(self, BES_redundancy):
-        self._BesRedundancy = BES_redundancy
-        
-    def setSsrNbNucleotides(self, SSR_nbNucleotides):
-        self._SsrNbNucleotides = SSR_nbNucleotides
-        
-    def setSsrMotif(self, SSR_Motif):
-        self._SsrMotif = SSR_Motif
-        
-    def setSsrMotifNumber(self, SSR_Motif_number):
-        self._SsrMotifNumber = SSR_Motif_number
-        
-    def setSsrStart(self, SSR_start):
-        self._SsrStart = SSR_start
-        
-    def setSsrEnd(self, SSR_end):
-        self._SsrEnd = SSR_end
-        
-    def setBesSize(self, BES_size):
-        self._BesSize = BES_size
-        
-    def getBesName(self):
-        return self._BesName
-        
-    def getBesRedundancy(self):
-        return self._BesRedundancy
-        
-    def getSsrNbNucleotides(self):
-        return self._SsrNbNucleotides
-        
-    def getSsrMotif(self):
-        return self._SsrMotif
-        
-    def getSsrMotifNumber(self):
-        return self._SsrMotifNumber
-        
-    def getSsrStart(self):
-        return self._SsrStart
-        
-    def getSsrEnd(self):
-        return self._SsrEnd
-        
-    def getBesSize(self):
-        return self._BesSize
-    
-    def setAttributes(self, lResults, iCurrentLineNumber):
-        error = False
-        
-        if lResults[0] != '':
-            self.setBesName(lResults[0])
-        else:
-            sys.stderr.write("WARNING: The field BES Name is empty in SSR results file in line %s\n" % iCurrentLineNumber)
-            error = True
-        
-        if lResults[1] != '':
-            self.setBesRedundancy(lResults[1])
-        else:
-            sys.stderr.write("WARNING: The field BES Redundancy is empty in SSR results file in line %s\n" % iCurrentLineNumber)
-            error = True
-        
-        if lResults[2] != '':
-            self.setSsrNbNucleotides(lResults[2])
-        else:
-            sys.stderr.write("WARNING: The field SSR Number Nucleotides is empty in SSR results file in line %s\n" % iCurrentLineNumber)
-            error = True
-    
-        if lResults[3] != '':
-            self.setSsrMotif(lResults[3])
-        else:
-            sys.stderr.write("WARNING: The field SSR Motif is empty in SSR results file in line %s\n" % iCurrentLineNumber)
-            error = True
-    
-        if lResults[4] != '':
-            self.setSsrMotifNumber(lResults[4])
-        else:
-            sys.stderr.write("WARNING: The field SSR Motif Number is empty in SSR results file in line %s\n" % iCurrentLineNumber)
-            error = True
-    
-        if lResults[5] != '':
-            self.setSsrStart(lResults[5])
-        else:
-            sys.stderr.write("WARNING: The field SSR Start is empty in SSR results file in line %s\n" % iCurrentLineNumber)
-            error = True
-    
-        if lResults[6] != '':
-            self.setSsrEnd(lResults[6])
-        else:
-            sys.stderr.write("WARNING: The field SSR End is empty in SSR results file in line %s\n" % iCurrentLineNumber)
-            error = True
-    
-        if lResults[7] != '':
-            self.setBesSize(lResults[7])
-        else:
-            sys.stderr.write("WARNING: The field BES Size is empty in SSR results file in line %s\n" % iCurrentLineNumber)
-            error = True
-            
-        if error == True:
-            self._setAllToNull()
-            
-    def setAttributesFromString(self, ssrLine, iCurrentLineNumber ="", fieldSeparator ="\t"):
-        ssrLine = ssrLine.rstrip()
-        lSsrLineItem = ssrLine.split(fieldSeparator)
-        if len(lSsrLineItem) < 8:
-            sys.stderr.write("WARNING: The line %s is not a valid SSR Result line\n" % iCurrentLineNumber)
-        else:
-            self.setAttributes(lSsrLineItem, iCurrentLineNumber)
-            
-    def _setAllToNull(self):
-        self._BesName = ''
-        self._BesRedundancy = ''
-        self._SsrNbNucleotides = ''
-        self._SsrMotif = ''
-        self._SsrMotifNumber = ''
-        self._SsrStart = ''
-        self._SsrEnd = ''
-        self._BesSize = ''
-    
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/TranscriptListParser.py
--- a/commons/core/parsing/TranscriptListParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,182 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-from SMART.Java.Python.structure.TranscriptList import TranscriptList
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-
-class TranscriptListParser(object):
-    """A (quite generic) class that reads a list of transcripts"""
-
-    def __init__(self, fileName, verbosity = 0):
-        self.verbosity         = verbosity
-        self.fileName          = fileName
-        self.nbTranscripts     = None
-        self.size              = None
-        self.chromosomes       = None
-        self.currentTranscript = None
-        self.currentLineNb     = 0
-        self.previousTranscriptAddress = None
-        try:
-            self.handle = open(self.fileName)
-        except IOError:
-            raise Exception("Error! Transcript file '%s' does not exist! Exiting..." % (self.fileName))
-        self.skipFirstLines()
-
-
-    def __del__(self):
-        self.close()
-        
-
-    def getFileFormats():
-        pass
-    getFileFormats = staticmethod(getFileFormats)
-
-
-    def close(self):
-        if self.handle != None and not self.handle.close:
-            self.handle.close()
-        self.handle = None
-
-
-    def reset(self):
-        self.handle.seek(0)
-        self.skipFirstLines()
-        self.currentTranscript = None
-        self.currentLineNb     = 0
-        self.currentTranscriptAddress  = self.handle.tell()
-        self.currentAddress            = self.handle.tell()
-
-
-    def gotoAddress(self, address):
-        self.reset()
-        self.handle.seek(address)
-        self.currentTranscriptAddress = address
-        self.currentAddress           = address
-                
-        
-    def parse(self):
-        transcriptList = TranscriptList()
-        progress = Progress(self.getNbTranscripts(), "Reading %s" % (self.fileName), self.verbosity)
-        for line in self.handle:
-            self.currentLineNb += 1
-            transcript = self.parseLine(line)
-            transcriptList.addTranscript(transcript)
-            progress.inc()
-        progress.done()
-        return transcriptList
-
-
-    def getIterator(self):
-        self.reset()
-        transcript = self.getNextTranscript()
-        while transcript != None:
-            yield transcript
-            transcript = self.getNextTranscript()
-
-
-    def getCurrentAddress(self):
-        return self.currentAddress
-
-
-    def getCurrentTranscriptAddress(self):
-        return self.currentTranscriptAddress
-
-
-    def getNextTranscript(self):
-        self.currentAddress = self.handle.tell()
-        line = self.handle.readline()
-        while line != "":
-            line = line.strip()
-            self.currentLineNb += 1
-            transcript = self.parseLine(line)
-            if transcript != None:
-                return transcript
-            self.currentAddress = self.handle.tell()
-            line = self.handle.readline()
-        transcript = self.currentTranscript
-        self.currentTranscriptAddress = self.previousTranscriptAddress
-        self.currentTranscript = None
-        return transcript
-
-
-    def getInfos(self):
-        self.chromosomes = set()
-        self.nbTranscripts = 0
-        self.size = 0
-        self.reset()
-        progress = UnlimitedProgress(100000, "Getting information on %s." % (self.fileName), self.verbosity-9)
-        transcript = self.getNextTranscript()
-        for transcript in self.getIterator():
-            self.chromosomes.add(transcript.getChromosome())
-            self.nbTranscripts += 1
-            self.size += transcript.getSize()
-            progress.inc()
-        progress.done()
-        self.reset()
-
-    
-    def getNbTranscripts(self):
-        if self.nbTranscripts != None:
-            return self.nbTranscripts
-        self.getInfos()
-        return self.nbTranscripts
-
-
-    def getNbItems(self):
-        return self.getNbTranscripts()
-
-
-    def getChromosomes(self):
-        if self.chromosomes != None:
-            return self.chromosomes
-        self.getInfos()
-        return self.chromosomes
-    
-    
-    def getSize(self):
-        if self.size != None:
-            return self.size
-        self.getInfos()
-        return self.size
-    
-    
-    def getNbNucleotides(self):
-        return self.getSize()
-
-
-    def setDefaultTagValue(self, name, value):
-        for transcript in self.getIterator():
-            transcript.setTag(name, value)
-            
-    def __eq__(self, o):
-        if o == None:
-            return False
-        return self.fileName == o.fileName and self.nbTranscripts == o.nbTranscripts and self.size == o.size and self.chromosomes == o.chromosomes
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/TranscriptListParser.pyc
b
Binary file commons/core/parsing/TranscriptListParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/VarscanFile.py
--- a/commons/core/parsing/VarscanFile.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,145 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-from commons.core.parsing.VarscanHit import VarscanHit
-from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag
-from commons.core.parsing.VarscanHit_v2_2_8 import VarscanHit_v2_2_8
-from commons.core.checker.CheckerException import CheckerException
-from commons.core.parsing.VarscanHit_v2_2_8_WithTag import VarscanHit_v2_2_8_WithTag
-
-class VarscanFile(object):
-
-    def __init__(self, varscanFileName = ""):
-        self._varscanFileName = varscanFileName
-        self._varscanFieldSeparator = "\t"
-        self._lVarscanHits = []
-        self._typeOfVarscanFile = ""
-        
-    def __eq__(self, o):
-        return self._varscanFieldSeparator == o._varscanFieldSeparator and self._lVarscanHits == o._lVarscanHits and self._varscanFileName == o._varscanFileName
-             
-    def setVarscanHitsList(self, lVarscanHits):
-        self._lVarscanHits = lVarscanHits
-    
-    def setHeaderVarcanFile(self, headerVarcanFile):
-        self._headerVarcanFile = headerVarcanFile
-        
-    def setTypeOfVarscanFile(self, type):
-        if type == "Varscan_2_2" or type == "Varscan_2_2_WithTag" or type == "Varscan_2_2_8" or type == "Varscan_2_2_8_WithTag":
-            self._typeOfVarscanFile = type
-        else:
-            self._typeOfVarscanFile = ""
-        
-    def getVarscanHitsList(self):
-        return self._lVarscanHits
-    
-    def getHeaderVarcanFile(self):
-        return self._headerVarcanFile
-    
-    def getListOfVarscanHits(self):
-        return self._lVarscanHits
-    
-    def getTypeOfVarscanFile(self):
-        return self._typeOfVarscanFile
-        
-    def parse(self):
-        varscanFile = open(self._varscanFileName, "r")
-        currentLineNumber = 0
-        line = varscanFile.readline()
-        if "Chrom\tPosition" in line:
-            self.setHeaderVarcanFile(line)
-            line = varscanFile.readline()
-        while line != "":
-            if not "Chrom\tPosition" in line:
-                currentLineNumber += 1
-                line = line.strip()
-                lResults = line.split(self._varscanFieldSeparator)
-                if len(lResults) == 12:
-                    currentVarscanLine = self.createVarscanHit(line, currentLineNumber)
-                    self._typeOfVarscanFile = "Varscan_2_2"
-                elif len(lResults) == 13:
-                    currentVarscanLine = self.createVarscanHitWithTag(line, currentLineNumber)
-                    self._typeOfVarscanFile = "Varscan_2_2_WithTag"
-                elif len(lResults) == 19:
-                    currentVarscanLine = self.createVarscanHit_v2_2_8(line, currentLineNumber)
-                    self._typeOfVarscanFile = "Varscan_2_2_8"
-                elif len(lResults) == 20:
-                    currentVarscanLine = self.createVarscanHit_v2_2_8_WithTag(line, currentLineNumber)
-                    self._typeOfVarscanFile = "Varscan_2_2_8_WithTag"
-                else:
-                    raise CheckerException ("Warning: this line (l.%s) is not a valid varscan line !" % currentLineNumber)
-                self._lVarscanHits.append(currentVarscanLine)
-                line = varscanFile.readline()
-        varscanFile.close()
-        
-    def createVarscanObjectFromLine(self, line, currentLineNumber):
-        if self._typeOfVarscanFile == "Varscan_2_2":
-            VarscanHit =  self.createVarscanHit(line, currentLineNumber)
-            return VarscanHit
-        elif self._typeOfVarscanFile == "Varscan_2_2_WithTag":
-            return self.createVarscanHitWithTag(line, currentLineNumber)
-        elif self._typeOfVarscanFile == "Varscan_2_2_8":
-            return self.createVarscanHit_v2_2_8(line, currentLineNumber)
-        elif self._typeOfVarscanFile == "Varscan_2_2_8_WithTag":
-            return self.createVarscanHit_v2_2_8_WithTag(line, currentLineNumber)
-            
-    def createVarscanHit(self, line, currentLineNumber):
-        iVarscanHit =  VarscanHit()
-        iVarscanHit.setAttributesFromString(line, currentLineNumber)
-        return iVarscanHit
-        
-    def createVarscanHitWithTag(self, line, currentLineNumber):
-        iVarscanHitWithTag =  VarscanHit_WithTag()
-        iVarscanHitWithTag.setAttributesFromString(line, currentLineNumber)
-        return iVarscanHitWithTag
-    
-    def createVarscanHit_v2_2_8(self, line, currentLineNumber):
-        iVarscanHit =  VarscanHit_v2_2_8()
-        iVarscanHit.setAttributesFromString(line, currentLineNumber)
-        return iVarscanHit
-    
-    def createVarscanHit_v2_2_8_WithTag(self, line, currentLineNumber):
-        iVarscanHitWithTag =  VarscanHit_v2_2_8_WithTag()
-        iVarscanHitWithTag.setAttributesFromString(line, currentLineNumber)
-        return iVarscanHitWithTag
-    
-    def selectTypeOfVarscanHitObject(self):
-        if self._typeOfVarscanFile == "":
-            raise CheckerException ("Error: no varscan object found !")
-        elif self._typeOfVarscanFile == "Varscan_2_2":
-            return VarscanHit()
-        elif self._typeOfVarscanFile == "Varscan_2_2_WithTag":
-            return VarscanHit_WithTag()
-        elif self._typeOfVarscanFile == "Varscan_2_2_8":
-            return VarscanHit_v2_2_8()
-        elif self._typeOfVarscanFile == "Varscan_2_2_8_WithTag":
-            return VarscanHit_v2_2_8_WithTag()        
-        
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/VarscanFileForGnpSNP.py
--- a/commons/core/parsing/VarscanFileForGnpSNP.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,72 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP
-from commons.core.parsing.VarscanFile import VarscanFile
-
-class VarscanFileForGnpSNP(VarscanFile):
-    
-    def __init__(self, varscanFileName, fastqFileName="", refFastaFileName="", taxonName=""):
-        VarscanFile.__init__(self, varscanFileName)
-        self._fastqFileName = fastqFileName
-        self._refFastaFileName = refFastaFileName
-        self._taxonName = taxonName
-        self._previousVarscanHit = None
-        
-    ## Equal operator
-    #
-    # @param o a VarscanFileAnalysis instance
-    #    
-    def __eq__(self, o):
-        return VarscanFile.__eq__(self, o) and self._fastqFileName == o._fastqFileName \
-            and self._refFastaFileName == o._refFastaFileName and self._taxonName == o._taxonName
-             
-    def getVarscanFieldSeparator(self):
-        return self._varscanFieldSeparator
-        
-    def getFastqFileName(self):
-        return self._fastqFileName
-    
-    def getRefFastaFileName(self):
-        return self._refFastaFileName
-    
-    def getTaxonName(self):
-        return self._taxonName
-        
-    def createVarscanHit(self, line, currentLineNumber):
-        line = line.strip()
-        lResults = line.split(self._varscanFieldSeparator)
-        iVarscanHit = VarscanHitForGnpSNP()
-        iVarscanHit.setAttributes(lResults, currentLineNumber)
-        iVarscanHit.formatAlleles2GnpSnp()
-        iVarscanHit.manageOccurrence(self._previousVarscanHit)
-        self._previousVarscanHit = iVarscanHit
-        return iVarscanHit
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/VarscanHit.py
--- a/commons/core/parsing/VarscanHit.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,175 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-from commons.core.checker.CheckerException import CheckerException
-
-class VarscanHit(object):
-    
-    def __init__(self, chrom = "", position = "", ref = "", var = "", readsRef = "", readsVar = "", varFreq = "", strandsRef = "", strandsVar = "", qualRef = "", qualVar = "", pValue = ""):
-        self._chrom = chrom
-        self._position = position
-        self._ref = ref
-        self._var = var
-        self._readsRef = readsRef
-        self._readsVar = readsVar
-        self._varFreq = varFreq
-        self._strandsRef = strandsRef
-        self._strandsVar = strandsVar
-        self._qualRef = qualRef
-        self._qualVar = qualVar
-        self._pValue = pValue
-        
-    ## Equal operator
-    #
-    # @param o a VarscanFileAnalysis instance
-    #    
-    def __eq__(self, o):
-        return self._chrom == o._chrom and self._position == o._position and self._ref == o._ref and self._var == o._var
-
-    def setChrom(self, chromosome):
-        self._chrom = chromosome
-    
-    def setPosition(self, position):
-        self._position = position
-    
-    def setRef(self, referenceAllele):
-        self._ref = referenceAllele
-    
-    def setVar(self, variantAllele):
-        self._var = variantAllele
-    
-    def setReadsRef(self, readsRef):
-        self._readsRef = readsRef
-    
-    def setReadsVar(self, readsVar):
-        self._readsVar = readsVar
-        
-    def setVarFreq(self, varFreq):
-        self._varFreq = varFreq
-        
-    def setStrandsRef(self, strandsRef):
-        self._strandsRef = strandsRef
-        
-    def setStrandsVar(self, strandsVar):
-        self._strandsVar = strandsVar
-        
-    def setQualRef(self, qualRef):
-        self._qualRef = qualRef
-        
-    def setQualVar(self, qualVar):
-        self._qualVar = qualVar
-        
-    def setPValue(self, pValue):
-        self._pValue = pValue
-    
-    def getChrom(self):
-        return self._chrom
-    
-    def getPosition(self):
-        return self._position
-    
-    def getRef(self):
-        return self._ref
-    
-    def getVar(self):
-        return self._var
-    
-    def getReadsRef(self):
-        return self._readsRef
-    
-    def getReadsVar(self):
-        return self._readsVar
-    
-    def getVarFreq(self):
-        return self._varFreq
-    
-    def getStrandsRef(self):
-        return self._strandsRef
-    
-    def getStrandsVar(self):
-        return self._strandsVar
-    
-    def getQualRef(self):
-        return self._qualRef
-    
-    def getQualVar(self):
-        return self._qualVar
-    
-    def getPValue(self):
-        return self._pValue
-    
-    def getHeader(self):
-        return "Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n"
-    
-    def getVarscanLine(self):
-        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (self.getChrom(), self.getPosition(), self.getRef(), self.getVar(), self.getReadsRef(), self.getReadsVar(), self.getVarFreq(), self.getStrandsRef(), self.getStrandsVar(),  self.getQualRef(), self.getQualVar(), self.getPValue())
-    
-    def setAttributes(self, lResults, iCurrentLineNumber):
-        if lResults[0] != '':
-            self.setChrom(lResults[0])
-        else:
-            raise CheckerException ("The field Chrom is empty in varscan file in line %s" % iCurrentLineNumber)
-        if lResults[1] != '':
-            self.setPosition(lResults[1])
-        else:
-            raise CheckerException ("The field Position is empty in varscan file in line %s" % iCurrentLineNumber)
-        if lResults[2] != '':
-            self.setRef(lResults[2])
-        else:
-            raise CheckerException ("The field Ref is empty in varscan file in line %s" % iCurrentLineNumber)
-        if lResults[3] != '':
-            self.setVar(lResults[3])
-        else:
-            raise CheckerException ("The field Var is empty in varscan file in line %s" % iCurrentLineNumber)
-        if lResults[4] != '':
-            self.setReadsRef(lResults[4])
-        if lResults[5] != '':
-            self.setReadsVar(lResults[5])
-        if lResults[6] != '':
-            self.setVarFreq(lResults[6])
-        if lResults[7] != '':
-            self.setStrandsRef(lResults[7])
-        if lResults[8] != '':
-            self.setStrandsVar(lResults[8])
-        if lResults[9] != '':
-            self.setQualRef(lResults[9])
-        if lResults[10] != '':
-            self.setQualVar(lResults[10])
-        if lResults[11] != '':
-            self.setPValue(lResults[11])
-            
-    def setAttributesFromString(self, varscanString, iCurrentLineNumber ="", fieldSeparator ="\t"):
-        varscanString = varscanString.rstrip()
-        lvarscanStringItem = varscanString.split(fieldSeparator)
-        if len(lvarscanStringItem)<12:
-            for i in range(len(lvarscanStringItem), 12):
-                lvarscanStringItem.append ("")
-        self.setAttributes(lvarscanStringItem, iCurrentLineNumber)
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/VarscanHitForGnpSNP.py
--- a/commons/core/parsing/VarscanHitForGnpSNP.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,232 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-from commons.core.checker.CheckerException import CheckerException\n-from commons.core.parsing.VarscanHit import VarscanHit\n-import re\n-\n-class VarscanHitForGnpSNP(VarscanHit):\n-    \n-    def __init__(self):\n-        VarscanHit.__init__(self)\n-        self._reads1 = \'\'\n-        self._reads2 = \'\'\n-        self._varFreq = \'\'\n-        self._strands1 = \'\'\n-        self._strands2 = \'\'\n-        self._qual1 = \'\'\n-        self._qual2 = \'\'\n-        self._pvalue = \'\'\n-        self._5flank = \'\'\n-        self._3flank = \'\'\n-        self._gnpSnp_ref = \'\'\n-        self._gnpSnp_var = \'\'\n-        self._gnpSnp_position = 0\n-        self._polymType = \'\'\n-        self._polymLength = 0\n-        self._occurrence = 1\n-        \n-    ## Equal operator\n-    #\n-    # @param o a VarscanFileAnalysis instance\n-    #    \n-    def __eq__(self, o):\n-        return VarscanHit.__eq__(self, o) \\\n-             and self._reads1 == o._reads1 and self._reads2 == o._reads2 \\\n-             and self._varFreq == o._varFreq and self._strands1 == o._strands1 \\\n-             and self._strands2 == o._strands2 and self._qual1 == o._qual1 \\\n-             and self._qual2 == o._qual2 and self._pvalue == o._pvalue \\\n-             and self._3flank == o._3flank and self._5flank == o._5flank \\\n-             and self._gnpSnp_position == o._gnpSnp_position and self._gnpSnp_ref == o._gnpSnp_ref \\\n-             and self._gnpSnp_var == o._gnpSnp_var and self._polymLength == o._polymLength \\\n-             and self._polymType == o._polymType and self._occurrence == o._occurrence\n-    \n-    def isPolymTypeAlreadyFoundAtThisChromAndThisPosition(self, iVarscanHitForGnpSNP):\n-        return self._chrom == iVarscanHitForGnpSNP.getChrom() \\\n-            and self._position == 
iVarscanHitForGnpSNP.getPosition() \\\n-            and self._polymType == iVarscanHitForGnpSNP.getPolymType()\n-            \n-    def manageOccurrence(self, iVarscanHitForGnpSNP=None):\n-        if iVarscanHitForGnpSNP != None and self.isPolymTypeAlreadyFoundAtThisChromAndThisPosition(iVarscanHitForGnpSNP):\n-            self._occurrence = iVarscanHitForGnpSNP.getOccurrence() + 1\n-    \n-    def formatAlleles2GnpSnp(self):\n-        if self.getVar().find("-") != -1:\n-            self._polymType = "DELETION"\n-            self._gnpSnp_position = int(self._position) + 1\n-            self._gnpSnp_ref = self._var[1:]\n-            self._g'..b'randsOfReferenceAllele):\n-        self._strands1 = strandsOfReferenceAllele\n-    \n-    def setStrands2(self, strandsOfVariantAllele):\n-        self._strands2 = strandsOfVariantAllele\n-    \n-    def setQual1(self, averageQualityOfRef):\n-        self._qual1 = averageQualityOfRef\n-    \n-    def setQual2(self, averageQualityOfVar):\n-        self._qual2 = averageQualityOfVar\n-    \n-    def setPvalue(self, pvalue):\n-        self._pvalue = pvalue\n-    \n-    def set5flank(self, s5flank):\n-        self._5flank = s5flank\n-    \n-    def set3flank(self, s3flank):\n-        self._3flank = s3flank\n-        \n-    def setGnpSNPRef(self, ref):\n-        self._gnpSnp_ref = ref\n-        \n-    def setGnpSNPVar(self, var):\n-        self._gnpSnp_var = var\n-        \n-    def setGnpSNPPosition(self, position):\n-        self._gnpSnp_position = position\n-    \n-    def setOccurrence(self, occurrence):\n-        self._occurrence = occurrence\n-        \n-    def setPolymType(self, polymType):\n-        self._polymType = polymType\n-        \n-    def setPolymLength(self, polymLength):\n-        self._polymLength = polymLength\n-    \n-    def getReads1(self):\n-        return self._reads1\n-    \n-    def getReads2(self):\n-        return self._reads2\n-    \n-    def getVarFreq(self):\n-        return self._varFreq\n-    \n- 
   def getStrands1(self):\n-        return self._strands1\n-    \n-    def getStrands2(self):\n-        return self._strands2\n-    \n-    def getQual1(self):\n-        return self._qual1\n-    \n-    def getQual2(self):\n-        return self._qual2\n-    \n-    def getPvalue(self):\n-        return self._pvalue\n-    \n-    def get5flank(self):\n-        return self._5flank\n-    \n-    def get3flank(self):\n-        return self._3flank\n-    \n-    def getPolymType(self):\n-        return self._polymType\n-    \n-    def getGnpSnpVar(self):\n-        return self._gnpSnp_var\n-    \n-    def getGnpSnpRef(self):\n-        return self._gnpSnp_ref\n-    \n-    def getGnpSnpPosition(self):\n-        return self._gnpSnp_position\n-    \n-    def getPolymLength(self):\n-        return self._polymLength\n-    \n-    def getOccurrence(self):\n-        return self._occurrence\n-    \n-    def setAttributes(self, lResults, iCurrentLineNumber):\n-        VarscanHit.setAttributes(self, lResults, iCurrentLineNumber)\n-        if lResults[4] != \'\':\n-            self.setReads1(lResults[4])\n-        else:\n-            raise CheckerException ("The field Reads1 is empty in varscan file in line %s" % (iCurrentLineNumber))\n-        if lResults[5] != \'\':\n-            self.setReads2(lResults[5])\n-        else:\n-            raise CheckerException ("The field Reads2 is empty in varscan file in line %s" % (iCurrentLineNumber))\n-        if lResults[6] != \'\' and re.match("[0-9\\,\\%]+", lResults[6]):\n-            self.setVarFreq(lResults[6])\n-        else:\n-            raise CheckerException ("The field VarFreq is empty or in bad format in varscan file in line %s" % (iCurrentLineNumber))\n-        if lResults[7] != \'\':\n-            self.setStrands1(lResults[7])\n-        else:\n-            raise CheckerException ("The field Strands1 is empty in varscan file in line %s" % (iCurrentLineNumber))\n-        if lResults[8] != \'\':\n-            
self.setStrands2(lResults[8])\n-        else:\n-            raise CheckerException ("The field Strands2 is empty in varscan file in line %s" % (iCurrentLineNumber))\n-        if lResults[9] != \'\':\n-            self.setQual1(lResults[9])\n-        else:\n-            raise CheckerException ("The field Qual1 is empty in varscan file in line %s" % (iCurrentLineNumber))\n-        if lResults[10] != \'\':\n-            self.setQual2(lResults[10])\n-        else:\n-            raise CheckerException ("The field Qual2 is empty in varscan file in line %s" % (iCurrentLineNumber))\n-        if lResults[11] != \'\':\n-            self.setPvalue(lResults[11])\n-        else:\n-            raise CheckerException ("The field Pvalue is empty in varscan file in line %s" % (iCurrentLineNumber))\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/VarscanHit_WithTag.py
--- a/commons/core/parsing/VarscanHit_WithTag.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,70 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-
-from commons.core.parsing.VarscanHit import VarscanHit
-
-class VarscanHit_WithTag(VarscanHit):
-    """Varscan hit that carries one extra "Tag" value as a 13th column.
-
-    Every other field is handled by the VarscanHit base class; this subclass
-    only adds the tag to construction, comparison, serialisation and parsing.
-    """
-
-    def __init__(self, tag = "", chrom = "", position = "", ref = "", var = "", readsRef = "", readsVar = "", varFreq = "", strandsRef = "", strandsVar = "", qualRef = "", qualVar = "", pValue = ""):
-        """Store the tag, then let VarscanHit initialise the twelve standard fields."""
-        self._tag = tag
-        VarscanHit.__init__(self, chrom, position, ref, var, readsRef, readsVar, varFreq, strandsRef, strandsVar, qualRef, qualVar, pValue)
-
-    def __eq__(self, o):
-        """Return the base-class comparison when both tags match, False otherwise."""
-        return VarscanHit.__eq__(self, o) if self._tag == o._tag else False
-
-    def setTag(self, tag):
-        """Replace the tag of this hit."""
-        self._tag = tag
-
-    def getTag(self):
-        """Return the tag of this hit."""
-        return self._tag
-
-    def getHeader(self):
-        """Return the tab-separated header line, with "Tag" as the last column."""
-        return "Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\tTag\n"
-
-    def getVarscanLine(self):
-        """Return this hit as one tab-separated, newline-terminated line (tag last)."""
-        lFields = (self.getChrom(), self.getPosition(), self.getRef(), self.getVar(), self.getReadsRef(), self.getReadsVar(), self.getVarFreq(), self.getStrandsRef(), self.getStrandsVar(),  self.getQualRef(), self.getQualVar(), self.getPValue(), self.getTag())
-        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % lFields
-
-    def setAttributes(self, lResults, iCurrentLineNumber):
-        """Fill the base fields from lResults, then the tag from lResults[12] unless it is empty."""
-        VarscanHit.setAttributes(self, lResults, iCurrentLineNumber)
-        tag = lResults[12]
-        if tag != '':
-            self.setTag(tag)
-
-    def setAttributesFromString(self, varscanString, iCurrentLineNumber ="", fieldSeparator ="\t"):
-        """Split one Varscan line and load it, padding missing trailing fields with "".
-
-        The line is right-stripped first, so a line without a tag still yields
-        the 13 items that setAttributes indexes.
-        """
-        lItems = varscanString.rstrip().split(fieldSeparator)
-        nbMissing = 13 - len(lItems)
-        if nbMissing > 0:
-            lItems.extend([""] * nbMissing)
-        self.setAttributes(lItems, iCurrentLineNumber)
-    
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/VarscanHit_v2_2_8.py
--- a/commons/core/parsing/VarscanHit_v2_2_8.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,176 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-from commons.core.checker.CheckerException import CheckerException\n-from commons.core.parsing.VarscanHit import VarscanHit\n-\n-class VarscanHit_v2_2_8(VarscanHit):\n-    \n-    def __init__(self, chrom = "", position = "", ref = "", cns = "", readsRef = "", readsVar = "", varFreq = "", strandsRef = "", strandsVar = "", qualRef = "", qualVar = "", pValue = "", mapQualRef = "", mapQualVar = "", readsRefPlus = "", readsRefMinus = "", readsVarPlus = "", readsVarMinus = "", var = ""):\n-        self._cns = cns\n-        self._mapQualRef = mapQualRef\n-        self._mapQualVar = mapQualVar\n-        self._readsRefPlus = readsRefPlus\n-        self._readsRefMinus = readsRefMinus\n-        self._readsVarPlus = readsVarPlus\n-        self._readsVarMinus = readsVarMinus\n-        VarscanHit.__init__(self, chrom, position, ref, var, readsRef, readsVar, varFreq, strandsRef, strandsVar, qualRef, qualVar, pValue)\n-        \n-    ## Equal operator\n-    #\n-    # @param o a VarscanFileAnalysis instance\n-    #    \n-    def __eq__(self, o):\n-        if self._cns == o._cns:\n-            return VarscanHit.__eq__(self, o)\n-        return False\n-    \n-    def setCns(self, consensus):\n-        self._cns = consensus\n-        \n-    def setMapQualRef(self, mapQualRef):\n-        self._mapQualRef = mapQualRef\n-        \n-    def setMapQualVar(self, mapQualVar):\n-        self._mapQualVar = mapQualVar\n-        \n-    def setReadsRefPlus(self, readsRefPlus):\n-        self._readsRefPlus = readsRefPlus\n-        \n-    def setReadsRefMinus(self, readsRefMinus):\n-        self._readsRefMinus = readsRefMinus\n-        \n-    def setReadsVarPlus(self, readsVarPlus):\n-        self._readsVarPlus = readsVarPlus\n-        \n-    def setReadsVarMinus(self, readsVarMinus):\n-        self._readsVarMinus = readsVarMinus\n-    \n-    def 
getCns(self):\n-        return self._cns\n-        \n-    def getMapQualRef(self):\n-        return self._mapQualRef\n-        \n-    def getMapQualVar(self):\n-        return self._mapQualVar\n-        \n-    def getReadsRefPlus(self):\n-        return self._readsRefPlus\n-        \n-    def getReadsRefMinus(self):\n-        return self._readsRefMinus\n-        \n-    def getReadsVarPlus(self):\n-        return self._readsVarPlus\n-        \n-    def getReadsVarMinus(self):\n-        return self._readsVarMinus\n-    \n-    def getHeader(self):\n-        return "Chrom\\tPosition\\tRef\\tC'..b'\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\tMapQual1\\tMapQual2\\tReads1Plus\\tReads1Minus\\tReads2Plus\\tReads2Minus\\tVarAllele\\n"\n-    \n-    def getVarscanLine(self):\n-        return "%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\n" % (self.getChrom(), self.getPosition(), self.getRef(), self.getCns(), self.getReadsRef(), self.getReadsVar(), self.getVarFreq(), self.getStrandsRef(), self.getStrandsVar(),  self.getQualRef(), self.getQualVar(), self.getPValue(), self.getMapQualRef(), self.getMapQualVar(), self.getReadsRefPlus(), self.getReadsRefMinus(), self.getReadsVarPlus(), self.getReadsVarMinus(), self.getVar())\n-    \n-    def setAttributes(self, lResults, iCurrentLineNumber):\n-        if lResults[0] != \'\':\n-            self.setChrom(lResults[0])\n-        else:\n-            raise CheckerException ("The field Chrom is empty in varscan file in line %s" % iCurrentLineNumber)\n-        if lResults[1] != \'\':\n-            self.setPosition(lResults[1])\n-        else:\n-            raise CheckerException ("The field Position is empty in varscan file in line %s" % iCurrentLineNumber)\n-        if lResults[2] != \'\':\n-            self.setRef(lResults[2])\n-        else:\n-            raise CheckerException ("The field Ref is empty in varscan file in line %s" % iCurrentLineNumber)\n-        if lResults[3] != 
\'\':\n-            self.setCns(lResults[3])\n-        else:\n-            raise CheckerException ("The field Cons is empty in varscan file in line %s" % iCurrentLineNumber)\n-        if lResults[4] != \'\':\n-            self.setReadsRef(lResults[4])\n-        if lResults[5] != \'\':\n-            self.setReadsVar(lResults[5])\n-        if lResults[6] != \'\':\n-            self.setVarFreq(lResults[6])\n-        if lResults[7] != \'\':\n-            self.setStrandsRef(lResults[7])\n-        if lResults[8] != \'\':\n-            self.setStrandsVar(lResults[8])\n-        if lResults[9] != \'\':\n-            self.setQualRef(lResults[9])\n-        if lResults[10] != \'\':\n-            self.setQualVar(lResults[10])\n-        if lResults[11] != \'\':\n-            self.setPValue(lResults[11])\n-        if lResults[12] != \'\':\n-            self.setMapQualRef(lResults[12])\n-        if lResults[13] != \'\':\n-            self.setMapQualVar(lResults[13])\n-        if lResults[14] != \'\':\n-            self.setReadsRefPlus(lResults[14])\n-        if lResults[15] != \'\':\n-            self.setReadsRefMinus(lResults[15])\n-        if lResults[16] != \'\':\n-            self.setReadsVarPlus(lResults[16])\n-        if lResults[17] != \'\':\n-            self.setReadsVarMinus(lResults[17])\n-        if lResults[18] != \'\':\n-            self.setVar(lResults[18])\n-        else:\n-            raise CheckerException ("The field varAllele is empty in varscan file in line %s" % iCurrentLineNumber)\n-            \n-    def setAttributesFromString(self, varscanString, iCurrentLineNumber ="", fieldSeparator ="\\t"):\n-        varscanString = varscanString.rstrip()\n-        lvarscanStringItem = varscanString.split(fieldSeparator)\n-        if len(lvarscanStringItem) < 19:\n-            raise CheckerException ("This varscan line (l.%s) is not complete" % iCurrentLineNumber)\n-        self.setAttributes(lvarscanStringItem, iCurrentLineNumber)\n-        \n-    def 
convertVarscanHit_v2_2_8_To_VarscanHit(self):\n-        iVarscanHit = VarscanHit()\n-        iVarscanHit.setChrom(self.getChrom())\n-        iVarscanHit.setPosition(self.getPosition())\n-        iVarscanHit.setRef(self.getRef())\n-        iVarscanHit.setVar(self.getVar())\n-        iVarscanHit.setReadsRef(self.getReadsRef())\n-        iVarscanHit.setReadsVar(self.getReadsVar())\n-        iVarscanHit.setVarFreq(self.getVarFreq())\n-        iVarscanHit.setStrandsRef(self.getStrandsRef())\n-        iVarscanHit.setStrandsVar(self.getStrandsVar())\n-        iVarscanHit.setQualRef(self.getQualRef())\n-        iVarscanHit.setQualVar(self.getQualVar())\n-        iVarscanHit.setPValue(self.getPValue())\n-        return iVarscanHit\n-        \n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/VarscanHit_v2_2_8_WithTag.py
--- a/commons/core/parsing/VarscanHit_v2_2_8_WithTag.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,88 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-from commons.core.checker.CheckerException import CheckerException
-from commons.core.parsing.VarscanHit_v2_2_8 import VarscanHit_v2_2_8
-from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag
-
-class VarscanHit_v2_2_8_WithTag(VarscanHit_v2_2_8):
-    """Varscan 2.2.8 hit that carries one extra "Tag" value as a 20th column.
-
-    The nineteen standard columns are handled by VarscanHit_v2_2_8; this
-    subclass adds the tag to construction, comparison, serialisation, parsing
-    and conversion to the shorter VarscanHit_WithTag form.
-    """
-
-    def __init__(self, chrom = "", position = "", ref = "", cns = "", readsRef = "", readsVar = "", varFreq = "", strandsRef = "", strandsVar = "", qualRef = "", qualVar = "", pValue = "", mapQualRef = "", mapQualVar = "", readsRefPlus = "", readsRefMinus = "", readsVarPlus = "", readsVarMinus = "", var = "", tag = ""):
-        """Store the tag, then let VarscanHit_v2_2_8 initialise the standard fields."""
-        self._tag = tag
-        # The fourth positional parameter of VarscanHit_v2_2_8.__init__ is the
-        # consensus (cns); the variant allele (var) is the last one.  Passing
-        # var in the fourth slot would drop the caller's cns value.
-        VarscanHit_v2_2_8.__init__(self, chrom, position, ref, cns, readsRef, readsVar, varFreq, strandsRef, strandsVar, qualRef, qualVar, pValue, mapQualRef, mapQualVar, readsRefPlus, readsRefMinus, readsVarPlus, readsVarMinus, var)
-
-    def __eq__(self, o):
-        """Return the base-class comparison when both tags match, False otherwise."""
-        if self._tag == o._tag:
-            return VarscanHit_v2_2_8.__eq__(self, o)
-        return False
-
-    def setTag(self, tag):
-        """Replace the tag of this hit."""
-        self._tag = tag
-
-    def getTag(self):
-        """Return the tag of this hit."""
-        return self._tag
-
-    def getHeader(self):
-        """Return the tab-separated header line, with "Tag" as the last column."""
-        return "Chrom\tPosition\tRef\tCons\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\tMapQual1\tMapQual2\tReads1Plus\tReads1Minus\tReads2Plus\tReads2Minus\tVarAllele\tTag\n"
-
-    def getVarscanLine(self):
-        """Return this hit as one tab-separated, newline-terminated line (tag last)."""
-        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (self.getChrom(), self.getPosition(), self.getRef(), self.getCns(), self.getReadsRef(), self.getReadsVar(), self.getVarFreq(), self.getStrandsRef(), self.getStrandsVar(),  self.getQualRef(), self.getQualVar(), self.getPValue(), self.getMapQualRef(), self.getMapQualVar(), self.getReadsRefPlus(), self.getReadsRefMinus(), self.getReadsVarPlus(), self.getReadsVarMinus(), self.getVar(), self.getTag())
-
-    def setAttributes(self, lResults, iCurrentLineNumber):
-        """Fill the standard fields from lResults, then the tag from lResults[19].
-
-        Raises CheckerException when the tag field is empty.
-        """
-        VarscanHit_v2_2_8.setAttributes(self, lResults, iCurrentLineNumber)
-        if lResults[19] != '':
-            self.setTag(lResults[19])
-        else:
-            raise CheckerException ("The field tag is empty in varscan file in line %s" % iCurrentLineNumber)
-
-    def setAttributesFromString(self, varscanString, iCurrentLineNumber ="", fieldSeparator ="\t"):
-        """Split one Varscan line on fieldSeparator and load it.
-
-        Raises CheckerException when the right-stripped line has fewer than
-        20 fields.
-        """
-        varscanString = varscanString.rstrip()
-        lvarscanStringItem = varscanString.split(fieldSeparator)
-        if len(lvarscanStringItem) < 20:
-            raise CheckerException ("This varscan line (l.%s) is not complete" % iCurrentLineNumber)
-        self.setAttributes(lvarscanStringItem, iCurrentLineNumber)
-
-    def convertVarscanHit_v2_2_8_WithTag_To_VarscanHit_WithTag(self):
-        """Return a new VarscanHit_WithTag holding the twelve shared fields and the tag.
-
-        The consensus, mapping qualities and per-strand read counts are not
-        copied.
-        """
-        iVarscanHit = VarscanHit_WithTag()
-        iVarscanHit.setChrom(self.getChrom())
-        iVarscanHit.setPosition(self.getPosition())
-        iVarscanHit.setRef(self.getRef())
-        iVarscanHit.setVar(self.getVar())
-        iVarscanHit.setReadsRef(self.getReadsRef())
-        iVarscanHit.setReadsVar(self.getReadsVar())
-        iVarscanHit.setVarFreq(self.getVarFreq())
-        iVarscanHit.setStrandsRef(self.getStrandsRef())
-        iVarscanHit.setStrandsVar(self.getStrandsVar())
-        iVarscanHit.setQualRef(self.getQualRef())
-        iVarscanHit.setQualVar(self.getQualVar())
-        iVarscanHit.setPValue(self.getPValue())
-        iVarscanHit.setTag(self.getTag())
-        return iVarscanHit
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/VarscanToVCF.py
--- a/commons/core/parsing/VarscanToVCF.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,152 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-import math
-from commons.core.LoggerFactory import LoggerFactory
-from commons.core.utils.RepetOptionParser import RepetOptionParser
-from commons.core.utils.FileUtils import FileUtils
-from commons.core.parsing.VarscanFile import VarscanFile
-from commons.core.seq.Bioseq import Bioseq
-
-# Logger name prefix: VarscanToVCF appends its class name to it when it creates its logger.
-LOG_DEPTH = "core.parsing"
-
-##Reference launcher implementation
-#
-class VarscanToVCF(object):
-    """Convert a Varscan 2.2.8 result file into a VCF file.
-
-    Every data line of the input becomes one VCF record whose INFO column
-    holds the AF, DP, RBQ and ABQ keys.  Comment lines (starting with "#"),
-    the Varscan header line and blank lines are not converted.
-    """
-
-    def __init__(self, varscanFileName = "", vcfFileName = "", doClean = False, verbosity = 0):
-        """Store the settings and create the logger.
-
-        An empty vcfFileName defaults to "<varscanFileName>.vcf".
-        """
-        self._varscanFileName = varscanFileName
-        # Must follow the assignment above: the default output name is built from it.
-        self.setvcfFileName(vcfFileName)
-        self._doClean = doClean
-        self._verbosity = verbosity
-
-        self._vcfRevision = "VCFv4.1"
-        self._vcfHeader = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"
-
-        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
-
-    def setAttributesFromCmdLine(self):
-        """Parse the command line (-i, -o, -c, -v) and store the resulting settings."""
-        description = "Convert Varscan file to VCF file."
-        epilog = "\t$ python VarscanToVCF.py -i varscanFileName -v 2"
-        parser = RepetOptionParser(description = description, epilog = epilog)
-        parser.add_option("-i", "--Varscan",    dest = "varscanFileName", action = "store",       type = "string", help = "input Varscan file name [compulsory] [format: varscan2.2.8]", default = "")
-        parser.add_option("-o", "--vcfFileName",dest = "vcfFileName",     action = "store",       type = "string", help = "vcfFileName file name [default: <input>.vcf]", default = "")
-        parser.add_option("-c", "--clean",      dest = "doClean",         action = "store_true",                   help = "clean temporary files [optional] [default: False]", default = False)
-        parser.add_option("-v", "--verbosity",  dest = "verbosity",       action = "store",       type = "int",    help = "verbosity [optional] [default: 1]", default = 1)
-        options = parser.parse_args()[0]
-        self._setAttributesFromOptions(options)
-
-    def _setAttributesFromOptions(self, options):
-        """Copy the parsed options into this instance (input name first, see setvcfFileName)."""
-        self.setvarscanFileName(options.varscanFileName)
-        self.setvcfFileName(options.vcfFileName)
-        self.setDoClean(options.doClean)
-        self.setVerbosity(options.verbosity)
-
-    def setvarscanFileName(self, varscanFileName):
-        """Set the input Varscan file name."""
-        self._varscanFileName = varscanFileName
-
-    def setvcfFileName(self, vcfFileName):
-        """Set the output file name; an empty name means "<current input name>.vcf"."""
-        if vcfFileName == "":
-            self._vcfFileName = "%s.vcf" % self._varscanFileName
-        else:
-            self._vcfFileName = vcfFileName
-
-    def setDoClean(self, doClean):
-        """Set the clean flag."""
-        self._doClean = doClean
-
-    def setVerbosity(self, verbosity):
-        """Set the verbosity level applied to the logger when run() starts."""
-        self._verbosity = verbosity
-
-    def _checkOptions(self):
-        """Log and raise an Exception when the input file name is empty or does not exist."""
-        if self._varscanFileName == "":
-            self._logAndRaise("ERROR: Missing input file name")
-        elif not FileUtils.isRessourceExists(self._varscanFileName):
-            self._logAndRaise("ERROR: Input Varscan file '%s' does not exist!" % self._varscanFileName)
-
-    def _logAndRaise(self, errorMsg):
-        """Log errorMsg at error level, then raise it as an Exception."""
-        self._log.error(errorMsg)
-        raise Exception(errorMsg)
-
-    @staticmethod
-    def _varFreqToFraction(varFreq):
-        """Turn a Varscan percentage string such as "45.5%" or "45,5%" into a fraction.
-
-        The last character (expected to be "%") is dropped.  A decimal comma is
-        accepted as well as a decimal point, because float() only understands
-        the latter.
-        """
-        return float(varFreq[:-1].replace(",", ".")) / 100
-
-    def _convertVarscanLineToVCFRecord(self, varscanLine, lineNumber):
-        """Return the VCF record (newline-terminated) built from one Varscan 2.2.8 line.
-
-        lineNumber is only used for parsing diagnostics and the warning below.
-        """
-        iVarscanFile = VarscanFile()
-        iVarscanFile.setTypeOfVarscanFile("Varscan_2_2_8")
-        iVarscanHit = iVarscanFile.createVarscanObjectFromLine(varscanLine, lineNumber)
-        Chrom = iVarscanHit.getChrom()
-        Pos = int(iVarscanHit.getPosition())
-        # ID and FILTER are written as ".".
-        ID = "."
-        Ref = iVarscanHit.getRef()
-        Alt = iVarscanHit.getVar()
-        # NOTE(review): math.log10 raises ValueError for a p-value of 0 - confirm
-        # whether the input can contain one.
-        Qual = -10*math.log10(float(iVarscanHit.getPValue()))
-        Filter = "."
-        AF = self._varFreqToFraction(iVarscanHit.getVarFreq())
-        DP = int(iVarscanHit.getReadsRef()) + int(iVarscanHit.getReadsVar())
-        RBQ = iVarscanHit.getQualRef()
-        ABQ = iVarscanHit.getQualVar()
-        Info = ";".join(["AF=%.4f" %AF,"DP=%d" %DP,"RBQ=%s" %RBQ, "ABQ=%s" %ABQ])
-
-        # The allele derived from the consensus and the reference overrides the
-        # VarAllele column when the two disagree (presumably the consensus is an
-        # IUPAC code - verify against Bioseq).
-        allel = Bioseq().getATGCNFromIUPACandATGCN(iVarscanHit.getCns(), Ref)
-        if allel != Alt:
-            self._log.warning("'VarAllele' attribute of Varscan file line '%d' was not correct. Correcting using '%s' instead of '%s'." % (lineNumber, allel, Alt))
-            Alt = allel
-
-        vcfLine = "%s\t%s\t%s\t%s\t%s\t%.9f\t%s\t%s\n" % (Chrom, Pos, ID, Ref, Alt, Qual, Filter, Info)
-        return vcfLine
-
-    def run(self):
-        """Check the options, then write the VCF header and one record per data line."""
-        LoggerFactory.setLevel(self._log, self._verbosity)
-        self._checkOptions()
-        self._log.info("START Varscan To VCF")
-        self._log.debug("Input file name: %s" % self._varscanFileName)
-
-        with open(self._vcfFileName, "w") as fVCF:
-            fVCF.write("##fileformat=%s\n" % self._vcfRevision)
-            fVCF.write("%s\n" % self._vcfHeader)
-
-            with open(self._varscanFileName, "r") as fVarscan:
-                # Line numbers are 1-based and count every line, skipped or not.
-                for lineNumber, line in enumerate(fVarscan, 1):
-                    if not line.strip():
-                        # A blank line holds no record; do not send it to the parser.
-                        continue
-                    if line[0] == "#" or "Chrom\tPosition\tRef\tCons" in line:
-                        continue
-                    fVCF.write(self._convertVarscanLineToVCFRecord(line, lineNumber))
-
-        self._log.info("END Varscan To VCF")
-
-if __name__ == "__main__":
-    # Script entry point: read the settings from the command line, then convert.
-    launcher = VarscanToVCF()
-    launcher.setAttributesFromCmdLine()
-    launcher.run()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/WigParser.py
--- a/commons/core/parsing/WigParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,333 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import re\n-import sys\n-import os.path\n-import struct\n-from commons.core.parsing.TranscriptListParser import TranscriptListParser\n-from SMART.Java.Python.structure.Transcript import Transcript\n-\n-STRANDTOSTR = {1: "(+)", 0: "(=)", None: "(=)", -1: "(-)"}\n-\n-nbOpenHandles = 30\n-\n-\n-class WigParser(TranscriptListParser):\n-\t"""A class that parses a big WIG file, creates an index and make it possible to quickly retrieve some data"""\n-\n-\tdef __init__(self, fileName, verbosity = 1):\n-\t\tself.fileName\t\t   = fileName\n-\t\tself.filler\t\t\t   = "\\xFF" * struct.calcsize(\'Q\')\n-\t\tself.strands\t\t   = False\n-\t\tself.indexFiles\t   \t   = {}\n-\t\tself.indexBuilt\t\t   = False\n-\t\tself.defaultValue\t   = 0.0\n-\t\tself.currentChromosome = None\n-\t\tself.currentStrand\t   = 1\n-\t\tself.verbosity         = verbosity\n-\t\tsuper(WigParser, self).__init__(fileName, verbosity)\n-\n-\n-\tdef __def__(self):\n-\t\tfor file in self.indexFiles.values():\n-\t\t\tfile.close()\n-\n-\n-\tdef setStrands(self, strands):\n-\t\tself.strands = strands\n-\n-\n-\tdef setDefaultValue(self, value):\n-\t\tself.defaultValue = value\n-\n-\n-\tdef getFileFormats():\n-\t\treturn ["wig"]\n-\tgetFileFormats = staticmethod(getFileFormats)\n-\n-\n-\tdef setStrands(self, strands):\n-\t\t"""\n-\t\tConsider both strands separately\n-\t\t"""\n-\t\tself.strands = strands\n-\n-\n-\tdef makeIndexName(self, chromosome, strand = None):\n-\t\t"""\n-\t\tCreate an index name for a file\n-\t\t"""\n-\t\tdirectoryName = os.path.dirname(self.fileName)\n-\t\tif 
strand == None:\n-\t\t\tstrandName = ""\n-\t\telse:\n-\t\t\tstrandName = "+" if strand == 1 else "-"\n-\t\tindexName = os.path.join(directoryName, ".%s%s.index" % (chromosome, strandName))\n-\t\treturn indexName\n-\t\n-\t\n-\tdef findIndexFile(self, chromosome, strand = None):\n-\t\t"""\n-\t\tCheck if the index of a file exists\n-\t\t""" \n-\t\tindexName = self.makeIndexName(chromosome, strand)\n-\t\tif os.path.exists(indexName):\n-\t\t\treturn indexName\n-\t\treturn False\n-\t\n-\t\n-\tdef makeIndexFile(self):\n-\t\t"""\n-\t\tCreate the index for a file\n-\t\t"""\n-\t\tif self.indexBuilt:\n-\t\t\treturn\n-\n-\t\tinputFile  = open(self.fileName)\n-\t\toutputFile = None\n-\t\tindex\t  = 0\n-\t\tmark\t   = inputFile.tell()\n-\t\tline\t   = inputFile.readline().strip()\n-\t\tchromosome = None\n-\n-\t\twhile line != "":\n-\t\t\tm1 = re.search(r"^\\s*-?\\d+\\.?\\d*\\s*$", line)\n-\t\t\tm2 = re.search(r"^\\s*(\\d+)\\s+-?\\d+\\.?\\d*\\s*$", line)\n-\t\t\tm3 = re.search(r"^\\s*fixedStep\\s+chrom=(\\S+)\\s+start=(\\d+)\\s+step=1\\s*$", line)\n-\t\t\tm4 = re.search(r"^\\s*fixedStep\\s+chrom=\\S+\\s+start=\\d+\\s+step=\\d+\\s+span=\\d+\\s*$", line)\n-\t\t\tm5 = re.search(r"^\\s*variable'..b'ndex for chromosome %s, strand %s does not exist." 
% (chromosome, STRANDTOSTR[strand])\n-\t\t\treturn False\n-\t\tindexFile = open(indexFileName, "rb")\n-\n-\t\tif len(self.indexFiles.keys()) > nbOpenHandles:\n-\t\t\tremovedKey = set(self.indexFiles.keys()).pop()\n-\t\t\tself.indexFiles[removedKey].close()\n-\t\t\tdel self.indexFiles[removedKey]\n-\t\tself.indexFiles[indexFileKey] = indexFile\n-\t\treturn indexFile\n-\t\t\n-\n-\t\n-\tdef findIndex(self, chromosome, start, strand = None):\n-\t\t"""\n-\t\tFind the point where to start reading file\n-\t\t"""\n-\n-\t\tsizeOfLong = struct.calcsize("Q")\n-\t\tempty\t  = int(struct.unpack("Q", self.filler)[0])\n-\t\toffset\t = empty\n-\t\tindexFile  = self.getIndexFileHandle(chromosome, strand)\n-\t\n-\t\tif not indexFile:\n-\t\t\treturn (None, None)\n-\t\t\n-\t\twhile offset == empty:\n-\t\t\taddress = start * sizeOfLong\n-\t\t\tindexFile.seek(address, os.SEEK_SET)\n-\t\t\t\n-\t\t\tbuffer = indexFile.read(sizeOfLong)\n-\t\t\tif len(buffer) != sizeOfLong:\n-\t\t\t\tif buffer == "":\n-\t\t\t\t\tprint "Warning! Index position %d of chromosome %s on strand %s seems out of range!" % (start, chromosome, STRANDTOSTR[strand])\n-\t\t\t\t\treturn (None, None)\n-\t\t\t\telse:\n-\t\t\t\t\traise Exception("Problem fetching position %d of chromosome %s on strand %s seems out of range!" 
% (start, chromosome, STRANDTOSTR[strand]))\n-\t\t\t\n-\t\t\toffset = int(struct.unpack("Q", buffer)[0])\n-\t\t\tstart += 1\n-\t\t\t\n-\t\tstart -= 1\n-\t\treturn (offset, start)\n-\t\n-\t\n-\n-\tdef getRange(self, chromosome, start, end):\n-\t\t"""\n-\t\tParse a wig file and output a range\n-\t\t"""\n-\t\tarrays  = {}\n-\t\tstrands = {1: "+", -1: "-"} if self.strands else {0: ""}\n-\n-\t\tfor strand in strands:\n-\n-\t\t\tarray = [self.defaultValue] * (end - start + 1)\n-\t\t\tfile  = open(self.fileName)\n-\t\t\toffset, index = self.findIndex(chromosome, start, strand if self.strands else None)\n-\t\t\tif offset == None:\n-\t\t\t\tarrays[strand] = array\n-\t\t\t\tcontinue\n-\t\t\tfile.seek(offset, os.SEEK_SET)\n-\n-\t\t\tfor line in file:\n-\t\t\t\tline = line.strip()\n-\n-\t\t\t\tm1 = re.search(r"^\\s*(-?\\d+\\.?\\d*)\\s*$", line)\n-\t\t\t\tm2 = re.search(r"^\\s*(\\d+)\\s+(-?\\d+\\.?\\d*)\\s*$", line)\n-\t\t\t\tm3 = re.search(r"^\\s*fixedStep\\s+chrom=(\\S+)\\s+start=(\\d+)\\s+step=\\d+\\s*$", line)\n-\t\t\t\tm4 = re.search(r"^\\s*variableStep\\s+chrom=(\\S+)\\s*$", line)\n-\n-\t\t\t\tif m1 != None:\n-\t\t\t\t\tif index > end:\n-\t\t\t\t\t\tbreak\n-\t\t\t\t\tif index >= start:\n-\t\t\t\t\t\tarray[index - start] = float(m1.group(1))\n-\t\t\t\t\tindex += 1\n-\t\t\t\telif m2 != None:\n-\t\t\t\t\tindex = int(m2.group(1))\n-\t\t\t\t\tif index > end:\n-\t\t\t\t\t\tbreak\n-\t\t\t\t\tif index >= start:\n-\t\t\t\t\t\tarray[index - start] = float(m2.group(2))\n-\t\t\t\t\tindex += 1\n-\t\t\t\telif m3 != None:\n-\t\t\t\t\tif m3.group(1) != "%s%s" % (chromosome, strands[strand]):\n-\t\t\t\t\t\tbreak\n-\t\t\t\t\tindex = int(m3.group(2))\n-\t\t\t\telif m4 != None:\n-\t\t\t\t\tif m4.group(1) != "%s%s" % (chromosome, strands[strand]):\n-\t\t\t\t\t\tbreak\n-\t\t\t\telif (len(line) == 0) or (line[0] == "#") or line.startswith("track"):\n-\t\t\t\t\tpass\n-\t\t\t\telse:\n-\t\t\t\t\traise Exception("Error! 
Cannot read line \'%s\' of wig file" % (line))\n-\n-\t\t\tfile.close()\n-\t\n-\t\t\tarrays[strand] = array\n-\t\t\t\n-\t\tif self.strands:\n-\t\t\treturn arrays\n-\t\treturn array\n-\t\n-\n-\tdef skipFirstLines(self):\n-\t\treturn\n-\n-\t\n-\tdef parseLine(self, line):\n-\t\tif line.startswith("track"):\n-\t\t\treturn None\n-\t\tm = re.search(r"^\\s*variableStep\\s+chrom=(\\S+)", line)\n-\t\tif m != None:\n-\t\t\tchromosome = m.group(1)\n-\t\t\tif chromosome.endswith("+"):\n-\t\t\t\tself.currentStrand = 1\n-\t\t\t\tself.currentChromosome = chromosome[:-1]\n-\t\t\telif chromosome.endswith("-"):\n-\t\t\t\tself.currentStrand = -1\n-\t\t\t\tself.currentChromosome = chromosome[:-1]\n-\t\t\telse:\n-\t\t\t\tself.currentStrand = 1\n-\t\t\t\tself.currentChromosome = chromosome\n-\t\t\treturn None\n-\t\tposition, value = line.split()\n-\t\tposition = int(position)\n-\t\tvalue\t= float(value)\n-\t\ttranscript = Transcript()\n-\t\ttranscript.setChromosome(self.currentChromosome)\n-\t\ttranscript.setStart(position)\n-\t\ttranscript.setEnd(position)\n-\t\ttranscript.setDirection(self.currentStrand)\n-\t\ttranscript.setTagValue("ID", "wig_%s_%d_%d" % (self.currentChromosome, self.currentStrand, position))\n-\t\ttranscript.setTagValue("nbElements", value)\n-\t\treturn transcript\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/WigParser.pyc
b
Binary file commons/core/parsing/WigParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/__init__.pyc
b
Binary file commons/core/parsing/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/multifastaParserLauncher.py
--- a/commons/core/parsing/multifastaParserLauncher.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Launcher for the multifasta parser.
-@param b: Name of the batch of sequences
-@param g: Name of the gene
-@param t: Scientific name of the taxon concerned
-@param f: Name of the multifasta input file  
-"""
-
-
-import os
-import sys
-import getopt
-from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile
-
-CURRENT_DIR = os.getcwd()
-
-def help():
-        
-    """
-    Give the list of the command-line options.
-    """
-        
-    print "Usage: ",sys.argv[0],"[ options ]"
-    print "     -h: this help"
-    print "Mandatory option:"
-    print "     -t: Scientific name of the taxon concerned"
-    print "Exclusive options (use either the first or the second, one should be used)"
-    print "     -f: Name of the multifasta input file in one batch mode"
-    print "     -d: Name of the directory containing multifasta input file(s) in multi-batch mode"
-    print "Only in one batch mode: mandatory options (when -f is used):"    
-    print "     -b: Name of the batch of submitted sequences"
-    print "     -g: Name of the gene"
-    print ""
-    
-
-def runOneInputFile(batchName, geneName, taxon, inputFileName):
-    print "Multifasta parseur launched:!\n"
-    print "-- Input File: " + inputFileName + "\n"
-    print "-- Batch name: " + batchName + "\n"
-    print "-- Gene name: " + geneName + "\n"
-    print "-- Taxon: " + taxon + "\n"
-    #TODO: gerer le delete des fichiers(mode append)
-    multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, geneName)
-    multifasta2SNPFile.runOneBatch(inputFileName)
-    print "OK: Files generated!"
-
-
-def runSeveralInputFile(taxon, rootDirectoryName):
-    multifasta2SNPFile = Multifasta2SNPFile(taxon)
-    multifasta2SNPFile.runSeveralBatches(rootDirectoryName)
-
-def main():
-    batchName = ""
-    geneName = ""
-    taxon = ""
-    inputFileName = ""
-    rootDirectoryName = ""
-    
-    
-    try:
-        opts,args = getopt.getopt(sys.argv[1:],"hb:g:t:f:d:")
-    except getopt.GetoptError:
-        print "Invalid options\n"
-        help()
-        sys.exit(2)
-
-    for o, a in opts:
-        if o == "-h":
-            help()
-            exit(0)
-        elif o == "-b":
-            batchName = a
-        elif o == "-g":
-            geneName = a
-        elif o == "-t":
-            taxon = a
-        elif o == "-f":
-            inputFileName = a
-        elif o == "-d":
-            rootDirectoryName = os.path.abspath(a)
-            
-    if taxon == "":
-        print "*** Error: The mandatory option -t is missing"
-        help()
-        sys.exit(1)
-    
-    if (inputFileName == "" and  rootDirectoryName == "") or (inputFileName != "" and  rootDirectoryName != ""):
-        print "*** Error: You have to specify the input mode: choose either -f (for one file) or -d (for one directory of several files)"
-        help()
-        sys.exit(1)
-        
-    if(inputFileName != ""):
-        if batchName == "" or geneName == "":
-            print "*** Error: A mandatory option is missing in one batch mode (-b or -g)"
-            help()
-            sys.exit(1)
-    
-    if(inputFileName != ""):
-        runOneInputFile(batchName, geneName, taxon, inputFileName)
-    else:
-        runSeveralInputFile(taxon, rootDirectoryName)
-    
-    
-    return 0
-
-#------------------------------------------------------------------------------
-if __name__ == "__main__":
-    main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_BedParser.py
--- a/commons/core/parsing/test/Test_BedParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,58 +0,0 @@
-import unittest, os
-from commons.core.parsing.BedParser import BedParser
-
-
-class Test_BedParser(unittest.TestCase):
-    
-    def test_Parser(self):
-        parser = BedParser("data/testBedParser1.bed")
-
-        self.assertEqual(parser.getNbTranscripts(), 1)
-
-        for transcript in parser.getIterator():
-            self.assertEqual(transcript.getChromosome(), "arm_X")
-            self.assertEqual(transcript.getName(), "test1.1")
-            self.assertEqual(transcript.getStart(), 1000)
-            self.assertEqual(transcript.getEnd(), 2999)
-            self.assertEqual(transcript.getDirection(), 1)
-            self.assertEqual(transcript.getNbExons(), 2)
-            exons = transcript.getExons()
-            self.assertEqual(exons[0].getChromosome(), "arm_X")
-            self.assertEqual(exons[0].getStart(), 1000)
-            self.assertEqual(exons[0].getEnd(), 1099)
-            self.assertEqual(exons[0].getDirection(), 1)
-            self.assertEqual(exons[1].getChromosome(), "arm_X")
-            self.assertEqual(exons[1].getStart(), 2000)
-            self.assertEqual(exons[1].getEnd(), 2999)
-            self.assertEqual(exons[1].getDirection(), 1)
-
-    def test_Parser_short(self):
-        tmpFileName = "tmpFile.bed"
-        tmpHandle   = open(tmpFileName, "w")
-        tmpHandle.write("""X\t554748\t554904\texon
-X\t554748\t554904\tCDS
-X\t554748\t554750\tstart_codon
-""")
-        tmpHandle.close()
-        parser = BedParser(tmpFileName)
-        self.assertEqual(parser.getNbTranscripts(), 3)
-        for cpt, transcript in enumerate(parser.getIterator()):
-            self.assertEqual(transcript.getNbExons(), 1)
-            self.assertEqual(transcript.getChromosome(), "X")
-            self.assertEqual(transcript.getStart(), 554748)
-            if cpt == 0:
-                self.assertEqual(transcript.getEnd(), 554903)
-                self.assertEqual(transcript.getName(), "exon")
-            elif cpt == 1:
-                self.assertEqual(transcript.getEnd(), 554903)
-                self.assertEqual(transcript.getName(), "CDS")
-            elif cpt == 2:
-                self.assertEqual(transcript.getEnd(), 554749)
-                self.assertEqual(transcript.getName(), "start_codon")
-        os.remove(tmpFileName)
-
-
-
-if __name__ == '__main__':
-        unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_BlatFileParser.py
--- a/commons/core/parsing/test/Test_BlatFileParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,61 +0,0 @@
-import unittest
-from commons.core.parsing.BlatFileParser import BlatFileParser
-
-
-class Test_BlatFileParser(unittest.TestCase):
-
-
-    def test_parseBlatFile(self):
-        fileName = "dummayBlat.psl"
-        self._writeBlatInputFile(fileName)
-        blatFileParser = BlatFileParser(fileName)
-        blatFileParser.parseBlatFile()
-        obsNbHits = len(blatFileParser.getListsOfHits())
-        self.assertTrue(obsNbHits == 10)
-        obsQueries = blatFileParser.getDictOfQueries()
-        expQueries = {'5:574_1:574_539_5:1:G/C': 1, '3:574_1:574_539_5:1:G/C': 1, '5:574_2:574_433_5:1:G/C': 1,"3:574_2:574_433_5:1:G/C":1, "5:574_5:574_607_5:1:G/C": 1, "3:574_5:574_607_5:1:G/C": 1}
-        self.assertEquals(expQueries, obsQueries)
-        
-    def test_parseBlatFileByQueries(self):
-        fileName = "dummayBlat.psl"
-        self._writeBlatInputFile(fileName)
-        blatFileParser = BlatFileParser(fileName)
-        blatFileParser.parseBlatFileByQueries()
-        obsDict = blatFileParser.getDictOfBlatHitsByQueries()
-        obs1 = len(obsDict["5:574_1:574_539_5:1:G/C"])
-        obs2 = len(obsDict["3:574_1:574_539_5:1:G/C"])
-        obs3 = len(obsDict["5:574_2:574_433_5:1:G/C"])
-        obs4 = len(obsDict["3:574_2:574_433_5:1:G/C"])
-        obs5 = len(obsDict["5:574_5:574_607_5:1:G/C"])
-        obs6 = len(obsDict["3:574_5:574_607_5:1:G/C"])
-        self.assertTrue(obs1 == 1)
-        self.assertTrue(obs2 == 1)
-        self.assertTrue(obs3 == 1)
-        self.assertTrue(obs4 == 5)
-        self.assertTrue(obs5 == 1)
-        self.assertTrue(obs6 == 1)
-        obsQueries = blatFileParser.getDictOfQueries()
-        expQueries = {'5:574_1:574_539_5:1:G/C': 1, '3:574_1:574_539_5:1:G/C': 1, '5:574_2:574_433_5:1:G/C': 1,"3:574_2:574_433_5:1:G/C":1, "5:574_5:574_607_5:1:G/C": 1, "3:574_5:574_607_5:1:G/C": 1}
-        self.assertEquals(expQueries, obsQueries)
-        
-    def _writeBlatInputFile(self, fileName):
-        file = open(fileName, "w")
-        file.write("psLayout version 3\n")
-        file.write("\n")
-        file.write("match\tmis- \trep. \tN's\tQ gap\tQ gap\tT gap\tT gap\tstrand\tQ        \tQ   \tQ    \tQ  \tT        \tT   \tT    \tT  \tblock\tblockSizes \tqStarts\t tStarts\n")
-        file.write("     \tmatch\tmatch\t   \tcount\tbases\tcount\tbases\t      \tname     \tsize\tstart\tend\tname     \tsize\tstart\tend\tcount\n")
-        file.write("---------------------------------------------------------------------------------------------------------------------------------------------------------------\n")
-        file.write("246\t0\t0\t4\t0\t0\t0\t0\t-\t5:574_1:574_539_5:1:G/C\t250\t0\t250\ttaecs3B_RPH7\t3109948\t1065213\t1065463\t1\t250,\t0,\t1065213,\n")
-        file.write("247\t0\t0\t2\t0\t0\t0\t0\t-\t3:574_1:574_539_5:1:G/C\t250\t1\t250\ttaecs3B_RPH7\t3109948\t1064962\t1065211\t1\t249,\t0,\t1064962,\n")
-        file.write("249\t0\t0\t1\t0\t0\t0\t0\t-\t5:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH7\t3109948\t1065319\t1065569\t1\t250,\t0,\t1065319,\n")
-        file.write("245\t0\t0\t5\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH8\t3109948\t1065068\t1065318\t1\t250,\t0,\t1065068,\n")
-        file.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH8\t3109948\t1065310\t1065560\t1\t250,\t0,\t1065310,\n")
-        file.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1065059\t1065309\t1\t250,\t0,\t1065059,\n")
-        file.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1064805\t1065055\t1\t250,\t0,\t1064805,\n")
-        file.write("68\t0\t0\t1\t0\t0\t0\t0\t-\t3:574_2:574_433_5:1:G/C\t69\t0\t69\ttaecs3B_RPH9\t3109948\t1064733\t1064802\t1\t69,\t0,\t1064733,\n")
-        file.write("245\t0\t0\t5\t0\t0\t0\t0\t-\t5:574_5:574_607_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1065145\t1065395\t1\t250,\t0,\t1065145,\n")
-        file.write("247\t0\t0\t3\t0\t0\t0\t0\t-\t3:574_5:574_607_5:1:G/C\t250\t0\t250\ttaecs3B_RPH9\t3109948\t1064894\t1065144\t1\t250,\t0,\t1064894,\n")
-        file.close()
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_BlatParser.py
--- a/commons/core/parsing/test/Test_BlatParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,445 +0,0 @@\n-from commons.core.parsing.BlatParser import BlatParser\n-import unittest\n-\n-\n-class Test_BlatParser(unittest.TestCase):\n-\n-\n-    def test_setAttributesFromString(self):\n-        blatLine = "315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\tMRRE1H001H13FM1\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,"\n-        \n-        iBlatParser = BlatParser()\n-        iBlatParser.setAttributesFromString(blatLine)\n-        \n-        obsmatch = iBlatParser.getMatch()\n-        obsmismatch = iBlatParser.getMismatch()\n-        obsrepMatch = iBlatParser.getRepMatch()\n-        obsN = iBlatParser.getN()\n-        obsQGapCount = iBlatParser.getQGapCount()\n-        obsQGapBases = iBlatParser.getQGapBases()\n-        obsTGapCount = iBlatParser.getTGapCount()\n-        obsTGapBases = iBlatParser.getTGapBases()\n-        obsstrand = iBlatParser.getStrand()\n-        obsQName = iBlatParser.getQName()\n-        obsQSize = iBlatParser.getQSize()\n-        obsQStart = iBlatParser.getQStart()\n-        obsQEnd = iBlatParser.getQEnd()\n-        obsTName = iBlatParser.getTName()\n-        obsTSize = iBlatParser.getTSize()\n-        obsTStart = iBlatParser.getTStart()\n-        obsTEnd = iBlatParser.getTEnd()\n-        obsblockCount = iBlatParser.getBlockCount()\n-        obsblockSizes = iBlatParser.getBlockSizes()\n-        obsqStarts = iBlatParser.getQStarts()\n-        obstStarts = iBlatParser.getTStarts()\n-        \n-        expmatch = "315"\n-        expmismatch = "20"\n-        exprepMatch = "0"\n-        expN = "0"\n-        expQGapCount = "3"\n-        expQGapBases = "10"\n-        expTGapCount = "2"\n-        expTGapBases = "9"\n-        expstrand = "+"\n-        expQName = "MRRE1H001H13FM1"\n-        expQSize = "378"\n-        expQStart = "0"\n-        expQEnd = "345"\n-        expTName = "chr16"\n-        expTSize = "22053297"\n-        expTStart = "21686950"\n-        
expTEnd = "21687294"\n-        expblockCount = "4"\n-        expblockSizes = "76,185,7,67,"\n-        expqStarts = "0,77,263,278,"\n-        exptStarts = "21686950,21687026,21687213,21687227,"\n-        \n-        self.assertEquals(expmatch, obsmatch)\n-        self.assertEquals(expmismatch, obsmismatch)\n-        self.assertEquals(exprepMatch, obsrepMatch)\n-        self.assertEquals(expN, obsN)\n-        self.assertEquals(expQGapCount, obsQGapCount)\n-        self.assertEquals(expQGapBases, obsQGapBases)\n-        self.assertEquals(expTGapCount, obsTGapCount)\n-        self.assertEquals(expTGapBases, obsTGapBases)\n-        self.assertEquals(expstrand, obsstrand)\n-        self.assertEquals(expQName, obsQName)\n-        self.assertEquals(expQSize, obsQSize)\n-        self.assertEquals(expQStart, obsQStart)\n-        self.assertEquals(expQEnd, obsQEnd)\n-        self.assertEquals(expTName, obsTName)\n-        self.assertEquals(expTSize, obsTSize)\n-        self.assertEquals(expTStart, obsTStart)\n-        self.assertEquals(expTEnd, obsTEnd)\n-        self.assertEquals(expblockCount, obsblockCount)\n-        self.assertEquals(expblockSizes, obsblockSizes)\n-        self.assertEquals(expqStarts, obsqStarts)\n-        self.assertEquals(exptStarts, obstStarts)\n-        \n-    def test_setAttributesFromString_empty_QName(self):\n-        blatLine = "315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\t\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,"\n-        \n-        iBlatParser = BlatParser()\n-        iBlatParser.setAttributesFromString(blatLine)\n-        \n-        obsmatch = iBlatParser.getMatch()\n-        obsmismatch = iBlatParser.getMismatch()\n-        obsrepMatch = iBlatParser.getRepMatch()\n-        obsN = iBlatParser.getN()\n-        obsQGapCount = iBlatParser.getQGapCount()\n-        obsQGapBases = iBlatParser.getQGapBases()\n-        obsTGapCount = iBlatParser.getTGapCount()\n-     
   obsTGapBases = iBlatParser.getTGapBases()\n-        obsstrand = iBlatParser.getStrand()\n-        obsQName = iBlatParser.getQName()\n-       '..b'87227,")\n-        \n-        self.assertTrue(BlatParser1 == BlatParser2) \n-        \n-    def test_eq_Equals_case2(self):\n-        BlatParser1 = BlatParser()\n-        BlatParser1.setMatch("315")\n-        BlatParser1.setMismatch("20")\n-        BlatParser1.setRepMatch("0")\n-        BlatParser1.setN("0")\n-        BlatParser1.setQGapCount("3")\n-        BlatParser1.setQGapBases("10")\n-        BlatParser1.setTGapCount("2")\n-        BlatParser1.setTGapBases("9")\n-        BlatParser1.setStrand("+")\n-        BlatParser1.setQName("MRRE1H001H13FM1")\n-        BlatParser1.setQSize("378")\n-        BlatParser1.setQStart("0")\n-        BlatParser1.setQEnd("345")\n-        BlatParser1.setTName("chr16")\n-        BlatParser1.setTSize("22053297")\n-        BlatParser1.setTStart("21686950")\n-        BlatParser1.setTEnd("21687294")\n-        BlatParser1.setBlockCount("4")\n-        BlatParser1.setBlockSizes("76,185,7,67,")\n-        BlatParser1.setQStarts("0,77,263,278,")\n-        BlatParser1.setTStarts("21686950,21687026,21687213,21687227,")\n-        \n-        BlatParser2 = BlatParser()\n-        BlatParser2.setMatch("315")\n-        BlatParser2.setMismatch("20")\n-        BlatParser2.setRepMatch("0")\n-        BlatParser2.setN("0")\n-        BlatParser2.setQGapCount("3")\n-        BlatParser2.setQGapBases("10")\n-        BlatParser2.setTGapCount("2")\n-        BlatParser2.setTGapBases("9")\n-        BlatParser2.setStrand("+")\n-        BlatParser2.setQName("TotoFM2")\n-        BlatParser2.setQSize("378")\n-        BlatParser2.setQStart("0")\n-        BlatParser2.setQEnd("345")\n-        BlatParser2.setTName("chr16")\n-        BlatParser2.setTSize("22053297")\n-        BlatParser2.setTStart("21686950")\n-        BlatParser2.setTEnd("21687294")\n-        BlatParser2.setBlockCount("4")\n-        
BlatParser2.setBlockSizes("76,185,7,67,")\n-        BlatParser2.setQStarts("0,77,263,278,")\n-        BlatParser2.setTStarts("21686950,21687026,21687213,21687227,")\n-        \n-        self.assertTrue(BlatParser1 == BlatParser2) \n-        \n-    def test_eq_notEquals(self):\n-        BlatParser1 = BlatParser()\n-        BlatParser1.setMatch("315")\n-        BlatParser1.setMismatch("20")\n-        BlatParser1.setRepMatch("0")\n-        BlatParser1.setN("0")\n-        BlatParser1.setQGapCount("3")\n-        BlatParser1.setQGapBases("10")\n-        BlatParser1.setTGapCount("2")\n-        BlatParser1.setTGapBases("9")\n-        BlatParser1.setStrand("+")\n-        BlatParser1.setQName("MRRE1H001H13FM1")\n-        BlatParser1.setQSize("378")\n-        BlatParser1.setQStart("0")\n-        BlatParser1.setQEnd("345")\n-        BlatParser1.setTName("chr16")\n-        BlatParser1.setTSize("22053297")\n-        BlatParser1.setTStart("21686950")\n-        BlatParser1.setTEnd("21687294")\n-        BlatParser1.setBlockCount("4")\n-        BlatParser1.setBlockSizes("76,185,7,67,")\n-        BlatParser1.setQStarts("0,77,263,278,")\n-        BlatParser1.setTStarts("21686950,21687026,21687213,21687227,")\n-        \n-        BlatParser2 = BlatParser()\n-        BlatParser2.setMatch("315")\n-        BlatParser2.setMismatch("20")\n-        BlatParser2.setRepMatch("0")\n-        BlatParser2.setN("0")\n-        BlatParser2.setQGapCount("3")\n-        BlatParser2.setQGapBases("10")\n-        BlatParser2.setTGapCount("2")\n-        BlatParser2.setTGapBases("9")\n-        BlatParser2.setStrand("+")\n-        BlatParser2.setQName("TotoFM2")\n-        BlatParser2.setQSize("378")\n-        BlatParser2.setQStart("0")\n-        BlatParser2.setQEnd("345")\n-        BlatParser2.setTName("chr8")\n-        BlatParser2.setTSize("2205")\n-        BlatParser2.setTStart("2124")\n-        BlatParser2.setTEnd("2168")\n-        BlatParser2.setBlockCount("4")\n-        
BlatParser2.setBlockSizes("76,185,7,67,")\n-        BlatParser2.setQStarts("0,77,263,278,")\n-        BlatParser2.setTStarts("21686950,21687026,21687213,21687227,")\n-        \n-        self.assertFalse(BlatParser1 == BlatParser2) \n-\n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_BlatToGff.py
--- a/commons/core/parsing/test/Test_BlatToGff.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,28 +0,0 @@
-from commons.core.parsing.BlatToGff import BlatToGff
-import unittest
-
-
-class Test_BlatToGff(unittest.TestCase):
-
-
-    def test_convertBlatObjectToGffLine(self):
-        blatLine = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'
-        nbLine = 15
-        iBlatToGff = BlatToGff()
-        BlatToGff._methodName = ''
-        obsGffLine = iBlatToGff.convertBlatObjectToGffLine(blatLine, nbLine)
-        expGffLine = 'chr16\tBlatToGff\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n'
-        self.assertEquals(expGffLine, obsGffLine)
-
-    def test_convertBlatObjectToGffLine_with_methodName(self):
-        blatLine = '315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n'
-        nbLine = 15
-        iBlatToGff = BlatToGff()
-        BlatToGff._methodName = 'Test'
-        obsGffLine = iBlatToGff.convertBlatObjectToGffLine(blatLine, nbLine)
-        expGffLine = 'chr16\tBlatToGff\tTest:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n'
-        self.assertEquals(expGffLine, obsGffLine)
-
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_BlatToGffForBesPaired.py
--- a/commons/core/parsing/test/Test_BlatToGffForBesPaired.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b"@@ -1,292 +0,0 @@\n-import unittest, os\n-from commons.core.parsing.BlatToGffForBesPaired import BlatToGffForBesPaired\n-\n-\n-class Test_BlatToGffForBesPaired(unittest.TestCase):\n-\n-\n-    def test_convertBlatObjectToGffLine(self):\n-        blatLine = '315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\tMRRE1H001H13FM1\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,\\n'\n-        nbLine = 15\n-        besFastaFileName = '%s/commons/core/parsing/test/besSequences.fasta' % os.environ['REPET_PATH']\n-        self._writeBesSequences(besFastaFileName)\n-        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n-        iBlatToGffForBesPaired._methodName = ''\n-        iBlatToGffForBesPaired._inputFileFasta = besFastaFileName\n-        obsGffLine, obsBesName, obsBesSeq, obsBesType = iBlatToGffForBesPaired.convertBlatObjectToGffLine(blatLine, nbLine)\n-        expGffLine = 'chr16\\tBlatToGffForBesPaired\\tBES\\t21686950\\t21687294\\t.\\t+\\t.\\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\\n'\n-        expBesName = 'MRRE1H001H13FM1'\n-        expBesSeq = 'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC'\n-        expBesType = 'FM'\n-        self.assertEquals(expGffLine, obsGffLine)\n-        self.assertEquals(expBesName, obsBesName)\n-        self.assertEquals(expBesSeq, obsBesSeq)\n-        self.assertEquals(expBesType, obsBesType)\n-        os.remove(besFastaFileName)\n-\n-    def test_convertBlatObjectToGffLine_with_methodName(self):\n-        blatLine = '315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\tMRRE1H001H13FM1\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,\\n'\n-        nbLine = 15\n-        besFastaFileName = 
'%s/commons/core/parsing/test/besSequences.fasta' % os.environ['REPET_PATH']\n-        self._writeBesSequences(besFastaFileName)\n-        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n-        iBlatToGffForBesPaired._methodName = 'Test'\n-        iBlatToGffForBesPaired._inputFileFasta = besFastaFileName\n-        obsGffLine, obsBesName, obsBesSeq, obsBesType = iBlatToGffForBesPaired.convertBlatObjectToGffLine(blatLine, nbLine)\n-        expGffLine = 'chr16\\tBlatToGffForBesPaired\\tTest:BES\\t21686950\\t21687294\\t.\\t+\\t.\\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297;muscadine_seq=AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC\\n'\n-        expBesName = 'MRRE1H001H13FM1'\n-        expBesSeq = 'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGCCTAGCTAGCTAGCTAGCTAGCTAGC'\n-        expBesType = 'FM'\n-        self.assertEquals(expGffLine, obsGffLine)\n-        self.assertEquals(expBesName, obsBesName)\n-        self.assertEquals(expBesSeq, obsBesSeq)\n-        self.assertEquals(expBesType, obsBesType)\n-        os.remove(besFastaFileName)\n-    \n-    def test_getBesName(self):\n-        col9 = 'ID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\\n'\n-        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n-        obsBesName = iBlatToGffForBesPaired.getBesName(col9)\n-        expBesName = 'machin1'\n-        self.assertEquals(expBesName, obsBesName)\n-                \n-    def test_checkBesNames_OK(self):\n-        besName1 = 'MRRE1H001H13FM8'\n-        besName2 = 'MRRE1H001H13RM2'\n-        line = 10\n-        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n-        self.assertTrue(iBlatToGffForBesPaired.checkBesNames(besName1, besName2, line))\n-        \n-    def test_checkBesNames_NOK(self):\n-        besName1 = 'MRRE1H001H13FM1'\n-        besName2 = 'TOTORM2'\n-        line = 10\n-        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n-        
self.assertFalse(iBlatToGffForBesPaired.checkBesNames(besName1, besName2, line))\n-        \n-    def test_checkBesPositions_OK1(self):\n-        tBes1 = ('chr16', 25, 150)\n-        tBes2 "..b'ommons/core/parsing/test/sequence.fasta\' % os.environ[\'REPET_PATH\']\n-        fastaFile = open(fastaFileName, \'w\')\n-        fastaFile.write(\'>seq1\\n\')\n-        fastaFile.write(\'ATCGATCGATCGATCGATACGTCAGCGATCGAT\\n\')\n-        fastaFile.write(\'TACGTACGTACGATCGATCGATCGATCGATCGG\\n\')\n-        fastaFile.write(\'TACGTACGTACGATCGACGATCGATGCCGATCG\\n\')\n-        fastaFile.write(\'ATCGAC\\n\')\n-        fastaFile.write(\'>seq2\\n\')\n-        fastaFile.write(\'GTCTAGCTAGCTATATCTGACTGACGCGACGGT\\n\')\n-        fastaFile.write(\'CATGCTAGCTAGCACTGTACAGCTATCGATGCT\\n\')\n-        fastaFile.write(\'ACTGACACTGTACGTAC\\n\')\n-        fastaFile.write(\'>seq3\\n\')\n-        fastaFile.write(\'ACTCGATCGATCG\\n\')\n-        fastaFile.close()\n-        \n-        seqName = \'seq4\'\n-        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n-        iBlatToGffForBesPaired._inputFileFasta = fastaFileName\n-        obsSeq = iBlatToGffForBesPaired.extractBesSequenceFromFastaFile(seqName, 5)\n-        expSeq = \'NA\'\n-        self.assertEquals(expSeq, obsSeq)\n-        os.remove(fastaFileName)\n-        \n-    def test_getBesFmAndRmNamesAndSequences_case1(self):\n-        nameBes1 = \'MRRE1H0072T1FM1\'\n-        seqBes1 = \'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n-        typeBes1 = \'FM\'\n-        nameBes2 = \'MRRE1H0072T1RM3\'\n-        seqBes2 = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n-        typeBes2 = \'RM\'\n-        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n-        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)\n-        expNameBesFM = \'MRRE1H0072T1FM1\'\n-        expNameBesRM = \'MRRE1H0072T1RM3\'\n-        expSeqBesFM 
= \'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n-        expSeqBesRM = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n-        self.assertEquals(expNameBesFM, obsNameBesFM)\n-        self.assertEquals(expNameBesRM, obsNameBesRM)\n-        self.assertEquals(expSeqBesFM, obsSeqBesFM)\n-        self.assertEquals(expSeqBesRM, obsSeqBesRM)\n-        \n-    def test_getBesFmAndRmNamesAndSequences_case2(self):\n-        nameBes1 = \'MRRE1H0072T1RM1\'\n-        seqBes1 = \'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n-        typeBes1 = \'RM\'\n-        nameBes2 = \'MRRE1H0072T1FM3\'\n-        seqBes2 = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n-        typeBes2 = \'FM\'\n-        iBlatToGffForBesPaired = BlatToGffForBesPaired()\n-        obsNameBesFM, obsSeqBesFM, obsNameBesRM, obsSeqBesRM = iBlatToGffForBesPaired.getBesFmAndRmNamesAndSequences(nameBes1, seqBes1, typeBes1, nameBes2, seqBes2, typeBes2)\n-        expNameBesFM = \'MRRE1H0072T1FM3\'\n-        expNameBesRM = \'MRRE1H0072T1RM1\'\n-        expSeqBesFM = \'GCGCAGCGCGACTGACTTGACTATCGGCGACGCGACGATCGATCGATCGATC\'\n-        expSeqBesRM = \'TACGTCAGCTGATCGACATCGATCGATCGATCGATCGATCGTC\'\n-        self.assertEquals(expNameBesFM, obsNameBesFM)\n-        self.assertEquals(expNameBesRM, obsNameBesRM)\n-        self.assertEquals(expSeqBesFM, obsSeqBesFM)\n-        self.assertEquals(expSeqBesRM, obsSeqBesRM)\n-        \n-    def _writeBesSequences(self, fileName):\n-        file = open(fileName, \'w\')\n-        file.write(\'>MRRE1H001H13RM1\\n\')\n-        file.write(\'ATACGTACGTACGTCAGTACGACTACGTACGTACGTACGTCGTAC\\n\')\n-        file.write(\'TACGTCAGCATCGTACGTACGTACGTCGTGCTGGCTAGCTGACGA\\n\')\n-        file.write(\'ATCGATCGATCGATCGACATCGTACG\\n\')\n-        file.write(\'>MRRE1H001H13FM1\\n\')\n-        file.write(\'AGACCTACTACGACGTACGATCGATCGACTGCTAGCTAGCTAGGC\\n\')\n-        file.write(\'CTAGCTAGCTAGCTAGCTAGCTAGC\\n\')\n-        file.write(\'>MRRE2H007A13FM3\\n\')\n-        
file.write(\'TCAGCTAGCTGACTGACATCGCTAGCTAGCTAGCTAGCTAGCTAG\\n\')\n-        file.write(\'TACGCAGCTACGGGGCATCGACTAAAAAAAAAAACCCACGACTGG\\n\')\n-        file.write(\'CTAGCTAGCTAGCTAGCTAGCTACGTCGATCGATCGACTGTTGCC\\n\')\n-        file.write(\'TCAGCTACTGACTGATCGATCGACTACGTACGTACGTAC\\n\')\n-        file.close()\n-        \n-        \n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_BowtieParser.py
--- a/commons/core/parsing/test/Test_BowtieParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,57 +0,0 @@
-from commons.core.parsing.BowtieParser import BowtieParser
-import unittest, os
-
-
-class Test_BlatParser(unittest.TestCase):
-
-
-    def test_simple(self):
-        fileName = "tmpFile.bowtie"
-        handle   = open(fileName, "w")
-        handle.write("HWI-EAS179_0053:2:1:1365:7879#0/2\t+\tchrXHet\t191698\tACCGCTGAACCACTTTCATNCNTGGGATTGTGAACTGAAACTGTTCACATGAACTTGGAATTCCCAGTAAGTGTGA\tLcaYcacLaTdd`dacacYBaBTa^^TL^M`]`^aa`Tca`LaLTUa]a_bcLcTMMMMa^a^`bT`ccT_UbM_B\t0\t19:G>N,21:T>N\n")
-        handle.write("HWI-EAS179_0053:2:1:1365:7879#0/1\t-\tchrXHet\t191803\tCCCCTTGTACACACCGCCCGTCGCTACTACCGATTGAATTATGTAGTGAGGTCTCCGGACGTGATCACTGTGACGC\tBBBBBBBBB`O`DS]]aYabaaa[ULYLY]^b`^a^aZZZ_LLLca_a_b^^aYdbd``d^ccaY`_caccc^acc\t0\t33:T>G,72:T>C\n")
-        handle.write("HWI-EAS179_0053:2:1:1371:11420#0/2\t+\tchr3L\t16569206\tTATGAGCGCCAATTTTGCANTTTTATTTTTGTACAAGCCAAGGGTTTTGCAACATTCACAGCGCTTGCCACTTGTC\tcY^bcYLcaL]`]]`aaTaBaab^_ZZ__R[`[cYccc^Ybb^_L`L`Y`aM_a_TcTcc`LL]]MYaYabbTY`^\t0\t19:G>N\n")
-        handle.write("HWI-EAS179_0053:2:1:1371:11420#0/1\t-\tchr3L\t16569298\tAATGAACCATTGTAATTACCCACAACACATACAGTCACACACGAGATGCACACAAGTCGGAAACGGAAGCGAGACG\tBBBBBBBBBBBBBBBBBBBBBB^T`]Y^`KZY__LY_a]^T^ccYaYY__YT]VZbL]`b^cLT^a^caccYbT^b\t0\n")
-        handle.close()
-
-        parser = BowtieParser("tmpFile.bowtie", 0)
-        for cpt, mapping in enumerate(parser.getIterator()):
-            transcript = mapping.getTranscript()
-            if cpt == 0:
-                self.assertEquals(transcript.getName(), "HWI-EAS179_0053:2:1:1365:7879#0/2")
-                self.assertEquals(transcript.getChromosome(), "chrXHet")
-                self.assertEquals(transcript.getDirection(), 1)
-                self.assertEquals(transcript.getStart(), 191699)
-                self.assertEquals(transcript.getEnd(), 191774)
-                self.assertEquals(transcript.getTagValue("nbMismatches"), 2)
-            elif cpt == 1:
-                self.assertEquals(transcript.getName(), "HWI-EAS179_0053:2:1:1365:7879#0/1")
-                self.assertEquals(transcript.getChromosome(), "chrXHet")
-                self.assertEquals(transcript.getDirection(), -1)
-                self.assertEquals(transcript.getStart(), 191804)
-                self.assertEquals(transcript.getEnd(), 191879)
-                self.assertEquals(transcript.getTagValue("nbMismatches"), 2)
-            elif cpt == 2:
-                self.assertEquals(transcript.getName(), "HWI-EAS179_0053:2:1:1371:11420#0/2")
-                self.assertEquals(transcript.getChromosome(), "chr3L")
-                self.assertEquals(transcript.getDirection(), 1)
-                self.assertEquals(transcript.getStart(), 16569207)
-                self.assertEquals(transcript.getEnd(), 16569282)
-                self.assertEquals(transcript.getTagValue("nbMismatches"), 1)
-            elif cpt == 3:
-                self.assertEquals(transcript.getName(), "HWI-EAS179_0053:2:1:1371:11420#0/1")
-                self.assertEquals(transcript.getChromosome(), "chr3L")
-                self.assertEquals(transcript.getDirection(), -1)
-                self.assertEquals(transcript.getStart(), 16569299)
-                self.assertEquals(transcript.getEnd(), 16569374)
-                self.assertEquals(transcript.getTagValue("nbMismatches"), 0)
-            else:
-                self.fail()
-
-        os.remove(fileName)
-        
-        
-
-if __name__ == "__main__":
-    unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_CoordsParser.py
--- a/commons/core/parsing/test/Test_CoordsParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,105 +0,0 @@
-import unittest
-from commons.core.parsing.CoordsParser import CoordsParser
-from SMART.Java.Python.mappingToCoordinates import MappingToCoordinates
-
-
-class Test_CoordsParser(unittest.TestCase):
-    
-
-    def test_Parser(self):
-        parser = CoordsParser("data/testCoordsParser.coords")
-        
-        cpt = 0
-        for mapping in parser.getIterator():
-            transcript = mapping.getTranscript()
-            cpt += 1
-            if cpt == 1:
-                self.assertEqual(transcript.getChromosome(), "scaffold_1")
-                self.assertEqual(transcript.getName(), "gi|240254421:1-30427671")
-                self.assertEqual(transcript.getStart(), 1)
-                self.assertEqual(transcript.getEnd(), 6251)
-                self.assertEqual(transcript.getDirection(), -1)
-                self.assertEqual(transcript.getNbExons(), 1)
-                self.assertEqual(transcript.getTagValue("identity"), 89.030000000000001)
-                exons = transcript.getExons()
-                self.assertEqual(exons[0].getChromosome(), "scaffold_1")
-                self.assertEqual(exons[0].getStart(), 1)
-                self.assertEqual(exons[0].getEnd(), 6251)
-                self.assertEqual(exons[0].getDirection(), -1)
-                self.assertEqual(transcript.getSize(), 6251)
-            elif cpt == 2:
-                self.assertEqual(transcript.getChromosome(), "scaffold_1")
-                self.assertEqual(transcript.getName(), "gi|240254421:1-30427671")
-                self.assertEqual(transcript.getStart(), 9127)
-                self.assertEqual(transcript.getEnd(), 11947)
-                self.assertEqual(transcript.getDirection(), -1)
-                self.assertEqual(transcript.getNbExons(), 1)
-                self.assertEqual(transcript.getTagValue("identity"), 90.450000000000003)
-                exons = transcript.getExons()
-                self.assertEqual(exons[0].getChromosome(), "scaffold_1")
-                self.assertEqual(exons[0].getStart(), 9127)
-                self.assertEqual(exons[0].getEnd(), 11947)
-                self.assertEqual(exons[0].getDirection(), -1)
-                self.assertEqual(transcript.getSize(), 2821)
-            if cpt == 3:
-                self.assertEqual(transcript.getChromosome(), "scaffold_1")
-                self.assertEqual(transcript.getName(), "gi|240254421:1-30427671")
-                self.assertEqual(transcript.getStart(), 12201)
-                self.assertEqual(transcript.getEnd(), 12953)
-                self.assertEqual(transcript.getDirection(), -1)
-                self.assertEqual(transcript.getNbExons(), 1)
-                exons = transcript.getExons()
-                self.assertEqual(exons[0].getChromosome(), "scaffold_1")
-                self.assertEqual(exons[0].getStart(), 12201)
-                self.assertEqual(exons[0].getEnd(), 12953)
-                self.assertEqual(exons[0].getDirection(), -1)
-                self.assertEqual(transcript.getSize(), 753)
-            
-    def test_Parser_showcoord(self):
-        parser = CoordsParser("data/testCoordsParser_showcoord.coords")
-        expTranscriptCount = 1
-        obsTranscriptCount = 0
-        
-        for mapping in parser.getIterator():
-            transcript = mapping.getTranscript()
-            obsTranscriptCount += 1
-            self.assertEqual(transcript.getChromosome(), "mivi_sl_A1_scaffold00001")
-            self.assertEqual(transcript.getName(), "mivi_sl_A2_scaffold00003")
-            self.assertEqual(transcript.getStart(), 296)
-            self.assertEqual(transcript.getEnd(), 2292)
-            self.assertEqual(transcript.getDirection(), 1)
-            self.assertEqual(transcript.getTagValue("identity"), 98.30)
-            self.assertEqual(transcript.getTagValue("target_pident"), 98.30)                
-            self.assertEqual(transcript.getTagValue("target_pcover"), 3.32)
-            self.assertEqual(transcript.getTagValue("target_length"), 60273)                
-            self.assertEqual(transcript.getTagValue("Target"), "mivi_sl_A2_scaffold00003 1 2001")
-            self.assertEqual(transcript.getSize(), 1997)
-                
-        self.assertEquals(expTranscriptCount, obsTranscriptCount)
-                            
-    def test_Parser_showcoord_promer(self):
-        parser = CoordsParser("data/testCoordsParser_showcoord_promer.coords")
-        expTranscriptCount = 1
-        obsTranscriptCount = 0
-        
-        for mapping in parser.getIterator():
-            transcript = mapping.getTranscript()
-            obsTranscriptCount += 1
-            self.assertEqual(transcript.getChromosome(), "mivi_sl_A1_scaffold00001")
-            self.assertEqual(transcript.getName(), "mivi_sl_A2_scaffold00003")
-            self.assertEqual(transcript.getStart(), 291)
-            self.assertEqual(transcript.getEnd(), 1229)
-            self.assertEqual(transcript.getDirection(), -1)
-            self.assertEqual(transcript.getTagValue("identity"), 94.25)
-            self.assertEqual(transcript.getTagValue("target_pident"), 94.25)                
-            self.assertEqual(transcript.getTagValue("target_pcover"), 1.56)
-            self.assertEqual(transcript.getTagValue("target_length"), 60273)                
-            self.assertEqual(transcript.getTagValue("Target"), "mivi_sl_A2_scaffold00003 939 1")
-            self.assertEqual(transcript.getSize(), 939)
-                
-        self.assertEquals(expTranscriptCount, obsTranscriptCount)
-        

-if __name__ == '__main__':
-        unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_CrossSsrAndBesMappedByBlatToGff.py
--- a/commons/core/parsing/test/Test_CrossSsrAndBesMappedByBlatToGff.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,70 +0,0 @@
-from commons.core.parsing.CrossSsrAndBesMappedByBlatToGff import CrossSsrAndBesMappedByBlatToGff
-from commons.core.parsing.SsrParser import SsrParser
-
-import unittest
-import os
-
-
-class Test_CrossSsrAndBesMappedByBlatToGff(unittest.TestCase):
-
-
-    def test_createDictOfSsrParser(self):
-        obsDictSsrParser = {}
-        
-        ssrFileName = 'input_SSR_Resuts.tab'
-        SSRFile = open(ssrFileName, 'w')
-        SSRFile.write('BES_name\tBES_redundancy\tSSR_di/tri/tetranucleotide\tSSR_Motif\tSSR_Motif_number\tSSR_start\tSSR_end\tBES_size\n')
-        SSRFile.write('MRRE1H001A12RM1\t1\t4\tttta\t6\t272\t295\t724\n')
-        SSRFile.write('MRRE1H001B01RM1\t1\t3\taat\t8\t264\t287\t683\n')
-        SSRFile.write('MRRE1H001B07RM1\t1\t2\tta\t19\t153\t190\t734\n')
-        SSRFile.write('MRRE1H001B07RM1\t2\t2\taata\t25\t83\t90\t734\n')
-        SSRFile.close()
-        
-        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
-        iCrossSsrAndBesMappedByBlatToGff._inputFileSSR = ssrFileName
-        obsDictSsrParser = iCrossSsrAndBesMappedByBlatToGff.createDictOfSsrParser(obsDictSsrParser)
-        
-        SsrParser1 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
-        SsrParser2 = SsrParser('MRRE1H001B01RM1', '1', '3', 'aat', '8', '264', '287', '683')
-        SsrParser3 = SsrParser('MRRE1H001B07RM1', '1', '2', 'ta', '19', '153', '190', '734')
-        SsrParser4 = SsrParser('MRRE1H001B07RM1', '2', '2', 'aata', '25', '83', '90', '734')
-        
-        expDictSsrParser = {
-                         'MRRE1H001A12RM1': [SsrParser1], 
-                         'MRRE1H001B01RM1': [SsrParser2],
-                         'MRRE1H001B07RM1': [SsrParser3, SsrParser4]
-                        }
-        
-        self.assertEquals(expDictSsrParser, obsDictSsrParser)
-        os.remove(ssrFileName)
-        
-    def test_convertSSRPositionsToBlatPositions_strand_FW(self):
-        ssrPos = 75
-        blatPosStart = 10501475
-        blatPosEnd = 10501985
-        strand = '+'
-        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
-        obsNewPos = iCrossSsrAndBesMappedByBlatToGff.convertSSRPositionsToChromPositions(ssrPos, blatPosStart, blatPosEnd, strand)
-        expNewPos = 10501549
-        self.assertEquals(expNewPos, obsNewPos)
-        
-    def test_convertSSRPositionsToBlatPositions_strand_RV(self):
-        ssrPos = 75
-        blatPosStart = 10501475
-        blatPosEnd = 10501985
-        strand = '-'
-        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
-        obsNewPos = iCrossSsrAndBesMappedByBlatToGff.convertSSRPositionsToChromPositions(ssrPos, blatPosStart, blatPosEnd, strand)
-        expNewPos = 10501911
-        self.assertEquals(expNewPos, obsNewPos)
-        
-    def test_getSsrMotif(self):
-        ssrMotif = 'atg'
-        ssrNbMotif = 4
-        iCrossSsrAndBesMappedByBlatToGff = CrossSsrAndBesMappedByBlatToGff()
-        obsSsrSeq = iCrossSsrAndBesMappedByBlatToGff.getSsrSeq(ssrMotif, ssrNbMotif)
-        expSsrSeq = 'atgatgatgatg'
-        self.assertEquals(expSsrSeq, obsSsrSeq)
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_F_BlatToGff.py
--- a/commons/core/parsing/test/Test_F_BlatToGff.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,77 +0,0 @@
-import unittest
-import os
-from commons.core.utils.FileUtils import FileUtils
-
-
-class Test_F_BlatToGff(unittest.TestCase):
-
-
-    def test_run(self):
-        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']
-        self._writeBlatInputFile(blatInputFileName)
-        
-        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFile.tab' % os.environ['REPET_PATH']
-        cmd = 'python %s/commons/core/parsing/BlatToGff.py -i %s -o %s' % (os.environ['REPET_PATH'], blatInputFileName, obsOutputFileName)
-        os.system(cmd)
-        
-        expOutputFileName = '%s/commons/core/parsing/test/expOutputFile.tab' % os.environ['REPET_PATH']
-        self._writeExpOutputFile(expOutputFileName)
-        
-        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
-        os.remove(blatInputFileName)
-        os.remove(obsOutputFileName)
-        os.remove(expOutputFileName)
-
-    def test_run_with_methodName(self):
-        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']
-        self._writeBlatInputFile(blatInputFileName)
-        
-        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFile.tab' % os.environ['REPET_PATH']
-        cmd = 'python %s/commons/core/parsing/BlatToGff.py -i %s -o %s -n Test_F' % (os.environ['REPET_PATH'], blatInputFileName, obsOutputFileName)
-        os.system(cmd)
-        
-        expOutputFileName = '%s/commons/core/parsing/test/expOutputFile.tab' % os.environ['REPET_PATH']
-        self._writeExpOutputFile_with_methodName(expOutputFileName)
-        
-        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
-        os.remove(blatInputFileName)
-        os.remove(obsOutputFileName)
-        os.remove(expOutputFileName)
-    
-    def _writeBlatInputFile(self, blatInputFileName):
-        file = open(blatInputFileName, 'w')
-        file.write('psLayout version 3\n')
-        file.write('\n')
-        file.write('match    mis-     rep.     N\'s    Q gap    Q gap    T gap    T gap    strand    Q            Q       Q        Q      T            T       T        T      block    blockSizes     qStarts     tStarts\n')
-        file.write('         match    match           count    bases    count    bases              name         size    start    end    name         size    start    end    count\n')
-        file.write('---------------------------------------------------------------------------------------------------------------------------------------------------------------\n')
-        file.write('315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n')
-        file.write('690\t11\t0\t0\t1\t3\t2\t4\t-\tmachin1\t704\t0\t704\tchr16\t22053297\t21736364\t21737069\t3\t40,647,14,\t0,43,690,\t21736364,21736406,21737055,\n')
-        file.write('554\t26\t0\t0\t1\t16\t1\t17\t-\tMRRE1H032F08FM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n')
-        file.write('620\t23\t0\t0\t0\t0\t0\t0\t-\tmachin2\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n')
-        file.write('347\t25\t0\t0\t0\t0\t0\t0\t-\tmachin3\t393\t21\t393\tchr18\t29360087\t12067347\t12067719\t1\t372,\t0,\t12067347,\n')
-        file.close()
-        
-    def _writeExpOutputFile(self, expOutputFileName):
-        file = open(expOutputFileName, 'w')
-        file.write('##gff-version 3\n')
-        file.write('chr16\tBlatToGff\tBES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n')
-        file.write('chr16\tBlatToGff\tBES\t21736364\t21737069\t.\t+\t.\tID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n')
-        file.write('chr11\tBlatToGff\tBES\t3725876\t3726473\t.\t+\t.\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926\n')
-        file.write('chr11\tBlatToGff\tBES\t3794984\t3795627\t.\t+\t.\tID=machin2;Name=machin2;bes_start=3794984;bes_end=3795627;bes_size=19818926\n')
-        file.write('chr18\tBlatToGff\tBES\t12067347\t12067719\t.\t+\t.\tID=machin3;Name=machin3;bes_start=12067347;bes_end=12067719;bes_size=29360087\n')
-        file.close()
-        
-    def _writeExpOutputFile_with_methodName(self, expOutputFileName):
-        file = open(expOutputFileName, 'w')
-        file.write('##gff-version 3\n')
-        file.write('chr16\tBlatToGff\tTest_F:BES\t21686950\t21687294\t.\t+\t.\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\n')
-        file.write('chr16\tBlatToGff\tTest_F:BES\t21736364\t21737069\t.\t+\t.\tID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\n')
-        file.write('chr11\tBlatToGff\tTest_F:BES\t3725876\t3726473\t.\t+\t.\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926\n')
-        file.write('chr11\tBlatToGff\tTest_F:BES\t3794984\t3795627\t.\t+\t.\tID=machin2;Name=machin2;bes_start=3794984;bes_end=3795627;bes_size=19818926\n')
-        file.write('chr18\tBlatToGff\tTest_F:BES\t12067347\t12067719\t.\t+\t.\tID=machin3;Name=machin3;bes_start=12067347;bes_end=12067719;bes_size=29360087\n')
-        file.close()
-
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_F_BlatToGffForBesPaired.py
--- a/commons/core/parsing/test/Test_F_BlatToGffForBesPaired.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b"@@ -1,117 +0,0 @@\n-import unittest\n-import os\n-from commons.core.utils.FileUtils import FileUtils\n-\n-\n-class Test_F_BlatToGffForBesPaired(unittest.TestCase):\n-\n-\n-    def test_run(self):\n-        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']\n-        self._writeBlatInputFileName(blatInputFileName)\n-        fastaInputFileName = '%s/commons/core/parsing/test/sequences.fasta' % os.environ['REPET_PATH']\n-        self._writeFastaInputFile(fastaInputFileName)\n-        \n-        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFileName.gff' % os.environ['REPET_PATH']\n-        cmd = 'python %s/commons/core/parsing/BlatToGffForBesPaired.py -i %s -f %s -o %s' % (os.environ['REPET_PATH'], blatInputFileName, fastaInputFileName, obsOutputFileName)\n-        os.system(cmd)\n-        \n-        expOutputFileName = '%s/commons/core/parsing/test/expOutputFileName.gff' % os.environ['REPET_PATH']\n-        self._writeExpOutputFileName(expOutputFileName)\n-        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))\n-        os.remove(blatInputFileName)\n-        os.remove(fastaInputFileName)\n-        os.remove(expOutputFileName)\n-        os.remove(obsOutputFileName)\n-        \n-    def test_run_with_methodName(self):\n-        blatInputFileName = '%s/commons/core/parsing/test/inputFile.tab' % os.environ['REPET_PATH']\n-        self._writeBlatInputFileName(blatInputFileName)\n-        fastaInputFileName = '%s/commons/core/parsing/test/sequences.fasta' % os.environ['REPET_PATH']\n-        self._writeFastaInputFile(fastaInputFileName)\n-        \n-        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFileName.gff' % os.environ['REPET_PATH']\n-        cmd = 'python %s/commons/core/parsing/BlatToGffForBesPaired.py -i %s -f %s -o %s -n TestF' % (os.environ['REPET_PATH'], blatInputFileName, fastaInputFileName, obsOutputFileName)\n-        os.system(cmd)\n-        \n-     
   expOutputFileName = '%s/commons/core/parsing/test/expOutputFileName.gff' % os.environ['REPET_PATH']\n-        self._writeExpOutputFileName_with_methodName(expOutputFileName)\n-        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))\n-        os.remove(blatInputFileName)\n-        os.remove(fastaInputFileName)\n-        os.remove(expOutputFileName)\n-        os.remove(obsOutputFileName)\n-\n-    def _writeBlatInputFileName(self, blatInputFileName):\n-        file = open(blatInputFileName, 'w')\n-        file.write('psLayout version 3\\n')\n-        file.write('\\n')\n-        file.write('match    mis-     rep.     N\\'s    Q gap    Q gap    T gap    T gap    strand    Q            Q       Q        Q      T            T       T        T      block    blockSizes     qStarts     tStarts\\n')\n-        file.write('         match    match           count    bases    count    bases              name         size    start    end    name         size    start    end    count\\n')\n-        file.write('---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n')\n-        file.write('315\\t20\\t0\\t0\\t3\\t10\\t2\\t9\\t+\\tMRRE1H001H13FM1\\t378\\t0\\t345\\tchr16\\t22053297\\t21686950\\t21687294\\t4\\t76,185,7,67,\\t0,77,263,278,\\t21686950,21687026,21687213,21687227,\\n')\n-        file.write('690\\t11\\t0\\t0\\t1\\t3\\t2\\t4\\t-\\tMRRE1H001H13RM1\\t704\\t0\\t704\\tchr16\\t22053297\\t21736364\\t21737069\\t3\\t40,647,14,\\t0,43,690,\\t21736364,21736406,21737055,\\n')\n-        file.write('554\\t26\\t0\\t0\\t1\\t16\\t1\\t17\\t+\\tMACHINFM1\\t606\\t10\\t606\\tchr11\\t19818926\\t3725876\\t3726473\\t2\\t553,27,\\t10,579,\\t3725876,3726446,\\n')\n-        file.write('620\\t23\\t0\\t0\\t0\\t0\\t0\\t0\\t-\\tBIDULERM1\\t643\\t0\\t643\\tchr11\\t19818926\\t3794984\\t3795627\\t1\\t643,\\t0,\\t3794984,\\n')\n-        
file.write('554\\t26\\t0\\t0\\t1\\t16\\t1\\t17\\t+\\tMRRE1H032F08FM1\\t606\\t10\\t606\\tchr11\\t19818926\\t3725876\\t3726473\\t2\\t553,27,\\t10,579,\\t3725876,3726446,\\n')\n-        file.write('620\\t"..b'ATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG\\n\')\n-        file.write(\'chr16\\tBlatToGffForBesPaired\\tTestF:BES\\t21736364\\t21737069\\t.\\t+\\t.\\tID=MRRE1H001H13RM1;Name=MRRE1H001H13RM1;bes_start=21736364;bes_end=21737069;bes_size=22053297;muscadine_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\\n\')\n-        file.write(\'chr16\\tBlatToGffForBesPaired\\tTestF:BAC\\t21686950\\t21737069\\t.\\t.\\t.\\tID=MRRE1H001H13;Name=MRRE1H001H13;bac_start=21686950;bac_end=21737069;bac_size=50120;besFM_name=MRRE1H001H13FM1;muscadine_besFM_seq=ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCCTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGCACTGCTAGCTACG;besRM_name=MRRE1H001H13RM1;muscadine_besRM_seq=ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCGACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGTACTGATCGACTGATCGACTGC\\n\')\n-        file.write(\'chr11\\tBlatToGffForBesPaired\\tTestF:BES\\t3725876\\t3726473\\t.\\t+\\t.\\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926;muscadine_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG\\n\')\n-        file.write(\'chr11\\tBlatToGffForBesPaired\\tTestF:BES\\t3794984\\t3795627\\t.\\t+\\t.\\tID=MRRE1H032F08RM1;Name=MRRE1H032F08RM1;bes_start=3794984;bes_end=3795627;bes_size=19818926;muscadine_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\\n\')\n-        
file.write(\'chr11\\tBlatToGffForBesPaired\\tTestF:BAC\\t3725876\\t3795627\\t.\\t.\\t.\\tID=MRRE1H032F08;Name=MRRE1H032F08;bac_start=3725876;bac_end=3795627;bac_size=69752;besFM_name=MRRE1H032F08FM1;muscadine_besFM_seq=TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGATATCGATCG;besRM_name=MRRE1H032F08RM1;muscadine_besRM_seq=ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTGTACGTACGTAC\\n\')\n-        file.write(\'chr18\\tBlatToGffForBesPaired\\tTestF:BES\\t12067347\\t12067719\\t.\\t+\\t.\\tID=MRRE1B072N12FM1;Name=MRRE1B072N12FM1;bes_start=12067347;bes_end=12067719;bes_size=29360087;muscadine_seq=ATCGTACGTACGATCGATCGCATGACTACGT\\n\')\n-        file.write(\'chr18\\tBlatToGffForBesPaired\\tTestF:BES\\t11978635\\t11979338\\t.\\t+\\t.\\tID=MRRE1B072N12RM1;Name=MRRE1B072N12RM1;bes_start=11978635;bes_end=11979338;bes_size=29360087;muscadine_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\\n\')\n-        file.write(\'chr18\\tBlatToGffForBesPaired\\tTestF:BAC\\t11978635\\t12067719\\t.\\t.\\t.\\tID=MRRE1B072N12;Name=MRRE1B072N12;bac_start=11978635;bac_end=12067719;bac_size=89085;besFM_name=MRRE1B072N12FM1;muscadine_besFM_seq=ATCGTACGTACGATCGATCGCATGACTACGT;besRM_name=MRRE1B072N12RM1;muscadine_besRM_seq=TACGTACGATCGACTGATGCTAGCTAGCTCC\\n\')\n-        file.close()\n-        \n-    def _writeFastaInputFile(self, fileName):\n-        file = open(fileName, \'w\')\n-        file.write(\'>MRRE1H001H13FM1\\n\')\n-        file.write(\'ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC\\n\')\n-        file.write(\'CTACGTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTAGC\\n\')\n-        file.write(\'ACTGCTAGCTACG\\n\')\n-        file.write(\'>MRRE1H001H13RM1\\n\')\n-        file.write(\'ACTGATCGATCGTACGTACGATCGCTGATCGTACGTACGATCGATCGATCG\\n\')\n-        file.write(\'ACTCGATCGTAGCTAGCTACGTCAGTCAGACTGACTGCTGCGCTGCATCGT\\n\')\n-        file.write(\'ACTGATCGACTGATCGACTGC\\n\')\n-        file.write(\'>MRRE1H032F08FM1\\n\')\n-        file.write(\'TCAGCTATCGATCGTACGTACGTCGATCGTACGTACGTACGATCGATCGAT\\n\')\n- 
       file.write(\'ATCGATCG\\n\')\n-        file.write(\'>MRRE1H032F08RM1\\n\')\n-        file.write(\'ATCGACTGATCGTCGATCGTACGATCGACTGATCGATCGATCGACTGACTG\\n\')\n-        file.write(\'TACGTACGTAC\\n\')\n-        file.write(\'>MRRE1B072N12FM1\\n\')\n-        file.write(\'ATCGTACGTACGATCGATCGCATGACTACGT\\n\')\n-        file.write(\'>MRRE1B072N12RM1\\n\')\n-        file.write(\'TACGTACGATCGACTGATGCTAGCTAGCTCC\\n\')\n-        file.write(\'>MACHINFM1\\n\')\n-        file.write(\'ATCGTACGCTAGCTAGTCGATCGATCGATCGATCG\\n\')\n-        file.write(\'>BIDULERM1\\n\')\n-        file.write(\'ACTCGATCGACTACGTACGTAGACTG\\n\')\n-        file.close()\n-        \n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_F_CrossSsrAndBesMappedByBlatToGff.py
--- a/commons/core/parsing/test/Test_F_CrossSsrAndBesMappedByBlatToGff.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,66 +0,0 @@
-import unittest
-import os
-from commons.core.utils.FileUtils import FileUtils
-
-class Test_F_CrossSsrAndBesMappedByBlatToGff(unittest.TestCase):
-
-
-    def test_run(self):
-        ssrInputFileName = '%s/commons/core/parsing/test/ssrInputFile.tab' % os.environ['REPET_PATH']
-        self._writeSsrInputFile(ssrInputFileName)
-        blatInputFileName = '%s/commons/core/parsing/test/blatInputFile.tab' % os.environ['REPET_PATH']
-        self._writeBlatInputFile(blatInputFileName)
-        
-        obsOutputFileName = '%s/commons/core/parsing/test/obsOutputFile.tab' % os.environ['REPET_PATH']
-        cmd = 'python %s/commons/core/parsing/CrossSsrAndBesMappedByBlatToGff.py -s %s -b %s -o %s' % (os.environ['REPET_PATH'], ssrInputFileName, blatInputFileName, obsOutputFileName)
-        os.system(cmd)
-        
-        expOutputFileName = '%s/commons/core/parsing/test/expOutputFile.tab' % os.environ['REPET_PATH']
-        self._writeExpOutputFile(expOutputFileName)
-        
-        self.assertTrue(FileUtils.are2FilesIdentical(expOutputFileName, obsOutputFileName))
-        os.remove(ssrInputFileName)
-        os.remove(blatInputFileName)
-        os.remove(obsOutputFileName)
-        os.remove(expOutputFileName)
-    
-    def _writeBlatInputFile(self, blatInputFileName):
-        file = open(blatInputFileName, 'w')
-        file.write('psLayout version 3\n')
-        file.write('\n')
-        file.write('match    mis-     rep.     N\'s    Q gap    Q gap    T gap    T gap    strand    Q            Q       Q        Q      T            T       T        T      block    blockSizes     qStarts     tStarts\n')
-        file.write('         match    match           count    bases    count    bases              name         size    start    end    name         size    start    end    count\n')
-        file.write('---------------------------------------------------------------------------------------------------------------------------------------------------------------\n')
-        file.write('315\t20\t0\t0\t3\t10\t2\t9\t+\tMRRE1H001H13FM1\t378\t0\t345\tchr16\t22053297\t21686950\t21687294\t4\t76,185,7,67,\t0,77,263,278,\t21686950,21687026,21687213,21687227,\n')
-        file.write('690\t11\t0\t0\t1\t3\t2\t4\t-\tmachin1\t704\t0\t704\tchr16\t22053297\t21736364\t21737069\t3\t40,647,14,\t0,43,690,\t21736364,21736406,21737055,\n')
-        file.write('554\t26\t0\t0\t1\t16\t1\t17\t-\tMRRE1H032F08FM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n')
-        file.write('620\t23\t0\t0\t0\t0\t0\t0\t-\tmachin2\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n')
-        file.write('347\t25\t0\t0\t0\t0\t0\t0\t-\tmachin3\t393\t21\t393\tchr18\t29360087\t12067347\t12067719\t1\t372,\t0,\t12067347,\n')
-        file.close()
-
-    def _writeSsrInputFile(self, ssrInputFileName):
-        file = open(ssrInputFileName, 'w')
-        file.write('BES_name    BES_redundancy    SSR_di/tri/tetranucleotide    SSR_Motif    SSR_Motif_number    SSR_start    SSR_end    BES_size\n')
-        file.write('truc1\t1\t4\tttta\t6\t272\t295\t724\n')
-        file.write('truc2\t1\t3\taat\t8\t264\t287\t683\n')
-        file.write('MRRE1H001H13FM1\t1\t2\tta\t19\t153\t190\t378\n')
-        file.write('truc3\t2\t4\taaag\t8\t518\t549\t734\n')
-        file.write('MRRE1H032F08FM1\t1\t4\taaat\t7\t544\t571\t606\n')
-        file.write('MRRE1H032F08FM1\t2\t2\tag\t10\t587\t606\t606\n')
-        file.write('truc4\t1\t2\tat\t16\t519\t550\t672\n')
-        file.write('truc5\t1\t3\ttct\t8\t205\t228\t752\n')
-        file.write('truc6\t1\t2\tat\t33\t287\t352\t569\n')
-        file.close()
-        
-    def _writeExpOutputFile(self, expOutputFileName):
-        file = open(expOutputFileName, 'w')
-        file.write('##gff-version 3\n')
-        file.write('chr16\tCrossSsrAndBesAlignedByBlat\tSSR\t21687102\t21687139\t.\t+\t.\tID=SSR_MRRE1H001H13FM1_1;Name=SSR_MRRE1H001H13FM1_1;bes_name=MRRE1H001H13FM1;bes_size=378;bes_matchstart=0;bes_matchend=345;bes_redundancy=1;ssr_type=2;ssr_motif=ta;ssr_motif_number=19;ssr_start=153;ssr_end=190;muscadine_seq=tatatatatatatatatatatatatatatatatatata\n')
-        file.write('chr11\tCrossSsrAndBesAlignedByBlat\tSSR\t3725930\t3725903\t.\t-\t.\tID=SSR_MRRE1H032F08FM1_1;Name=SSR_MRRE1H032F08FM1_1;bes_name=MRRE1H032F08FM1;bes_size=606;bes_matchstart=10;bes_matchend=606;bes_redundancy=1;ssr_type=4;ssr_motif=aaat;ssr_motif_number=7;ssr_start=544;ssr_end=571;muscadine_seq=aaataaataaataaataaataaataaat\n')
-        file.write('chr11\tCrossSsrAndBesAlignedByBlat\tSSR\t3725887\t3725868\t.\t-\t.\tID=SSR_MRRE1H032F08FM1_2;Name=SSR_MRRE1H032F08FM1_2;bes_name=MRRE1H032F08FM1;bes_size=606;bes_matchstart=10;bes_matchend=606;bes_redundancy=2;ssr_type=2;ssr_motif=ag;ssr_motif_number=10;ssr_start=587;ssr_end=606;muscadine_seq=agagagagagagagagagag\n')
-        file.close()
-
-if __name__ == "__main__":
-    unittest.main()
-    
-        
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_F_VarscanToVCF.py
--- a/commons/core/parsing/test/Test_F_VarscanToVCF.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,173 +0,0 @@\n-import unittest\n-import os\n-from commons.core.parsing.VarscanToVCF import VarscanToVCF\n-from commons.core.utils.FileUtils import FileUtils\n-\n-class Test_F_VarscanToVCF(unittest.TestCase):\n-    \n-    def setUp(self):\n-        self.emptyVarscanFileName = "emptyfile.varscan"\n-        self.varscanFileName = "%s/commons/core/parsing/test/data/sampleForTestVarscanToVCF.varscan" % os.environ["REPET_PATH"]\n-        self.expVCFFileName = "expVCF.vcf"\n-        self.obsVCFFileName = "obsVCF.vcf"\n-        \n-    def tearDown(self):\n-        if os.path.exists(self.emptyVarscanFileName):\n-            os.remove(self.emptyVarscanFileName)\n-        os.remove(self.expVCFFileName)\n-        os.remove(self.obsVCFFileName)\n-        \n-    def test_VarscanToVCF(self):\n-        self._writeExpOutputFile()\n-        iVarscanFile = VarscanToVCF(self.varscanFileName, self.obsVCFFileName, doClean = True)\n-        iVarscanFile.run()\n-        self.assertTrue(FileUtils.are2FilesIdentical(self.expVCFFileName, self.obsVCFFileName))\n-       \n-    def test_VarscanToVCF_empty(self):\n-        self._writeInputFile_empty()\n-        self._writeExpOutputFile_empty()\n-        iVarscanFile = VarscanToVCF(self.emptyVarscanFileName, self.obsVCFFileName, doClean = True)\n-        iVarscanFile.run()\n-        self.assertTrue(FileUtils.are2FilesIdentical(self.expVCFFileName, self.obsVCFFileName))\n-           \n-    def _writeInputFile_empty(self):\n-        with open(self.emptyVarscanFileName, "w") as varscanFileName:\n-            varscanFileName.write("Chrom\\tPosition\\tRef\\tCons\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\tMapQual1\\tMapQual2\\tReads1Plus\\tReads1Minus\\tReads2Plus\\tReads2Minus\\tVarAllele\\n")\n-            \n-    def _writeExpOutputFile_empty(self):\n-        with open(self.expVCFFileName, "w") as vcfFileName:   \n-            vcfFileName.write("##fileformat=VCFv4.1\\n")\n-            
vcfFileName.write("#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\n")\n-   \n-    def _writeExpOutputFile(self):\n-        with open(self.expVCFFileName, "w") as vcfFileName:\n-            vcfFileName.write("##fileformat=VCFv4.1\\n")\n-            vcfFileName.write("#CHROM\\tPOS\\tID\\tREF\\tALT\\tQUAL\\tFILTER\\tINFO\\n")\n-            vcfFileName.write("chr1\\t10759\\t.\\tC\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=47\\n")\n-            vcfFileName.write("chr1\\t12438\\t.\\tC\\tG\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=62;ABQ=42\\n")\n-            vcfFileName.write("chr1\\t17432\\t.\\tA\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=55;ABQ=37\\n")\n-            vcfFileName.write("chr1\\t20391\\t.\\tA\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=56;ABQ=37\\n")\n-            vcfFileName.write("chr1\\t21207\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=55;ABQ=41\\n")\n-            vcfFileName.write("chr1\\t26057\\t.\\tT\\tG\\t0.087739243\\t.\\tAF=0.1538;DP=13;RBQ=60;ABQ=37\\n")\n-            vcfFileName.write("chr1\\t36838\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=61;ABQ=36\\n")\n-            vcfFileName.write("chr1\\t37751\\t.\\tA\\tC\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=65;ABQ=42\\n")\n-            vcfFileName.write("chr1\\t43500\\t.\\tC\\tG\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=65;ABQ=40\\n")\n-            vcfFileName.write("chr1\\t50481\\t.\\tA\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=60;ABQ=40\\n")\n-            vcfFileName.write("chr1\\t106849\\t.\\tC\\tG\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=65;ABQ=40\\n")\n-            vcfFileName.write("chr1\\t108726\\t.\\tT\\tA\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=50;ABQ=41\\n")\n-            vcfFileName.write("chr1\\t114204\\t.\\tG\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=6;RBQ=60;ABQ=39\\n")\n-            vcfFileName.write("chr1\\t115030\\t.\\tA\\tC\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=57;ABQ=41\\n")\n-            
vcfFileName.write("chr1\\t116173\\t.\\tA\\tG\\t0.087739243\\t.\\tAF=0.2222;DP=9;RBQ=58;ABQ=39\\n")\n-            vcfFileName.write("chr1\\t118433\\t.\\tG\\tC\\t0.087739243\\t.\\tAF=0.1429;DP=7;RBQ=64;ABQ=50\\n")\n-            vcfFileName.write("chr1\\t119042\\t.\\tA\\tG\\t0.087739243\\t.\\tAF=0.1667;DP=6;RBQ=55;ABQ=51\\n")\n-            vcfFi'..b'me.write("chr1\\t498962\\t.\\tC\\tA\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=41\\n")\n-            vcfFileName.write("chr1\\t510532\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.2000;DP=5;RBQ=64;ABQ=53\\n")\n-            vcfFileName.write("chr1\\t516369\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=34;ABQ=40\\n")\n-            vcfFileName.write("chr1\\t523631\\t.\\tG\\tT\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=64;ABQ=40\\n")\n-            vcfFileName.write("chr1\\t524680\\t.\\tC\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=41\\n")\n-            vcfFileName.write("chr1\\t525898\\t.\\tT\\tG\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=62;ABQ=49\\n")\n-            vcfFileName.write("chr1\\t526118\\t.\\tA\\tC\\t0.087739243\\t.\\tAF=0.1667;DP=6;RBQ=61;ABQ=50\\n")\n-            vcfFileName.write("chr1\\t535762\\t.\\tC\\tT\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=65;ABQ=42\\n")\n-            vcfFileName.write("chr1\\t543235\\t.\\tG\\tT\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=45;ABQ=41\\n")\n-            vcfFileName.write("chr1\\t550086\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=50;ABQ=41\\n")\n-            vcfFileName.write("chr1\\t550508\\t.\\tA\\tG\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=55;ABQ=40\\n")\n-            vcfFileName.write("chr1\\t551143\\t.\\tG\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=39\\n")\n-            vcfFileName.write("chr1\\t552924\\t.\\tA\\tG\\t0.087739243\\t.\\tAF=0.2500;DP=8;RBQ=62;ABQ=38\\n")\n-            vcfFileName.write("chr1\\t553541\\t.\\tA\\tG\\t0.087739243\\t.\\tAF=0.1250;DP=8;RBQ=65;ABQ=52\\n")\n-            
vcfFileName.write("chr1\\t560806\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.1667;DP=6;RBQ=65;ABQ=49\\n")\n-            vcfFileName.write("chr1\\t562736\\t.\\tC\\tG\\t0.087739243\\t.\\tAF=0.1667;DP=6;RBQ=64;ABQ=52\\n")\n-            vcfFileName.write("chr1\\t563224\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=51;ABQ=39\\n")\n-            vcfFileName.write("chr1\\t564217\\t.\\tT\\tA\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=62;ABQ=40\\n")\n-            vcfFileName.write("chr1\\t567288\\t.\\tC\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=40\\n")\n-            vcfFileName.write("chr1\\t569652\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=48;ABQ=42\\n")\n-            vcfFileName.write("chr1\\t570280\\t.\\tG\\tT\\t0.087739243\\t.\\tAF=0.1250;DP=8;RBQ=60;ABQ=53\\n")\n-            vcfFileName.write("chr1\\t582185\\t.\\tT\\tC\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=63;ABQ=40\\n")\n-            vcfFileName.write("chr1\\t582453\\t.\\tG\\tC\\t0.087739243\\t.\\tAF=0.5000;DP=2;RBQ=65;ABQ=38\\n")\n-            vcfFileName.write("chr1\\t583477\\t.\\tT\\tG\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=62;ABQ=39\\n")\n-            vcfFileName.write("chr1\\t584179\\t.\\tG\\tT\\t0.087739243\\t.\\tAF=0.2500;DP=4;RBQ=65;ABQ=41\\n")\n-            vcfFileName.write("chr1\\t589074\\t.\\tG\\tC\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=36\\n")\n-            vcfFileName.write("chr1\\t596641\\t.\\tC\\tG\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=65;ABQ=40\\n")\n-            vcfFileName.write("chr1\\t599263\\t.\\tG\\tT\\t0.087739243\\t.\\tAF=0.3333;DP=3;RBQ=60;ABQ=38\\n")\n-                     \n-##fileDate=20090805\n-##source=myImputationProgramV3.1\n-##reference=1000Gchr1    10759    .    C    T    0.087739243    .    
AF=33.33%;DP=3enomesPilot-NCBI36\n-##phasing=partial\n-##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">\n-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">\n-##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">\n-##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">\n-##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">\n-##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">\n-##FILTER=<ID=q10,Description="Quality below 10">\n-##FILTER=<ID=s50,Description="Less than 50% of samples have data">\n-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">\n-##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">\n-            \n-if __name__ == "__main__":\n-    unittest.main()\n-    \n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_FastaParser.py
--- a/commons/core/parsing/test/Test_FastaParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,75 +0,0 @@
-from commons.core.parsing.FastaParser import FastaParser
-from SMART.Java.Python.structure.Sequence import Sequence
-import unittest
-
-class Test_FastaParser(unittest.TestCase):
-
-    def test_getSubsequence(self):
-        fastaFile = "myFastaInput.fasta"
-        self._writeInputFastaFile(fastaFile)
-        parser = FastaParser(fastaFile)
-        chromosome = "1"
-        expSeq = Sequence("1:1-20 (1)", "CCTAAGCCATTGCTTGGTGA")
-        obsSeq = parser.getSubSequence(chromosome, 1, 20, 1)
-        self.assertEquals(expSeq, obsSeq)
-
-    def test_getSubsequence_long_sequence(self):
-        fastaFile = "myFastaInput.fasta"
-        self._writeInputFastaFile(fastaFile)
-        parser = FastaParser(fastaFile)
-        chromosome = "2"
-        expSeq = Sequence("subsequence", "TGAAGA")
-        obsSeq = parser.getSubSequence(chromosome, 55, 60, 1, "subsequence")
-        self.assertEquals(expSeq, obsSeq)
-
-    def test_getSubsequence_long_sequence_inside_and_outside(self):
-        fastaFile = "myFastaInput.fasta"
-        self._writeInputFastaFile(fastaFile)
-        parser = FastaParser(fastaFile)
-        chromosome = "2"
-        expSeq = Sequence("subsequence", "TTA")
-        obsSeq = parser.getSubSequence(chromosome, 137, 151, 1, "subsequence")
-        self.assertEquals(expSeq, obsSeq)
-
-    def test_getSubsequence_long_sequence_last_letter(self):
-        fastaFile = "myFastaInput.fasta"
-        self._writeInputFastaFile(fastaFile)
-        parser = FastaParser(fastaFile)
-        chromosome = "2"
-        expSeq = Sequence("subsequence", "A")
-        obsSeq = parser.getSubSequence(chromosome, 139, 151, 1, "subsequence")
-        self.assertEquals(expSeq, obsSeq)
-
-    def test_getSubsequence_long_sequence_totally_outside(self):
-        fastaFile = "myFastaInput.fasta"
-        self._writeInputFastaFile(fastaFile)
-        parser = FastaParser(fastaFile)
-        chromosome = "2"
-        isSysExit = False
-        try:
-            parser.getSubSequence(chromosome, 140, 151, 1, "subsequence")
-        except:
-            isSysExit = True
-        self.assertTrue(isSysExit)
-        
-    def test_setTags(self):
-        fastaFile = "myFastaInput.fasta"
-        self._writeInputFastaFile(fastaFile)
-        parser = FastaParser(fastaFile)
-        parser.setTags()
-        expTags = {"1" : 0,
-                   "2" : 54}
-        obsTags = parser.getTags()
-        self.assertEquals(expTags, obsTags)
-        
-    def _writeInputFastaFile(self, fastaFile):
-        myHandler = open(fastaFile, 'w')
-        myHandler.write(">1\n")
-        myHandler.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAAT\n")
-        myHandler.write(">2\n")
-        myHandler.write("TATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCG\n")
-        myHandler.write("GACCTGAAGAAATTCCTGATTGTACGTTCTGGTTACTCTTCAATTTGGGC\n")
-        myHandler.write("TGCTTAATTATCTCCTCAATTTCAATTTGGCCATGCTTA\n")
-
-if __name__ == "__main__":
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_FindRep.py
--- a/commons/core/parsing/test/Test_FindRep.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,108 +0,0 @@
-import unittest
-import os
-from commons.core.utils.FileUtils import FileUtils
-from xml.sax import make_parser
-from xml.sax.handler import feature_namespaces
-from commons.core.parsing.FindRep import FindRep
-
-
-class Test_FindRep(unittest.TestCase):
-    def setUp(self):
-        self._mrepsOuputFileName = "output.xml"
-        self._obsSetFileName = "obsOuput.set"
-        self._expSetFileName = "expOuput.set"
-        self._writeExpSet(self._expSetFileName)
-        self._writeMrepsOutput(self._mrepsOuputFileName)

-    def tearDown(self):
-        os.remove(self._expSetFileName)
-        os.remove(self._obsSetFileName)
-        os.remove(self._mrepsOuputFileName)
-    
-    def test_parse(self):
-        xmlParser = make_parser()
-        xmlParser.setFeature( feature_namespaces, 0 )
-        xmlParser.setContentHandler( FindRep( self._obsSetFileName,0,  0 ) )
-        xmlParser.parse( self._mrepsOuputFileName )
-        self.assertTrue(FileUtils.are2FilesIdentical(self._obsSetFileName, self._expSetFileName))  
-    
-    def _writeExpSet(self, fileName):
-        f = open(fileName, "w")
-        f.write("1\t(tatt)3\tseq1\t4\t16\n")
-        f.write("2\t(tatt)3\tseq1\t23\t35\n")
-        f.write("3\t(tatt)3\tseq1\t42\t54\n")
-        f.close()
-        
-    def _writeMrepsOutput(self, fileName):
-        f = open(fileName, "w")
-        f.write("<?xml version='1.0' encoding='UTF-8' ?>\n")
-        f.write("<mreps>\n")
-        f.write("<time>Thu Dec  1 17:25:54 2011\n")
-        f.write("</time>\n")
-        f.write("<parameters>\n")
-        f.write("    <type-of-input>file in fasta format</type-of-input>\n")
-        f.write("    <err>3</err>\n")
-        f.write("    <from>1</from>\n")
-        f.write("    <to>-1</to>\n")
-        f.write("    <win>-1</win>\n")
-        f.write("    <minsize>1</minsize>\n")
-        f.write("    <maxsize>-1</maxsize>\n")
-        f.write("    <minperiod>1</minperiod>\n")
-        f.write("   <maxperiod>-1</maxperiod>\n")
-        f.write("   <minexponent>3.00</minexponent>\n")
-        f.write("</parameters>\n")
-        f.write("<results>\n")
-        f.write("<sequence-name>seq1</sequence-name>\n")
-        f.write("<repetitions>\n")
-        f.write("<window>\n")
-        f.write("<windowstart>1</windowstart>\n")
-        f.write("<windowend>60</windowend>\n")
-        f.write("    <repeat>\n")
-        f.write("        <start>4</start>\n")
-        f.write("        <end>16</end>\n")
-        f.write("        <length>13</length>\n")
-        f.write("       <period>4</period>\n")
-        f.write("       <exponent>3.25</exponent>\n")
-        f.write("        <score>0.000</score>\n")
-        f.write("        <sequence>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>t</unit>\n")
-        f.write("        </sequence>\n")
-        f.write("    </repeat>\n")
-        f.write("    <repeat>\n")
-        f.write("        <start>23</start>\n")
-        f.write("        <end>35</end>\n")
-        f.write("        <length>13</length>\n")
-        f.write("        <period>4</period>\n")
-        f.write("        <exponent>3.25</exponent>\n")
-        f.write("        <score>0.000</score>\n")
-        f.write("        <sequence>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>t</unit>\n")
-        f.write("        </sequence>\n")
-        f.write("    </repeat>\n")
-        f.write("    <repeat>\n")
-        f.write("        <start>42</start>\n")
-        f.write("       <end>54</end>\n")
-        f.write("        <length>13</length>\n")
-        f.write("        <period>4</period>\n")
-        f.write("        <exponent>3.25</exponent>\n")
-        f.write("        <score>0.000</score>\n")
-        f.write("        <sequence>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>t</unit>\n")
-        f.write("        </sequence>\n")
-        f.write("    </repeat>\n")
-        f.write("<nbofreps>3</nbofreps>\n")
-        f.write("</window>\n")
-        f.write("</repetitions>\n")
-        f.write("</results>\n")
-        f.write("<errorcode>0</errorcode>\n")
-        f.write("</mreps>\n")
-        f.close()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_GffParser.py
--- a/commons/core/parsing/test/Test_GffParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,62 +0,0 @@
-import unittest
-from commons.core.parsing.GffParser import GffParser
-
-
-class Test_GffParser(unittest.TestCase):
-    
-
-    def test_Parser(self):
-        parser = GffParser("data/testGffParser1.gff3")
-
-        self.assertEqual(parser.getNbTranscripts(), 3)
-
-        cpt = 0
-        for transcript in parser.getIterator():
-            cpt += 1
-            if cpt == 1:
-                self.assertEqual(transcript.getChromosome(), "arm_X")
-                self.assertEqual(transcript.getName(), "test1")
-                self.assertEqual(transcript.getStart(), 1000)
-                self.assertEqual(transcript.getEnd(), 2000)
-                self.assertEqual(transcript.getDirection(), 1)
-                self.assertEqual(transcript.getNbExons(), 1)
-                self.assertEqual(transcript.getTagValue("field"), "value1")
-                exons = transcript.getExons()
-                self.assertEqual(exons[0].getChromosome(), "arm_X")
-                self.assertEqual(exons[0].getStart(), 1000)
-                self.assertEqual(exons[0].getEnd(), 2000)
-                self.assertEqual(exons[0].getDirection(), 1)
-                self.assertEqual(transcript.getSize(), 1001)
-            elif cpt == 2:
-                self.assertEqual(transcript.getChromosome(), "arm_X")
-                self.assertEqual(transcript.getName(), "test2")
-                self.assertEqual(transcript.getStart(), 10000)
-                self.assertEqual(transcript.getEnd(), 20000)
-                self.assertEqual(transcript.getDirection(), -1)
-                self.assertEqual(transcript.getNbExons(), 2)
-                self.assertEqual(transcript.getTagValue("field"), "value2")
-                exons = transcript.getExons()
-                self.assertEqual(exons[0].getChromosome(), "arm_X")
-                self.assertEqual(exons[0].getStart(), 10000)
-                self.assertEqual(exons[0].getEnd(), 10100)
-                self.assertEqual(exons[0].getDirection(), -1)
-                self.assertEqual(transcript.getSize(), 9602)
-            if cpt == 3:
-                self.assertEqual(transcript.getChromosome(), "arm_X")
-                self.assertEqual(transcript.getName(), "test1.1")
-                self.assertEqual(transcript.getStart(), 1000)
-                self.assertEqual(transcript.getEnd(), 2000)
-                self.assertEqual(transcript.getDirection(), 1)
-                self.assertEqual(transcript.getNbExons(), 1)
-                self.assertEqual(transcript.getTagValue("ID"), "test1.1-1")
-                exons = transcript.getExons()
-                self.assertEqual(exons[0].getChromosome(), "arm_X")
-                self.assertEqual(exons[0].getStart(), 1000)
-                self.assertEqual(exons[0].getEnd(), 2000)
-                self.assertEqual(exons[0].getDirection(), 1)
-                self.assertEqual(transcript.getSize(), 1001)
-
-
-if __name__ == '__main__':
-        unittest.main()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_MapParser.py
--- a/commons/core/parsing/test/Test_MapParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,53 +0,0 @@
-import unittest
-import os
-from commons.core.parsing.MapParser import MapParser
-
-class Test_MapParser(unittest.TestCase):
-
-    
-    def setUp(self):
-        self.inputMapFileName = "testMapParser.map"
-        self._writeInputMapFile()
-        
-    def tearDown(self):
-        if os.path.exists(self.inputMapFileName):
-            os.remove(self.inputMapFileName)
-    
-    def test_Parser(self):
-        parser = MapParser(self.inputMapFileName)
-        
-        cpt = 0
-        for transcript in parser.getIterator():
-            cpt += 1
-            if cpt == 1:
-                self.assertEqual(transcript.getChromosome(), "dmel_chr4")
-                self.assertEqual(transcript.getName(), "aagatgcgtaacggccatac_17")
-                self.assertEqual(transcript.getStart(), 4380)
-                self.assertEqual(transcript.getEnd(), 4400)
-                self.assertEqual(transcript.getDirection(), 1)
-                self.assertEqual(transcript.getSize(), 21)
-            elif cpt == 10:
-                self.assertEqual(transcript.getChromosome(), "dmel_chr4")
-                self.assertEqual(transcript.getName(), "aacggccatacattggtttg_12")
-                self.assertEqual(transcript.getStart(), 4389)
-                self.assertEqual(transcript.getEnd(), 4409)
-                self.assertEqual(transcript.getDirection(), 1)
-                self.assertEqual(transcript.getSize(), 21)
-                
-                
-    def _writeInputMapFile(self):
-        inputFile = open(self.inputMapFileName,'w')
-        inputFile.write("aagatgcgtaacggccatac_17\tdmel_chr4\t4380\t4400\n")
-        inputFile.write("agatgcgtaacggccataca_16\tdmel_chr4\t4381\t4401\n")
-        inputFile.write("gatgcgtaacggccatacat_16\tdmel_chr4\t4382\t4402\n")
-        inputFile.write("atgcgtaacggccatacatt_15\tdmel_chr4\t4383\t4403\n")
-        inputFile.write("tgcgtaacggccatacattg_15\tdmel_chr4\t4384\t4404\n")
-        inputFile.write("gcgtaacggccatacattgg_15\tdmel_chr4\t4385\t4405\n")
-        inputFile.write("cgtaacggccatacattggt_14\tdmel_chr4\t4386\t4406\n")
-        inputFile.write("gtaacggccatacattggtt_14\tdmel_chr4\t4387\t4407\n")
-        inputFile.write("taacggccatacattggttt_14\tdmel_chr4\t4388\t4408\n")
-        inputFile.write("aacggccatacattggtttg_12\tdmel_chr4\t4389\t4409\n")
-        inputFile.close()
-
-if __name__ == '__main__':
-        unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_MrepsToSet.py
--- a/commons/core/parsing/test/Test_MrepsToSet.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,105 +0,0 @@
-import unittest
-import os
-from commons.core.utils.FileUtils import FileUtils
-from commons.core.parsing.MrepsToSet import MrepsToSet
-
-class Test_MrepsToSet(unittest.TestCase):
-    def setUp(self):
-        self._mrepsInputFileName = "mrepsInput.fa"
-        self._mrepsOuputFileName = "mrepsOutput.xml"
-        self._obsSetFileName = "obsOuput.set"
-        self._expSetFileName = "expOuput.set"
-        
-        self._writeExpSet(self._expSetFileName)
-        self._writeMrepsOutput(self._mrepsOuputFileName)

-    def tearDown(self):
-        os.remove(self._expSetFileName)
-        os.remove(self._obsSetFileName)
-        os.remove(self._mrepsOuputFileName)
-    
-    def test_convert(self):
-        iMrepsToSet = MrepsToSet(self._mrepsInputFileName, self._mrepsOuputFileName, self._obsSetFileName)
-        iMrepsToSet.run()
-        self.assertTrue(FileUtils.are2FilesIdentical(self._obsSetFileName, self._expSetFileName))  
-    
-    def _writeExpSet(self, fileName):
-        f = open(fileName, "w")
-        f.write("1\t(tatt)3\tseq1\t4\t16\n")
-        f.write("2\t(tatt)3\tseq1\t23\t35\n")
-        f.write("3\t(tatt)3\tseq1\t42\t54\n")
-        f.close()
-        
-    def _writeMrepsOutput(self, fileName):
-        f = open(fileName, "w")
-        f.write("<?xml version='1.0' encoding='UTF-8' ?>\n")
-        f.write("<mreps>\n")
-        f.write("<time>Thu Dec  1 17:25:54 2011\n")
-        f.write("</time>\n")
-        f.write("<parameters>\n")
-        f.write("    <type-of-input>file in fasta format</type-of-input>\n")
-        f.write("    <err>3</err>\n")
-        f.write("    <from>1</from>\n")
-        f.write("    <to>-1</to>\n")
-        f.write("    <win>-1</win>\n")
-        f.write("    <minsize>1</minsize>\n")
-        f.write("    <maxsize>-1</maxsize>\n")
-        f.write("    <minperiod>1</minperiod>\n")
-        f.write("   <maxperiod>-1</maxperiod>\n")
-        f.write("   <minexponent>3.00</minexponent>\n")
-        f.write("</parameters>\n")
-        f.write("<results>\n")
-        f.write("<sequence-name>seq1</sequence-name>\n")
-        f.write("<repetitions>\n")
-        f.write("<window>\n")
-        f.write("<windowstart>1</windowstart>\n")
-        f.write("<windowend>60</windowend>\n")
-        f.write("    <repeat>\n")
-        f.write("        <start>4</start>\n")
-        f.write("        <end>16</end>\n")
-        f.write("        <length>13</length>\n")
-        f.write("       <period>4</period>\n")
-        f.write("       <exponent>3.25</exponent>\n")
-        f.write("        <score>0.000</score>\n")
-        f.write("        <sequence>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>t</unit>\n")
-        f.write("        </sequence>\n")
-        f.write("    </repeat>\n")
-        f.write("    <repeat>\n")
-        f.write("        <start>23</start>\n")
-        f.write("        <end>35</end>\n")
-        f.write("        <length>13</length>\n")
-        f.write("        <period>4</period>\n")
-        f.write("        <exponent>3.25</exponent>\n")
-        f.write("        <score>0.000</score>\n")
-        f.write("        <sequence>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>t</unit>\n")
-        f.write("        </sequence>\n")
-        f.write("    </repeat>\n")
-        f.write("    <repeat>\n")
-        f.write("        <start>42</start>\n")
-        f.write("       <end>54</end>\n")
-        f.write("        <length>13</length>\n")
-        f.write("        <period>4</period>\n")
-        f.write("        <exponent>3.25</exponent>\n")
-        f.write("        <score>0.000</score>\n")
-        f.write("        <sequence>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>tatt</unit>\n")
-        f.write("            <unit>t</unit>\n")
-        f.write("        </sequence>\n")
-        f.write("    </repeat>\n")
-        f.write("<nbofreps>3</nbofreps>\n")
-        f.write("</window>\n")
-        f.write("</repetitions>\n")
-        f.write("</results>\n")
-        f.write("<errorcode>0</errorcode>\n")
-        f.write("</mreps>\n")
-        f.close()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_Multifasta2SNPFile.py
--- a/commons/core/parsing/test/Test_Multifasta2SNPFile.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,1786 +0,0 @@\n-import os\n-import shutil\n-import unittest\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile\n-from commons.core.parsing.Multifasta2SNPFile import ReferenceBioseqAndLinesBioseqDBWrapper\n-from commons.core.seq.Bioseq import Bioseq\n-from commons.core.seq.BioseqDB import BioseqDB\n-from smac_pipe.tests.Utils4Test import Utils4Test\n-\n-\n-class Test_Multifasta2SNPFile(unittest.TestCase):\n-# TODO TEST LOGFILE\n-    def setUp(self):\n-        os.chdir("%s/commons/core/parsing/test/" % os.environ["REPET_PATH"])\n-        self._inFileName = "multifasta_input.fasta"\n-        \n-        self._expSubSNPFileName = "%s/commons/core/parsing/test/expSubSNP.csv" % os.environ["REPET_PATH"]\n-        self._expAlleleFileName = "%s/commons/core/parsing/test/expAllele.csv" % os.environ["REPET_PATH"]\n-        \n-        self._expIndividualFileName = "%s/commons/core/parsing/test/expIndividual.csv" % os.environ["REPET_PATH"]\n-        self._expSequenceFSAFileName = "%s/commons/core/parsing/test/expSequences.fsa" % os.environ["REPET_PATH"]\n-        self._expSequenceCSVFileName = "%s/commons/core/parsing/test/expSequences.csv" % os.environ["REPET_PATH"]\n-        self._expBatchFileName = "%s/commons/core/parsing/test/expBatch.txt" % os.environ["REPET_PATH"]\n-        self._expBatchLineFileName = "%s/commons/core/parsing/test/expBatchLine.csv" % os.environ["REPET_PATH"]\n-        \n-        self._realInputFileName = "data/real_multifasta_input.fasta"\n-        self._realExpSubSNPFileName = "data/realExpSubSNP.csv"\n-        self._realExpSequenceFSAFileName = "data/realExpSequences.fsa"\n-        self._realExpBatchLineFileName = "data/realExpBatchLine.csv"\n-        self._realExpIndividualFileName = "data/realExpIndividual.csv"\n-        \n-        self._inputDirSeveralBatches = "%s/commons/core/parsing/test/severalBatchDir" % os.environ["REPET_PATH"]\n-        \n-        
self._obsSubSNPFileName = "SubSNP.csv"\n-        self._obsAlleleFileName = "Allele.csv"\n-        self._obsIndividualFileName = "Individual.csv"\n-        self._obsSequenceFSAFileName = "Sequences.fsa"\n-        self._obsSequenceCSVFileName = "Sequences.csv"\n-        self._obsBatchFileName = "Batch.txt"\n-        self._obsBatchLineFileName = "BatchLine.csv"\n-        \n-        self._fileUtils = FileUtils()\n-\n-    def tearDown(self):\n-        os.chdir("%s/commons/core/parsing/test/" % os.environ["REPET_PATH"])\n-        logFileName = "multifasta2SNP.log"\n-        if self._fileUtils.isRessourceExists(self._inFileName):\n-            os.remove(self._inFileName)\n-        if self._fileUtils.isRessourceExists(self._obsSubSNPFileName):\n-            os.remove(self._obsSubSNPFileName)\n-        if self._fileUtils.isRessourceExists(self._obsSubSNPFileName + "_filtered"):\n-            os.remove(self._obsSubSNPFileName + "_filtered")\n-        if self._fileUtils.isRessourceExists(self._obsAlleleFileName):\n-            os.remove(self._obsAlleleFileName)\n-        if self._fileUtils.isRessourceExists(self._obsIndividualFileName):\n-            os.remove(self._obsIndividualFileName)\n-        if self._fileUtils.isRessourceExists(self._obsSequenceFSAFileName):\n-            os.remove(self._obsSequenceFSAFileName)\n-        if self._fileUtils.isRessourceExists(self._obsSequenceCSVFileName):\n-            os.remove(self._obsSequenceCSVFileName)\n-        if self._fileUtils.isRessourceExists(self._obsBatchFileName):\n-            os.remove(self._obsBatchFileName)\n-        if self._fileUtils.isRessourceExists(self._obsBatchLineFileName):\n-            os.remove(self._obsBatchLineFileName)\n-\n-        if self._fileUtils.isRessourceExists(self._expSubSNPFileName):        \n-            os.remove(self._expSubSNPFileName)\n-        if self._fileUtils.isRessourceExists(self._realExpSubSNPFileName + "_filtered"):        \n-            os.remove(self._realExpSubSNPFileName + 
"_filtered")\n-        if self._fileUtils.isRessourceExists(self._expAlleleFileName):\n-     '..b'   \n-    def _writeInputFileSeveralBatches(self):\n-        if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)):\n-            os.mkdir(self._inputDirSeveralBatches)\n-         \n-            inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w")\n-            inFileHandle.write(">Sequence_de_Reference1\\n")\n-            inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n-            inFileHandle.write(">Line1\\n")\n-            inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n-            inFileHandle.write(">Line2\\n")\n-            inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n-            inFileHandle.close()\n-            \n-            inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w")\n-            inFileHandle2.write(">Sequence_de_Reference2\\n")\n-            inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n-            inFileHandle2.write(">Line1\\n")\n-            inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n-            inFileHandle2.write(">Line2\\n")\n-            inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n-            inFileHandle2.close()\n-            \n-    def _writeInputFileSeveralBatches_different_lines_between_files(self):\n-        if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)):\n-            os.mkdir(self._inputDirSeveralBatches)\n-         \n-            inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w")\n-            inFileHandle.write(">Sequence_de_Reference1\\n")\n-            inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n-            inFileHandle.write(">Line1\\n")\n-            
inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n-            inFileHandle.write(">Line2\\n")\n-            inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n-            inFileHandle.close()\n-            \n-            inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w")\n-            inFileHandle2.write(">Sequence_de_Reference2\\n")\n-            inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n-            inFileHandle2.write(">Line3\\n")\n-            inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n-            inFileHandle2.write(">Line4\\n")\n-            inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n-            inFileHandle2.close()\n-        \n-    def _writeInputFileSeveralBatches_different_lines_and_same_refseq_between_files(self): \n-        if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)):\n-            os.mkdir(self._inputDirSeveralBatches)\n-         \n-            inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w")\n-            inFileHandle.write(">Sequence_de_Reference1\\n")\n-            inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n-            inFileHandle.write(">Line1\\n")\n-            inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n-            inFileHandle.write(">Line2\\n")\n-            inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n-            inFileHandle.close()\n-            \n-            inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w")\n-            inFileHandle2.write(">Sequence_de_Reference1\\n")\n-            inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n-            inFileHandle2.write(">Line3\\n")\n-            inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n-  
          inFileHandle2.write(">Line4\\n")\n-            inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n-            inFileHandle2.close()\n-    \n-        \n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_Multifasta2SNPFileWriter.py
--- a/commons/core/parsing/test/Test_Multifasta2SNPFileWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,292 +0,0 @@\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.seq.BioseqDB import BioseqDB\n-from commons.core.seq.Bioseq import Bioseq\n-from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFileWriter\n-from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile\n-from commons.core.parsing.Multifasta2SNPFile import ReferenceBioseqAndLinesBioseqDBWrapper\n-from commons.core.LoggerFactory import LoggerFactory\n-import os\n-import logging\n-import unittest\n-\n-class Test_Multifasta2SNPFileWriter(unittest.TestCase):\n-\n-    def setUp(self):\n-        self._obsSubSNPFile = "SubSNP.csv"\n-        self._expSubSNPFile = "ExpSubSNP.csv"\n-        \n-        self._obsAlleleFile = "Allele.csv"\n-        self._expAlleleFile = "ExpAllele.csv"\n-        \n-        self._obsIndividualFile = "Individual.csv"\n-        self._expIndividualFile = "ExpIndividual.csv"\n-        \n-        self._obsSequenceFSAFile = "Sequences.fsa"\n-        self._expSequenceFSAFile = "ExpSequences.fsa"\n-        \n-        self._obsSequenceCSVFile = "Sequences.csv"\n-        self._expSequenceCSVFile = "ExpSequences.csv"\n-        \n-        self._obsBatchFile = "Batch.txt"\n-        self._expBatchFile = "ExpBatch.txt"\n-        \n-        self._obsBatchLineFile = "BatchLine.csv"\n-        self._expBatchLineFile = "ExpBatchLine.csv"\n-        \n-        self._logFileName = "Test_Multifasta2SNPWriter.log"\n-        \n-        self._inputFileName = "multifasta.fsa"\n-        \n-        self._lSNPResult = []\n-        self._dAlleleResult = {}\n-        self._lIndividualResult = []\n-        self._refSeq = Bioseq()\n-        self._seqDb= BioseqDB()\n-        \n-        self._logFile = LoggerFactory.createLogger(self._logFileName, logging.INFO, "%(asctime)s %(levelname)s: %(message)s")\n-        self._lSequenceWrapper = ReferenceBioseqAndLinesBioseqDBWrapper(self._refSeq, self._seqDb,  self._logFile, self._inputFileName)\n-        
self._lBatchLineResults = []\n-        \n-        self._Multifasta2SNPFileWriter = Multifasta2SNPFileWriter()\n-        \n-        self._inFileName = "multifasta.txt"\n-        self._taxon = "Arabidopsis thaliana"\n-\n-    def tearDown(self):\n-        if FileUtils.isRessourceExists(self._inFileName):\n-            os.remove(self._inFileName)\n-        if FileUtils.isRessourceExists("multifasta2SNP.log"):\n-            os.remove("multifasta2SNP.log")\n-        if FileUtils.isRessourceExists("Test_Multifasta2SNPWriter.log"):\n-            os.remove("Test_Multifasta2SNPWriter.log")\n-            \n-        if FileUtils.isRessourceExists(self._obsSubSNPFile):\n-            os.remove(self._obsSubSNPFile)\n-        if FileUtils.isRessourceExists(self._expSubSNPFile):\n-            os.remove(self._expSubSNPFile)\n-            \n-        if FileUtils.isRessourceExists(self._obsAlleleFile):\n-            os.remove(self._obsAlleleFile)\n-        if FileUtils.isRessourceExists(self._expAlleleFile):\n-            os.remove(self._expAlleleFile)\n-            \n-        if FileUtils.isRessourceExists(self._obsIndividualFile):\n-            os.remove(self._obsIndividualFile)\n-        if FileUtils.isRessourceExists(self._expIndividualFile):\n-            os.remove(self._expIndividualFile)\n-            \n-        if FileUtils.isRessourceExists(self._obsSequenceFSAFile):\n-            os.remove(self._obsSequenceFSAFile)\n-        if FileUtils.isRessourceExists(self._expSequenceFSAFile):\n-            os.remove(self._expSequenceFSAFile)\n-            \n-        if FileUtils.isRessourceExists(self._obsSequenceCSVFile):\n-            os.remove(self._obsSequenceCSVFile)\n-        if FileUtils.isRessourceExists(self._expSequenceCSVFile):\n-            os.remove(self._expSequenceCSVFile)\n-\n-        if FileUtils.isRessourceExists(self._obsBatchFile):\n-            FileUtils.removeFilesByPattern(self._obsBatchFile)\n-        if FileUtils.isRessourceExists(self._expBatchFile):\n-        
    FileUtils.removeFilesByPattern(self._expBatchFile)\n-        \n-        if FileUtils.isRessourceExists(self._ob'..b'File))\n-        self.assertTrue(FileUtils.isRessourceExists(self._obsBatchFile))\n-        self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchFile, self._obsBatchFile))\n-        self.assertTrue(FileUtils.isRessourceExists(self._obsBatchLineFile))\n-        self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchLineFile, self._obsBatchLineFile))      \n-    \n-    def _writeExpSubSNPFile(self):\n-        expFile = open(self._expSubSNPFile, "w")\n-        expFile.write("SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\\n")\n-        expFile.write("SubSNP1;A;SNP;1;A;T;1;1;1;Sequence;;;1\\n")\n-        expFile.write("SubSNP2;A;SNP;10;T;A;1;1;1;Sequence;;;2\\n")\n-        expFile.write("SubSNP3;A;SNP;20;T;A;1;1;2;Sequence;;;3\\n")\n-        expFile.close()\n-        \n-    def _writeExpAlleleFile(self):\n-        expFile = open(self._expAlleleFile, "w")\n-        expFile.write("AlleleNumber;Value;Motif;NbCopy;Comment\\n")\n-        expFile.write("1;A;;;\\n")\n-        expFile.write("2;C;;;\\n")\n-        expFile.write("3;T;;;\\n")\n-        expFile.close()        \n-        \n-        \n-    def _writeExpIndividualFile(self):\n-        expFile = open(self._expIndividualFile, "w")\n-        expFile.write("IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id\\n")\n-        expFile.write("1;Individual1;;;;;;;;;;Arabidopsis thaliana;;;;;\\n")\n-        expFile.write("2;Individual2;;;;;;;;;;Arabidopsis thaliana;;;;;\\n")\n-        expFile.close()        \n-\n-    def _writeInputFile(self):\n-        inFileHandle = open(self._inFileName, "w")\n-        
inFileHandle.write(">Sequence_de_Reference\\n")\n-        inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n-        inFileHandle.write(">Line1\\n")\n-        inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\\n")\n-        inFileHandle.write(">Line2\\n")\n-        inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA")\n-        inFileHandle.close()\n-        \n-    def _writeExpSequenceFiles(self):\n-        SequenceFSAFileHandle = open(self._expSequenceFSAFile, "w")\n-        SequenceFSAFileHandle.write(">Sequence_de_Reference\\n")\n-        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\\n")\n-        SequenceFSAFileHandle.close()\n-        SequenceCSVFileHandle = open(self._expSequenceCSVFile, "w")\n-        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\\n")\n-        SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\\n")\n-        SequenceCSVFileHandle.close()\n-        \n-    def _writeExpBatchFile(self):\n-        BatchFileHandle = open(self._expBatchFile, "w")\n-        BatchFileHandle.write("BatchNumber: 1\\n")\n-        BatchFileHandle.write("BatchName: batch1\\n")\n-        BatchFileHandle.write("GeneName: gene1\\n")\n-        BatchFileHandle.write("Description: \\n")\n-        BatchFileHandle.write("ContactNumber: \\n")\n-        BatchFileHandle.write("ProtocolNumber: \\n")\n-        BatchFileHandle.write("ThematicNumber: \\n")\n-        BatchFileHandle.write("RefSeqName: Sequence de Reference\\n")\n-        BatchFileHandle.write("AlignmentFileName: \\n")\n-        BatchFileHandle.write("SeqName: \\n")\n-        BatchFileHandle.write("//\\n")\n-        BatchFileHandle.close()\n-        \n-    def _writeExpBatchLineFile(self):\n-        BatchLineFileHandle = open(self._expBatchLineFile, "w")\n-        
BatchLineFileHandle.write("IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\\n")\n-        BatchLineFileHandle.write("1;;;1;\\n")\n-        BatchLineFileHandle.write("2;;;1;\\n")\n-        BatchLineFileHandle.close()\n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_PalsToAlign.py
--- a/commons/core/parsing/test/Test_PalsToAlign.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,43 +0,0 @@
-import unittest
-from commons.core.utils.FileUtils import FileUtils
-import os
-from commons.core.parsing.PalsToAlign import PalsToAlign
-
-class Test_PalsToAlign(unittest.TestCase):
-    
-    def setUp(self):
-        self._palsFileName = "input.gff"
-        self._expAlignFileName = "file.align"
-        self._obsAlignFileName = "output.align"
-        
-    def tearDown(self):
-        os.remove(self._palsFileName)
-        os.remove(self._expAlignFileName)
-        os.remove(self._obsAlignFileName)
-
-    def testRun(self):
-        self._writePalsFile(self._palsFileName)
-        self._writeExpAlignFile(self._expAlignFileName)
-        
-        iPalsToAlign = PalsToAlign(self._palsFileName,self._obsAlignFileName)
-        iPalsToAlign.run()
-        
-        self.assertTrue(FileUtils.are2FilesIdentical(self._expAlignFileName, self._obsAlignFileName))
-
-
-    def _writePalsFile(self, fileName):
-        f = open(fileName, "w")
-        f.write("chunk01\tpals\thit\t32290\t32583\t252\t+\t.\tTarget chunk02 28975 29268; maxe 0.035\n")
-        f.write("chunk01\tpals\thit\t28975\t29268\t252\t+\t.\tTarget chunk02 32290 32583; maxe 0.035\n") 
-        f.write("chunk01\tpals\thit\t65932\t66032\t68\t+\t.\tTarget chunk02 59293 59395; maxe 0.085\n")
-        f.close()
-        
-    def _writeExpAlignFile(self, fileName):
-        f = open(fileName, "w")
-        f.write("chunk01\t28975\t29268\tchunk02\t32290\t32583\t0.0\t252\t96.5\n") 
-        f.write("chunk01\t32290\t32583\tchunk02\t28975\t29268\t0.0\t252\t96.5\n")
-        f.write("chunk01\t65932\t66032\tchunk02\t59293\t59395\t0.0\t68\t91.5\n")
-        f.close()
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_PathNum2Id.py
--- a/commons/core/parsing/test/Test_PathNum2Id.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,69 +0,0 @@
-import unittest
-import os
-from commons.core.parsing.PathNum2Id import PathNum2Id
-from commons.core.utils.FileUtils import FileUtils
-
-
-class Test_PathNum2Id(unittest.TestCase):
-
-    def setUp(self):
-        self._inputFileName = "dummyInputPathFile.path"
-        self._outputFileName = "dummyOutputPathFile.path"
-        self._expectedFileName = "expectedpathFile.path"
-        self._pathNum2Id = PathNum2Id()
-
-    def tearDown(self):
-        os.remove( self._inputFileName )
-        os.remove( self._outputFileName )
-        os.remove( self._expectedFileName )
-
-    def test_RunWhithoutReturnAtEndOfFile(self):
-        self._createAndFillInputFileWhithoutReturnAtTheEnd()
-        self._createExpectedFile()
-        self._pathNum2Id.setInFileName( self._inputFileName )
-        self._pathNum2Id.setOutFileName( self._outputFileName )
-        self._pathNum2Id.run()
-        fileutils = FileUtils()
-        self.assertTrue(fileutils.are2FilesIdentical(self._outputFileName, self._expectedFileName))
-      
-    def test_RunWhithReturnAtEndOfFile(self):
-        self._createAndFillInputFileWhithReturnAtTheEnd()
-        self._createExpectedFile()
-        self._pathNum2Id.setInFileName( self._inputFileName )
-        self._pathNum2Id.setOutFileName( self._outputFileName )
-        self._pathNum2Id.run()
-        fileutils = FileUtils()
-        self.assertTrue(fileutils.are2FilesIdentical(self._outputFileName, self._expectedFileName))
-        
-    def _createExpectedFile(self):
-        f = open(self._expectedFileName, "w")
-        f.write("1\tblumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
-        f.write("2\tblumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
-        f.write("3\tblumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
-        f.write("4\tblumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2e-06\t0.0\t0\n")
-        f.write("5\tblumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
-        f.write("6\tblumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n")
-        f.close()
-
-    def _createAndFillInputFileWhithoutReturnAtTheEnd(self):
-        f = open(self._inputFileName, "w")
-        f.write("1\tblumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
-        f.write("2\tblumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
-        f.write("3\tblumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
-        f.write("1\tblumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2e-06\t0.0\t0\n")
-        f.write("2\tblumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
-        f.write("3\tblumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0")
-        f.close()
-        
-    def _createAndFillInputFileWhithReturnAtTheEnd(self):
-        f = open(self._inputFileName, "w")
-        f.write("1\tblumeria_Grouper_590_20:NoCat_1\t91\t108\tDUF234\t5\t22\t1.5\t3.2\t0\n")
-        f.write("2\tblumeria_Grouper_590_20:NoCat_1\t111\t119\tDUF1414\t1\t9\t6.3\t2.9\t0\n")
-        f.write("3\tblumeria_Grouper_590_20:NoCat_3\t30\t37\tCPW_WPC\t1\t9\t7.7\t1.5\t0\n")
-        f.write("1\tblumeria_Grouper_590_20:NoCat_3\t55\t69\tHECT\t341\t355\t9.2e-06\t0.0\t0\n")
-        f.write("2\tblumeria_Grouper_590_20:NoCat_4\t82\t91\tDUF46\t173\t182\t0.11\t6.4\t0\n")
-        f.write("3\tblumeria_Grouper_590_20:NoCat_5\t121\t125\tPOC4\t276\t280\t6.3\t-1.7\t0\n")
-        f.close()
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_PslParser.py
--- a/commons/core/parsing/test/Test_PslParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,165 +0,0 @@\n-from commons.core.parsing.PslParser import PslParser\n-import unittest, os\n-\n-\n-class Test_PslParser(unittest.TestCase):\n-\n-    def test_forward(self):\n-        fileName = "tmpFile.psl"\n-        handle   = open(fileName, "w")\n-        handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\T\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n158\\t0\\t0\\t0\\t0\\t0\\t1\\t158\\t+\\ttest\\t158\\t0\\t158\\tchr1\\t1501\\t237\\t553\\t2\\t79,79,\\t0,79,\\t237,474,\\n")\n-        handle.close()\n-\n-        parser = PslParser(fileName, 0)\n-        self.assertEquals(parser.getNbMappings(), 1)\n-        for mapping in parser.getIterator():\n-            transcript = mapping.getTranscript()\n-            self.assertEquals(transcript.getName(), "test")\n-            self.assertEquals(transcript.getChromosome(), "chr1")\n-            self.assertEquals(transcript.getDirection(), 1)\n-            self.assertEquals(transcript.getStart(), 238)\n-            self.assertEquals(transcript.getEnd(),   553)\n-            self.assertEquals(transcript.getNbExons(), 2)\n-            for i, exon in enumerate(transcript.getExons()):\n-                if i == 0:\n-                    self.assertEquals(exon.getStart(), 238)\n-                    self.assertEquals(exon.getEnd(),   316)\n-                elif i == 1:\n-                    self.assertEquals(exon.getStart(), 475)\n-                    self.assertEquals(exon.getEnd(),   553)\n-        os.remove(fileName)\n-        \n-        \n-    def test_backward(self):\n-        fileName = "tmpFile.psl"\n-        handle   = open(fileName, "w")\n-        
handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n158\\t0\\t0\\t0\\t0\\t0\\t1\\t158\\t-\\ttest\\t158\\t0\\t158\\tchr1\\t1501\\t237\\t553\\t2\\t79,79,\\t0,79,\\t237,474,\\n")\n-\n-        handle.close()\n-\n-        parser = PslParser(fileName, 0)\n-        self.assertEquals(parser.getNbMappings(), 1)\n-        for mapping in parser.getIterator():\n-            transcript = mapping.getTranscript()\n-            self.assertEquals(transcript.getName(), "test")\n-            self.assertEquals(transcript.getChromosome(), "chr1")\n-            self.assertEquals(transcript.getDirection(), -1)\n-            self.assertEquals(transcript.getStart(), 238)\n-            self.assertEquals(transcript.getEnd(),   553)\n-            self.assertEquals(transcript.getNbExons(), 2)\n-            for i, exon in enumerate(transcript.getExons()):\n-                if i == 1:\n-                    self.assertEquals(exon.getStart(), 238)\n-                    self.assertEquals(exon.getEnd(),   316)\n-                elif i == 0:\n-                    self.assertEquals(exon.getStart(), 475)\n-                    self.assertEquals(exon.getEnd(),   553)\n-        os.remove(fileName)\n-\n-\n-    def test_query_forward_target_forward(self):\n-        fileName = "tmpFile.psl"\n-        handle   = open(fileName, "w")\n-        handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT 
gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n241\\t0\\t0\\t0\\t0\\t0\\t1\\t60\\t++\\tseq1\\t255\\t9\\t250\\tref\\t2262\\t59\\t360\\t2'..b'assertEquals(transcript.getChromosome(), "ref")\n-            self.assertEquals(transcript.getDirection(), -1)\n-            self.assertEquals(transcript.getStart(), 60)\n-            self.assertEquals(transcript.getEnd(),   360)\n-            self.assertEquals(transcript.getNbExons(), 2)\n-            for i, exon in enumerate(transcript.getExons()):\n-                if i == 1:\n-                    self.assertEquals(exon.getStart(), 60)\n-                    self.assertEquals(exon.getEnd(),   180)\n-                elif i == 0:\n-                    self.assertEquals(exon.getStart(), 241)\n-                    self.assertEquals(exon.getEnd(),   360)\n-        os.remove(fileName)\n-\n-    def test_query_backward_target_backward(self):\n-        fileName = "tmpFile.psl"\n-        handle   = open(fileName, "w")\n-        handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n241\\t1\\t0\\t0\\t0\\t0\\t1\\t60\\t--\\tseq1\\t255\\t8\\t250\\tref\\t2262\\t58\\t360\\t2\\t120,122,\\t5,125,\\t1902,2082,\\n")\n-        handle.close()\n-\n-        parser = PslParser(fileName, 0)\n-        self.assertEquals(parser.getNbMappings(), 1)\n-    
    for mapping in parser.getIterator():\n-            transcript = mapping.getTranscript()\n-            self.assertEquals(transcript.getName(), "seq1")\n-            self.assertEquals(transcript.getChromosome(), "ref")\n-            self.assertEquals(transcript.getDirection(), 1)\n-            self.assertEquals(transcript.getStart(), 59)\n-            self.assertEquals(transcript.getEnd(),   360)\n-            self.assertEquals(transcript.getNbExons(), 2)\n-            for i, exon in enumerate(transcript.getExons()):\n-                if i == 0:\n-                    self.assertEquals(exon.getStart(), 59)\n-                    self.assertEquals(exon.getEnd(),   180)\n-                elif i == 1:\n-                    self.assertEquals(exon.getStart(), 241)\n-                    self.assertEquals(exon.getEnd(),   360)\n-        os.remove(fileName)\n-\n-\n-    def test_query_forward_target_backward(self):\n-        fileName = "tmpFile.psl"\n-        handle   = open(fileName, "w")\n-        handle.write("psLayout version 3\\n\\nmatch\\tmis-\\trep.\\tN\'s\\tQ gap\\tQ gap\\tT gap\\tT gap\\tstrand\\tQ\\tQ\\tQ\\tQ\\tT\\tT\\tT\\tT\\tblock\\tblockSizes\\tqStarts\\ttStarts\\nmatch\\tmatch\\tcount\\tbases\\tcount\\tbases\\tname\\tsize\\tstart\\tend\\tname\\tsize\\tstart\\tend\\tcount\\n---------------------------------------------------------------------------------------------------------------------------------------------------------------\\n241\\t1\\t0\\t0\\t0\\t0\\t1\\t60\\t+-\\tseq2\\t255\\t5\\t247\\tref\\t2262\\t58\\t360\\t2\\t120,122,\\t5,125,\\t1902,2082,\\n")\n-        handle.close()\n-\n-        parser = PslParser(fileName, 0)\n-        self.assertEquals(parser.getNbMappings(), 1)\n-        for mapping in parser.getIterator():\n-            transcript = mapping.getTranscript()\n-            self.assertEquals(transcript.getName(), "seq2")\n-            self.assertEquals(transcript.getChromosome(), "ref")\n-            self.assertEquals(transcript.getDirection(), 
-1)\n-            self.assertEquals(transcript.getStart(), 59)\n-            self.assertEquals(transcript.getEnd(),   360)\n-            self.assertEquals(transcript.getNbExons(), 2)\n-            for i, exon in enumerate(transcript.getExons()):\n-                if i == 1:\n-                    self.assertEquals(exon.getStart(), 59)\n-                    self.assertEquals(exon.getEnd(),   180)\n-                elif i == 0:\n-                    self.assertEquals(exon.getStart(), 241)\n-                    self.assertEquals(exon.getEnd(),   360)\n-        os.remove(fileName)\n-\n-\n-if __name__ == "__main__":\n-    unittest.main()\n-\n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_SsrParser.py
--- a/commons/core/parsing/test/Test_SsrParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,154 +0,0 @@
-from commons.core.parsing.SsrParser import SsrParser
-import unittest
-
-
-class Test_SsrParser(unittest.TestCase):
-
-
-    def test_setAttributesFromString(self):
-        ssrLine = "MRRE1H001B07RM1\t1\t2\tta\t19\t153\t190\t734"
-        
-        iSsrParser = SsrParser()
-        iSsrParser.setAttributesFromString(ssrLine)
-        
-        obsBES_name = iSsrParser.getBesName()
-        obsBES_redundancy = iSsrParser.getBesRedundancy()
-        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
-        obsSSR_Motif = iSsrParser.getSsrMotif()
-        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
-        obsSSR_start = iSsrParser.getSsrStart()
-        obsSSR_end = iSsrParser.getSsrEnd()
-        obsBES_size = iSsrParser.getBesSize()
-        
-        expBES_name = 'MRRE1H001B07RM1'
-        expBES_redundancy = '1'
-        expSSR_nbNucleotides = '2'
-        expSSR_Motif = 'ta'
-        expSSR_Motif_number = '19'
-        expSSR_start = '153'
-        expSSR_end = '190'
-        expBES_size = '734'
-        
-        self.assertEquals(expBES_name, obsBES_name)
-        self.assertEquals(expBES_redundancy, obsBES_redundancy)
-        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
-        self.assertEquals(expSSR_Motif, obsSSR_Motif)
-        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
-        self.assertEquals(expSSR_start, obsSSR_start)
-        self.assertEquals(expSSR_end, obsSSR_end)
-        self.assertEquals(expBES_size, obsBES_size)
-        
-    def test_setAttributesFromString_empty_BESName(self):
-        ssrLine = "\t1\t2\tta\t19\t153\t190\t734"
-        
-        iSsrParser = SsrParser()
-        iSsrParser.setAttributesFromString(ssrLine)
-        
-        obsBES_name = iSsrParser.getBesName()
-        obsBES_redundancy = iSsrParser.getBesRedundancy()
-        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
-        obsSSR_Motif = iSsrParser.getSsrMotif()
-        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
-        obsSSR_start = iSsrParser.getSsrStart()
-        obsSSR_end = iSsrParser.getSsrEnd()
-        obsBES_size = iSsrParser.getBesSize()
-        
-        expBES_name = ''
-        expBES_redundancy = ''
-        expSSR_nbNucleotides = ''
-        expSSR_Motif = ''
-        expSSR_Motif_number = ''
-        expSSR_start = ''
-        expSSR_end = ''
-        expBES_size = ''
-        
-        self.assertEquals(expBES_name, obsBES_name)
-        self.assertEquals(expBES_redundancy, obsBES_redundancy)
-        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
-        self.assertEquals(expSSR_Motif, obsSSR_Motif)
-        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
-        self.assertEquals(expSSR_start, obsSSR_start)
-        self.assertEquals(expSSR_end, obsSSR_end)
-        self.assertEquals(expBES_size, obsBES_size)
-
-    def test_setAttributesFromString_less_than_8_fields(self):
-        ssrLine = "1\t2\tta\t19\t153\t190\t734"
-        
-        iSsrParser = SsrParser()
-        iSsrParser.setAttributesFromString(ssrLine)
-        
-        obsBES_name = iSsrParser.getBesName()
-        obsBES_redundancy = iSsrParser.getBesRedundancy()
-        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
-        obsSSR_Motif = iSsrParser.getSsrMotif()
-        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
-        obsSSR_start = iSsrParser.getSsrStart()
-        obsSSR_end = iSsrParser.getSsrEnd()
-        obsBES_size = iSsrParser.getBesSize()
-        
-        expBES_name = ''
-        expBES_redundancy = ''
-        expSSR_nbNucleotides = ''
-        expSSR_Motif = ''
-        expSSR_Motif_number = ''
-        expSSR_start = ''
-        expSSR_end = ''
-        expBES_size = ''
-        
-        self.assertEquals(expBES_name, obsBES_name)
-        self.assertEquals(expBES_redundancy, obsBES_redundancy)
-        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
-        self.assertEquals(expSSR_Motif, obsSSR_Motif)
-        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
-        self.assertEquals(expSSR_start, obsSSR_start)
-        self.assertEquals(expSSR_end, obsSSR_end)
-        self.assertEquals(expBES_size, obsBES_size)
-        
-    def test_setAttributes(self):
-        lResults = ['MRRE1H001B07RM1','1','2','ta','19','153','190','734']
-        lineNumber = 1
-        
-        iSsrParser = SsrParser()
-        iSsrParser.setAttributes(lResults, lineNumber)
-        
-        obsBES_name = iSsrParser.getBesName()
-        obsBES_redundancy = iSsrParser.getBesRedundancy()
-        obsSSR_nbNucleotides = iSsrParser.getSsrNbNucleotides()
-        obsSSR_Motif = iSsrParser.getSsrMotif()
-        obsSSR_Motif_number = iSsrParser.getSsrMotifNumber()
-        obsSSR_start = iSsrParser.getSsrStart()
-        obsSSR_end = iSsrParser.getSsrEnd()
-        obsBES_size = iSsrParser.getBesSize()
-        
-        expBES_name = 'MRRE1H001B07RM1'
-        expBES_redundancy = '1'
-        expSSR_nbNucleotides = '2'
-        expSSR_Motif = 'ta'
-        expSSR_Motif_number = '19'
-        expSSR_start = '153'
-        expSSR_end = '190'
-        expBES_size = '734'
-        
-        self.assertEquals(expBES_name, obsBES_name)
-        self.assertEquals(expBES_redundancy, obsBES_redundancy)
-        self.assertEquals(expSSR_nbNucleotides, obsSSR_nbNucleotides)
-        self.assertEquals(expSSR_Motif, obsSSR_Motif)
-        self.assertEquals(expSSR_Motif_number, obsSSR_Motif_number)
-        self.assertEquals(expSSR_start, obsSSR_start)
-        self.assertEquals(expSSR_end, obsSSR_end)
-        self.assertEquals(expBES_size, obsBES_size)
-        
-    def test_eq_Equals(self):
-        SsrParser1 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
-        SsrParser2 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
-        
-        self.assertTrue(SsrParser1 == SsrParser2)
-        
-    def test_eq_NotEquals(self):
-        SsrParser1 = SsrParser('MRRE1H001A12RM1', '1', '4', 'ttta', '6', '272', '295', '724')
-        SsrParser2 = SsrParser('MRRE1H001A12RM3', '1', '5', 'ttta', '6', '272', '295', '852')
-        
-        self.assertFalse(SsrParser1 == SsrParser2)
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_VarscanFile.py
--- a/commons/core/parsing/test/Test_VarscanFile.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,287 +0,0 @@\n-from commons.core.parsing.VarscanFile import VarscanFile\n-from commons.core.parsing.VarscanHit import VarscanHit\n-import unittest\n-import os\n-from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag\n-from commons.core.parsing.VarscanHit_v2_2_8 import VarscanHit_v2_2_8\n-from commons.core.parsing.VarscanHit_v2_2_8_WithTag import VarscanHit_v2_2_8_WithTag\n-from commons.core.checker.CheckerException import CheckerException\n-\n-class Test_VarscanFile(unittest.TestCase):\n-\n-    def test_parse_fileWithHeader(self):\n-        varscanFileName = "file.varscan"\n-        self._writeVarscanFile(varscanFileName)\n-        \n-        varscanHit1 = VarscanHit()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'32\')\n-        varscanHit1.setRef(\'C\')\n-        varscanHit1.setVar(\'T\')\n-        \n-        varscanHit2 = VarscanHit()\n-        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit2.setPosition(\'34\')\n-        varscanHit2.setRef(\'A\')\n-        varscanHit2.setVar(\'T\')\n-        expVarscanHitsList = [varscanHit1, varscanHit2]\n-        \n-        iVarscanFile = VarscanFile(varscanFileName)\n-        iVarscanFile.parse()\n-        obsVarscanHitsList = iVarscanFile.getVarscanHitsList()\n-        os.remove(varscanFileName)\n-        \n-        self.assertEquals(expVarscanHitsList, obsVarscanHitsList)   \n-\n-    def test_parse_FileWithoutHeader(self):\n-        varscanFileName = "file.varscan"\n-        self._writeVarscanFileWithoutHeader(varscanFileName)\n-        \n-        varscanHit1 = VarscanHit()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'32\')\n-        varscanHit1.setRef(\'C\')\n-        varscanHit1.setVar(\'T\')\n-        \n-        varscanHit2 = VarscanHit()\n-        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit2.setPosition(\'34\')\n-        varscanHit2.setRef(\'A\')\n-        
varscanHit2.setVar(\'T\')\n-        expVarscanHitsList = [varscanHit1, varscanHit2]\n-        \n-        iVarscanFile = VarscanFile(varscanFileName)\n-        iVarscanFile.parse()\n-        obsVarscanHitsList = iVarscanFile.getVarscanHitsList()\n-        obsTypeOfVarscanFile = iVarscanFile.getTypeOfVarscanFile()\n-        expTypeOfVarscanFile = "Varscan_2_2"\n-        \n-        self.assertEquals(expVarscanHitsList, obsVarscanHitsList) \n-        self.assertEquals(expTypeOfVarscanFile, obsTypeOfVarscanFile) \n-        os.remove(varscanFileName)\n-        \n-    def test_parse_VarscanFileWithTag(self):\n-        inputFileName = "%s/commons/core/parsing/test/varscan.tab" % os.environ["REPET_PATH"]\n-        self._writeVarscanFileWithTag(inputFileName)\n-        launcher = VarscanFile(inputFileName)\n-        launcher.parse()\n-        obsListOfVarscanHits = launcher.getListOfVarscanHits() \n-        \n-        varscanHit1 = VarscanHit_WithTag()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'32\')\n-        varscanHit1.setRef(\'C\')\n-        varscanHit1.setVar(\'T\')\n-        varscanHit1.setTag(\'EspeceA\')\n-        \n-        varscanHit2 = VarscanHit_WithTag()\n-        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit2.setPosition(\'34\')\n-        varscanHit2.setRef(\'A\')\n-        varscanHit2.setVar(\'T\')\n-        varscanHit2.setTag(\'EspeceA\')\n-        expVarscanHitsList = [varscanHit1, varscanHit2] \n-        \n-        obsTypeOfVarscanFile = launcher.getTypeOfVarscanFile()\n-        expTypeOfVarscanFile = "Varscan_2_2_WithTag"\n-        \n-        self.assertEquals(expVarscanHitsList, obsListOfVarscanHits) \n-        self.assertEquals(expTypeOfVarscanFile, obsTypeOfVarscanFile) \n-        os.remove(inputFileName)\n-        \n-    def test_parse_VarscanFile_v2_2_8(self):\n-        inputFileName = "%s/commons/core/parsing/test/varscan.tab" % os.environ["REPET_PATH"]\n-        
self._writeVarscanFile_v2_2_8(inputFileName)\n-        launcher = VarscanFile(inputFileName)\n-        launcher.parse()\n-        obsListOfVarscanHits = launcher.getListOfVarscanHits('..b'expVarscanHit = VarscanHit_v2_2_8()\n-        expVarscanHit.setChrom(\'C11HBa0064J13_LR285\')\n-        expVarscanHit.setPosition(\'3227\')\n-        expVarscanHit.setRef(\'G\')\n-        expVarscanHit.setVar(\'A\')\n-        expVarscanHit.setCns(\'A\')\n-        self.assertEquals(expVarscanHit, obsVarscanHit)\n-        \n-    def test_createVarscanObjectFromLine_VarscanHit_v2_2_8_WithTag(self):\n-        line = "C11HBa0064J13_LR285\\t3227\\tG\\tA\\t0\\t1\\t100%\\t0\\t1\\t0\\t54\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tA\\tEspeceA\\n"\n-        nbLine = 1\n-        launcher = VarscanFile()\n-        launcher.setTypeOfVarscanFile("Varscan_2_2_8_WithTag")\n-        obsVarscanHit = launcher.createVarscanObjectFromLine(line, nbLine)\n-        expVarscanHit = VarscanHit_v2_2_8_WithTag()\n-        expVarscanHit.setChrom(\'C11HBa0064J13_LR285\')\n-        expVarscanHit.setPosition(\'3227\')\n-        expVarscanHit.setRef(\'G\')\n-        expVarscanHit.setVar(\'A\')\n-        expVarscanHit.setCns(\'A\')\n-        expVarscanHit.setTag(\'EspeceA\')\n-        self.assertEquals(expVarscanHit, obsVarscanHit)\n-    \n-    def _writeVarscanFile(self, varscanFileName):\n-        varscanFile = open(varscanFileName, \'w\')\n-        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n-        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\n")\n-        varscanFile.close()\n-\n-    def _writeVarscanFileWithoutHeader(self, varscanFileName):\n-        varscanFile = open(varscanFileName, \'w\')\n-        
varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\n")\n-        varscanFile.close()\n-    \n-    def _writeVarscanFileWithTag(self, varscanFileName):\n-        varscanFile = open(varscanFileName, \'w\')\n-        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n-        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\tEspeceA\\n")\n-        varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\tEspeceA\\n")\n-        varscanFile.close()\n-    \n-    def _writeVarscanFile_v2_2_8(self, varscanFileName):\n-        varscanFile = open(varscanFileName, \'w\')\n-        varscanFile.write("Chrom\\tPosition\\tRef\\tCons\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\tMapQual1\\tMapQual2\\tReads1Plus\\tReads1Minus\\tReads2Plus\\tReads2Minus\\tVarAllele\\n")\n-        varscanFile.write("C11HBa0064J13_LR285\\t3227\\tG\\tA\\t0\\t1\\t100%\\t0\\t1\\t0\\t54\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tA\\n")\n-        varscanFile.write("C11HBa0064J13_LR285\\t3230\\tG\\tT\\t0\\t1\\t100%\\t0\\t1\\t0\\t54\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tT\\n")\n-        varscanFile.close()\n-    \n-    def _writeOther(self, fileName):\n-        file = open(fileName, \'w\')\n-        file.write(\'##gff-version 3\\n\')\n-        file.write(\'chr16\\tBlatToGff\\tBES\\t21686950\\t21687294\\t.\\t+\\t.\\tID=MRRE1H001H13FM1;Name=MRRE1H001H13FM1;bes_start=21686950;bes_end=21687294;bes_size=22053297\\n\')\n-        file.write(\'chr16\\tBlatToGff\\tBES\\t21736364\\t21737069\\t.\\t+\\t.\\tID=machin1;Name=machin1;bes_start=21736364;bes_end=21737069;bes_size=22053297\\n\')\n-        
file.write(\'chr11\\tBlatToGff\\tBES\\t3725876\\t3726473\\t.\\t+\\t.\\tID=MRRE1H032F08FM1;Name=MRRE1H032F08FM1;bes_start=3725876;bes_end=3726473;bes_size=19818926\\n\')\n-        file.write(\'chr11\\tBlatToGff\\tBES\\t3794984\\t3795627\\t.\\t+\\t.\\tID=machin2;Name=machin2;bes_start=3794984;bes_end=3795627;bes_size=19818926\\n\')\n-        file.write(\'chr18\\tBlatToGff\\tBES\\t12067347\\t12067719\\t.\\t+\\t.\\tID=machin3;Name=machin3;bes_start=12067347;bes_end=12067719;bes_size=29360087\\n\')\n-        file.close()\n-        \n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_VarscanFileForGnpSNP.py
--- a/commons/core/parsing/test/Test_VarscanFileForGnpSNP.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,305 +0,0 @@\n-import unittest\n-import os\n-\n-from commons.core.parsing.VarscanFileForGnpSNP import VarscanFileForGnpSNP\n-from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP\n-\n-class Test_VarscanFileForGnpSNP(unittest.TestCase):\n-\n-    def test__init__(self):\n-        expFastqFileName = "SR.fastq"\n-        expRefFastaFileName = "ref.fasta"\n-        expTaxonName = "Arabidopsis thaliana"\n-        expVarscanFieldSeparator = "\\t"\n-        expVarscanHitsList = []\n-        \n-        iVarscanFileForGnpSNP = VarscanFileForGnpSNP("", expFastqFileName, expRefFastaFileName, expTaxonName)\n-        \n-        obsFastaqFileName = iVarscanFileForGnpSNP.getFastqFileName()\n-        obsRefFastaFileName = iVarscanFileForGnpSNP.getRefFastaFileName()\n-        obsTaxonName = iVarscanFileForGnpSNP.getTaxonName()\n-        obsVarscanFieldSeparator = iVarscanFileForGnpSNP.getVarscanFieldSeparator()\n-        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()\n-        \n-        self.assertEquals(expFastqFileName, obsFastaqFileName)\n-        self.assertEquals(expRefFastaFileName, obsRefFastaFileName)\n-        self.assertEquals(expTaxonName, obsTaxonName)\n-        self.assertEquals(expVarscanFieldSeparator, obsVarscanFieldSeparator)\n-        self.assertEquals(expVarscanHitsList, obsVarscanHitsList)\n-\n-    def test_parse(self):\n-        varscanFileName = "varscan.tab"\n-        self._writeVarscanFile(varscanFileName)\n-        \n-        varscanHit1 = VarscanHitForGnpSNP()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'32\')\n-        varscanHit1.setRef(\'C\')\n-        varscanHit1.setVar(\'T\')\n-        varscanHit1.setReads1(\'1\')\n-        varscanHit1.setReads2(\'2\')\n-        varscanHit1.setVarFreq(\'66,67%\')\n-        varscanHit1.setStrands1(\'1\')\n-        varscanHit1.setStrands2(\'1\')\n-        varscanHit1.setQual1(\'37\')\n-        varscanHit1.setQual2(\'35\')\n-        
varscanHit1.setPvalue(\'0.3999999999999999\')\n-        varscanHit1.setGnpSNPRef("C")\n-        varscanHit1.setGnpSNPVar("T")\n-        varscanHit1.setGnpSNPPosition(32)\n-        varscanHit1.setOccurrence(1)\n-        varscanHit1.setPolymType("SNP")\n-        varscanHit1.setPolymLength(1)\n-        \n-        varscanHit2 = VarscanHitForGnpSNP()\n-        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit2.setPosition(\'34\')\n-        varscanHit2.setRef(\'A\')\n-        varscanHit2.setVar(\'T\')\n-        varscanHit2.setReads1(\'1\')\n-        varscanHit2.setReads2(\'2\')\n-        varscanHit2.setVarFreq(\'66,67%\')\n-        varscanHit2.setStrands1(\'1\')\n-        varscanHit2.setStrands2(\'1\')\n-        varscanHit2.setQual1(\'40\')\n-        varscanHit2.setQual2(\'34\')\n-        varscanHit2.setPvalue(\'0.3999999999999999\')\n-        varscanHit2.setGnpSNPRef("A")\n-        varscanHit2.setGnpSNPVar("T")\n-        varscanHit2.setGnpSNPPosition(34)\n-        varscanHit2.setOccurrence(1)\n-        varscanHit2.setPolymType("SNP")\n-        varscanHit2.setPolymLength(1)\n-        expVarscanHitsList = [varscanHit1, varscanHit2]\n-        \n-        iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, \'\', \'\', \'\')\n-        iVarscanFileForGnpSNP.parse()\n-        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()\n-        os.remove(varscanFileName)\n-        \n-        self.assertEquals(expVarscanHitsList, obsVarscanHitsList)\n-    \n-    def test_parse_with_same_position_and_chr_and_type(self):\n-        varscanFileName = "varscan.tab"\n-        self._writeVarscanFile_2(varscanFileName)\n-        \n-        varscanHit1 = VarscanHitForGnpSNP()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'32\')\n-        varscanHit1.setRef(\'C\')\n-        varscanHit1.setVar(\'T\')\n-        varscanHit1.setReads1(\'1\')\n-        varscanHit1.setReads2(\'2\')\n-        
varscanHit1.setVarFreq(\'66,67%\')\n-        varscanHit1.setStrands1(\'1\')\n-        varscanHit1.setStrands2(\'1\')\n-        varscanHit1.setQual1(\'37\')\n-        varscanHit1.setQual2(\'35\')\n-        var'..b'     refFastaFileName = "ref.fasta"\n-        taxonName = "Arabidopsis thaliana"\n-        \n-        varscanHit1 = VarscanHitForGnpSNP()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'34\')\n-        varscanHit1.setRef(\'A\')\n-        varscanHit1.setVar(\'T\')\n-        varscanHit1.setReads1(\'1\')\n-        varscanHit1.setReads2(\'2\')\n-        varscanHit1.setVarFreq(\'66,67%\')\n-        varscanHit1.setStrands1(\'1\')\n-        varscanHit1.setStrands2(\'1\')\n-        varscanHit1.setQual1(\'40\')\n-        varscanHit1.setQual2(\'34\')\n-        varscanHit1.setPvalue(\'0.3999999999999999\')\n-        lVarscanHits1 = [varscanHit1]\n-        \n-        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)\n-        iVarscanFileForGnpSNP1.setVarscanHitsList(lVarscanHits1)\n-        \n-        varscanHit2 = VarscanHitForGnpSNP()\n-        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit2.setPosition(\'34\')\n-        varscanHit2.setRef(\'A\')\n-        varscanHit2.setVar(\'T\')\n-        varscanHit2.setReads1(\'1\')\n-        varscanHit2.setReads2(\'2\')\n-        varscanHit2.setVarFreq(\'66,67%\')\n-        varscanHit2.setStrands1(\'1\')\n-        varscanHit2.setStrands2(\'1\')\n-        varscanHit2.setQual1(\'40\')\n-        varscanHit2.setQual2(\'34\')\n-        varscanHit2.setPvalue(\'0.3999999999999999\')\n-        lVarscanHits2 = [varscanHit2]\n-\n-        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)\n-        iVarscanFileForGnpSNP2.setVarscanHitsList(lVarscanHits2)\n-\n-        self.assertTrue(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)\n-    \n-    def _writeVarscanFile(self, 
varscanFileName):\n-        varscanFile = open(varscanFileName, \'w\')\n-        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n-        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.write("C02HBa0291P19_LR48\\t34\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t40\\t34\\t0.3999999999999999\\n")\n-        varscanFile.close()\n-    \n-    def _writeVarscanFile_2(self, varscanFileName):\n-        varscanFile = open(varscanFileName, \'w\')\n-        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n-        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.write("C02HBa0291P19_LR48\\t32\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.close()\n-        \n-    def _writeVarscanFile_3(self, varscanFileName):\n-        varscanFile = open(varscanFileName, \'w\')\n-        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n-        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.write("C02HBa0291P19_LR48\\t32\\tC\\t+A\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.close()\n-\n-    def _writeVarscanFile_4(self, varscanFileName):\n-        varscanFile = open(varscanFileName, \'w\')\n-        varscanFile.write("Chrom\\tPosition\\tRef\\tVar\\tReads1\\tReads2\\tVarFreq\\tStrands1\\tStrands2\\tQual1\\tQual2\\tPvalue\\n")\n-        varscanFile.write("seqname\\t2\\tA\\tT\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        
varscanFile.write("seqname\\t4\\tC\\tG\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.write("seqname\\t4\\tC\\tA\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.write("seqname\\t8\\tT\\tA\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.write("chrom\\t4\\tC\\tG\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.write("chrom\\t4\\tC\\tA\\t1\\t2\\t66,67%\\t1\\t1\\t37\\t35\\t0.3999999999999999\\n")\n-        varscanFile.close()\n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_VarscanHit.py
--- a/commons/core/parsing/test/Test_VarscanHit.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,165 +0,0 @@
-import unittest
-from commons.core.parsing.VarscanHit import VarscanHit
-from commons.core.checker.CheckerException import CheckerException
-
-class Test_VarscanHit(unittest.TestCase):
-
-    def test_setAttributesFromString(self):
-        line = "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n"
-        
-        expChrom = "C02HBa0291P19_LR48"
-        expPosition = "32"
-        expRef = "C"
-        expVar = "T"
-        
-        varscanHit = VarscanHit()
-        varscanHit.setAttributesFromString(line)
-        
-        obsChrom = varscanHit.getChrom()
-        obsPosition = varscanHit.getPosition()
-        obsRef = varscanHit.getRef()
-        obsVar = varscanHit.getVar()
-        
-        self.assertEquals(expChrom, obsChrom)
-        self.assertEquals(expPosition, obsPosition)
-        self.assertEquals(expRef, obsRef)
-        self.assertEquals(expVar, obsVar)
-        
-    def test_setAttributesFromString_empty_chrom(self):
-        line = "\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n"
-        iVarscanHit = VarscanHit()
-        try :
-            iVarscanHit.setAttributesFromString(line)
-        except CheckerException, e:
-            checkerExceptionInstance = e
-        
-        expMessage = "The field Chrom is empty in varscan file in line "
-        obsMessage = checkerExceptionInstance.msg
-           
-        self.assertEquals(expMessage, obsMessage)
-
-    def  test_setAttributesFromString_less_than_12_fields(self):
-        line = "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\n"
-        iVarscanHit = VarscanHit()
-        iVarscanHit.setAttributesFromString(line)
-        self.assertEquals("", iVarscanHit.getQualVar())
-        self.assertEquals("", iVarscanHit.getPValue())
-        
-    def test_setAttributes(self):
-        lResults = ["C02HBa0291P19_LR48", "32", "C", "T", "1", "2", "66,67%", "1", "1", "37", "35", "0.3999999999999999"]
-        lineNumber = 1
-        
-        expChrom = "C02HBa0291P19_LR48"
-        expPosition = "32"
-        expRef = "C"
-        expVar = "T"
-        
-        varscanHit = VarscanHit()
-        varscanHit.setAttributes(lResults, lineNumber)
-        
-        obsChrom = varscanHit.getChrom()
-        obsPosition = varscanHit.getPosition()
-        obsRef = varscanHit.getRef()
-        obsVar = varscanHit.getVar()
-        
-        self.assertEquals(expChrom, obsChrom)
-        self.assertEquals(expPosition, obsPosition)
-        self.assertEquals(expRef, obsRef)
-        self.assertEquals(expVar, obsVar)
-
-    def test_setAttributes_empty_chrom(self):
-        lResults = ["", "", "", "", "", "", "10", "", "", "", "", ""]
-        lineNumber = 1
-        
-        varscanHit = VarscanHit()
-        checkerExceptionInstance = None
-        try:
-            varscanHit.setAttributes(lResults, lineNumber)
-        except CheckerException, e:
-            checkerExceptionInstance = e
-        
-        expMessage = "The field Chrom is empty in varscan file in line 1"
-        obsMessage = checkerExceptionInstance.msg
-           
-        self.assertEquals(expMessage, obsMessage)
-
-    def test_setAttributes_empty_position(self):
-        lResults = ["chrom", "", "", "", "", "", "10", "", "", "", "", ""]
-        lineNumber = 5
-        
-        varscanHit = VarscanHit()
-        checkerExceptionInstance = None
-        try:
-            varscanHit.setAttributes(lResults, lineNumber)
-        except CheckerException, e:
-            checkerExceptionInstance = e
-        
-        expMessage = "The field Position is empty in varscan file in line 5"
-        obsMessage = checkerExceptionInstance.msg
-           
-        self.assertEquals(expMessage, obsMessage)
-
-    def test_setAttributes_empty_ref(self):
-        lResults = ["chrom", "position", "", "", "", "", "10", "", "", "", "", ""]
-        lineNumber = 5
-        
-        varscanHit = VarscanHit()
-        checkerExceptionInstance = None
-        try:
-            varscanHit.setAttributes(lResults, lineNumber)
-        except CheckerException, e:
-            checkerExceptionInstance = e
-        
-        expMessage = "The field Ref is empty in varscan file in line 5"
-        obsMessage = checkerExceptionInstance.msg
-           
-        self.assertEquals(expMessage, obsMessage)
-
-    def test_setAttributes_empty_var(self):
-        lResults = ["chrom", "position", "ref", "", "", "", "10", "", "", "", "", ""]
-        lineNumber = 5
-        
-        varscanHit = VarscanHit()
-        checkerExceptionInstance = None
-        try:
-            varscanHit.setAttributes(lResults, lineNumber)
-        except CheckerException, e:
-            checkerExceptionInstance = e
-        
-        expMessage = "The field Var is empty in varscan file in line 5"
-        obsMessage = checkerExceptionInstance.msg
-           
-        self.assertEquals(expMessage, obsMessage)
-
-    def test__eq__notEquals(self):
-        varscanHit1 = VarscanHit()
-        varscanHit1.setChrom('C02HBa0291P19_LR48')
-        varscanHit1.setPosition('32')
-        varscanHit1.setRef('C')
-        varscanHit1.setVar('T')
-        
-        varscanHit2 = VarscanHit()
-        varscanHit2.setChrom('C02HBa0291P19_LR48')
-        varscanHit2.setPosition('34')
-        varscanHit2.setRef('A')
-        varscanHit2.setVar('T')
-        
-        self.assertFalse(varscanHit1 == varscanHit2)
-
-    def test__eq__Equals(self):
-        varscanHit1 = VarscanHit()
-        varscanHit1.setChrom('C02HBa0291P19_LR48')
-        varscanHit1.setPosition('32')
-        varscanHit1.setRef('C')
-        varscanHit1.setVar('T')
-        
-        varscanHit2 = VarscanHit()
-        varscanHit2.setChrom('C02HBa0291P19_LR48')
-        varscanHit2.setPosition('32')
-        varscanHit2.setRef('C')
-        varscanHit2.setVar('T')
-        
-        self.assertTrue(varscanHit1 == varscanHit2)        
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_VarscanHitForGnpSNP.py
--- a/commons/core/parsing/test/Test_VarscanHitForGnpSNP.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,412 +0,0 @@\n-import unittest\n-from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP\n-from commons.core.checker.CheckerException import CheckerException\n-\n-class Test_VarscanHitForGnpSNP(unittest.TestCase):\n-\n-    def test_setAttributes(self):\n-        lResults = ["C02HBa0291P19_LR48", "32", "C", "T", "1", "2", "66,67%", "1", "1", "37", "35", "0.3999999999999999"]\n-        lineNumber = 1\n-    \n-        expChrom = "C02HBa0291P19_LR48"\n-        expPosition = "32"\n-        expRef = "C"\n-        expVar = "T"\n-        expReads1 = "1"\n-        expReads2 = "2"\n-        expVarFreq = 66.67\n-        expStrands1 = "1"\n-        expStrands2 = "1"\n-        expQual1 = "37"\n-        expQual2 = "35"\n-        expPvalue = "0.3999999999999999"\n-        \n-        varscanHit = VarscanHitForGnpSNP()\n-        varscanHit.setAttributes(lResults, lineNumber)\n-        \n-        obsChrom = varscanHit.getChrom()\n-        obsPosition = varscanHit.getPosition()\n-        obsRef = varscanHit.getRef()\n-        obsVar = varscanHit.getVar()\n-        obsReads1 = varscanHit.getReads1()\n-        obsReads2 = varscanHit.getReads2()\n-        obsVarFreq = varscanHit.getVarFreq()\n-        obsStrands1 = varscanHit.getStrands1()\n-        obsStrands2 = varscanHit.getStrands2()\n-        obsQual1 = varscanHit.getQual1()\n-        obsQual2 = varscanHit.getQual2()\n-        obsPvalue = varscanHit.getPvalue()\n-        \n-        self.assertEquals(expChrom, obsChrom)\n-        self.assertEquals(expPosition, obsPosition)\n-        self.assertEquals(expRef, obsRef)\n-        self.assertEquals(expVar, obsVar)\n-        self.assertEquals(expReads1, obsReads1)\n-        self.assertEquals(expReads2, obsReads2)\n-        self.assertEquals(expVarFreq, obsVarFreq)\n-        self.assertEquals(expStrands1, obsStrands1)\n-        self.assertEquals(expStrands2, obsStrands2)\n-        self.assertEquals(expQual1, obsQual1)\n-        self.assertEquals(expQual2, 
obsQual2)\n-        self.assertEquals(expPvalue, obsPvalue)\n-\n-    def test_setAttributes_empty_chrom(self):\n-        lResults = ["", "", "", "", "", "", "10", "", "", "", "", ""]\n-        lineNumber = 1\n-        \n-        varscanHit = VarscanHitForGnpSNP()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field Chrom is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributes_empty_position(self):\n-        lResults = ["chrom", "", "", "", "", "", "10", "", "", "", "", ""]\n-        lineNumber = 5\n-        \n-        varscanHit = VarscanHitForGnpSNP()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field Position is empty in varscan file in line 5"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributes_empty_ref(self):\n-        lResults = ["chrom", "position", "", "", "", "", "10", "", "", "", "", ""]\n-        lineNumber = 5\n-        \n-        varscanHit = VarscanHitForGnpSNP()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field Ref is empty in varscan file in line 5"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributes_empty_var(self):\n-        lResults = ["chrom", "position", 
"ref", "", "", "", "10", "", "", "", "", ""]\n-        lineNumber = 5\n-        \n-        varscanHit = VarscanHitForGnpS'..b'atAlleles2GnpSnp_for_Deletion(self):\n-        varscanHit1 = VarscanHitForGnpSNP()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'32\')\n-        varscanHit1.setRef(\'C\')\n-        varscanHit1.setVar(\'-ATT\')\n-        varscanHit1.setReads1(\'1\')\n-        varscanHit1.setReads2(\'2\')\n-        varscanHit1.setVarFreq(\'66,67%\')\n-        varscanHit1.setStrands1(\'1\')\n-        varscanHit1.setStrands2(\'1\')\n-        varscanHit1.setQual1(\'37\')\n-        varscanHit1.setQual2(\'35\')\n-        varscanHit1.setPvalue(\'0.3999999999999999\')\n-        \n-        expPolymType = "DELETION"\n-        expGnpSnpRef = "ATT"\n-        expGnpSnpVar = "---"\n-        expGnpSnpPosition = 33\n-        \n-        varscanHit1.formatAlleles2GnpSnp()\n-        \n-        obsPolymType = varscanHit1.getPolymType()\n-        obsGnpSnpRef = varscanHit1.getGnpSnpRef()\n-        obsGnpSnpVar = varscanHit1.getGnpSnpVar()\n-        obsGnpSnpPosition = varscanHit1.getGnpSnpPosition()\n-        \n-        self.assertEquals(expPolymType,obsPolymType)\n-        self.assertEquals(expGnpSnpRef, obsGnpSnpRef)\n-        self.assertEquals(expGnpSnpVar, obsGnpSnpVar)\n-        self.assertEquals(expGnpSnpPosition, obsGnpSnpPosition)\n-        \n-    def test_setVarFreq(self):\n-        varscanHit1 = VarscanHitForGnpSNP()  \n-        exp = 66.67      \n-        varscanHit1.setVarFreq(\'66,67%\')\n-        obs = varscanHit1.getVarFreq()\n-        self.assertEquals(exp, obs)\n-            \n-    def test_formatAlleles2GnpSnp_for_Insertion(self):\n-        varscanHit1 = VarscanHitForGnpSNP()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'32\')\n-        varscanHit1.setRef(\'C\')\n-        varscanHit1.setVar(\'+TG\')\n-        varscanHit1.setReads1(\'1\')\n-        
varscanHit1.setReads2(\'2\')\n-        varscanHit1.setVarFreq(\'66,67%\')\n-        varscanHit1.setStrands1(\'1\')\n-        varscanHit1.setStrands2(\'1\')\n-        varscanHit1.setQual1(\'37\')\n-        varscanHit1.setQual2(\'35\')\n-        varscanHit1.setPvalue(\'0.3999999999999999\')\n-        \n-        expPolymType = "INSERTION"\n-        expGnpSnpRef = "--"\n-        expGnpSnpVar = "TG"\n-        expGnpSnpPosition = 32\n-        \n-        varscanHit1.formatAlleles2GnpSnp()\n-        \n-        obsPolymType = varscanHit1.getPolymType()\n-        obsGnpSnpRef = varscanHit1.getGnpSnpRef()\n-        obsGnpSnpVar = varscanHit1.getGnpSnpVar()\n-        obsGnpSnpPosition = varscanHit1.getGnpSnpPosition()\n-        \n-        self.assertEquals(expPolymType,obsPolymType)\n-        self.assertEquals(expGnpSnpRef, obsGnpSnpRef)\n-        self.assertEquals(expGnpSnpVar, obsGnpSnpVar)\n-        self.assertEquals(expGnpSnpPosition, obsGnpSnpPosition)\n-        \n-    def test_formatAlleles2GnpSnp_for_SNP(self):\n-        varscanHit1 = VarscanHitForGnpSNP()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'12\')\n-        varscanHit1.setRef(\'G\')\n-        varscanHit1.setVar(\'T\')\n-        varscanHit1.setReads1(\'1\')\n-        varscanHit1.setReads2(\'2\')\n-        varscanHit1.setVarFreq(\'66,67%\')\n-        varscanHit1.setStrands1(\'1\')\n-        varscanHit1.setStrands2(\'1\')\n-        varscanHit1.setQual1(\'37\')\n-        varscanHit1.setQual2(\'35\')\n-        varscanHit1.setPvalue(\'0.3999999999999999\')\n-        \n-        expPolymType = "SNP"\n-        expGnpSnpRef = "G"\n-        expGnpSnpVar = "T"\n-        expGnpSnpPosition = 12\n-        \n-        varscanHit1.formatAlleles2GnpSnp()\n-        \n-        obsPolymType = varscanHit1.getPolymType()\n-        obsGnpSnpRef = varscanHit1.getGnpSnpRef()\n-        obsGnpSnpVar = varscanHit1.getGnpSnpVar()\n-        obsGnpSnpPosition = varscanHit1.getGnpSnpPosition()\n-    
    \n-        self.assertEquals(expPolymType,obsPolymType)\n-        self.assertEquals(expGnpSnpRef, obsGnpSnpRef)\n-        self.assertEquals(expGnpSnpVar, obsGnpSnpVar)\n-        self.assertEquals(expGnpSnpPosition, obsGnpSnpPosition)\n-        \n-\n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_VarscanHit_WithTag.py
--- a/commons/core/parsing/test/Test_VarscanHit_WithTag.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,212 +0,0 @@\n-import unittest\n-from commons.core.checker.CheckerException import CheckerException\n-from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag\n-\n-class Test_VarscanHit_WithTag(unittest.TestCase):\n-\n-    def test_setAttributesFromString(self):\n-        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\tEspeceA\\n"\n-        \n-        expChrom = "chr1"\n-        expPosition = "1804"\n-        expRef = "T"\n-        expVar = "C"\n-        expReadsRef = "0"\n-        expReadsVar = "1"\n-        expVarFreq = "100%"\n-        expStrandsRef = "0"\n-        expStrandsVar = "1"\n-        expQualRef = "0"\n-        expQualVar = "53"\n-        expPValue = "0.98"\n-        expTag = "EspeceA"\n-        \n-        varscanHit = VarscanHit_WithTag()\n-        varscanHit.setAttributesFromString(line)\n-        \n-        obsChrom = varscanHit.getChrom()\n-        obsPosition = varscanHit.getPosition()\n-        obsRef = varscanHit.getRef()\n-        obsVar = varscanHit.getVar()\n-        obsReadsRef = varscanHit.getReadsRef()\n-        obsReadsVar = varscanHit.getReadsVar()\n-        obsVarFreq = varscanHit.getVarFreq()\n-        obsStrandsRef = varscanHit.getStrandsRef()\n-        obsStrandsVar = varscanHit.getStrandsVar()\n-        obsQualRef = varscanHit.getQualRef()\n-        obsQualVar = varscanHit.getQualVar()\n-        obsPValue = varscanHit.getPValue()\n-        obsTag = varscanHit.getTag()\n-        \n-        self.assertEquals(expChrom, obsChrom)\n-        self.assertEquals(expPosition, obsPosition)\n-        self.assertEquals(expRef, obsRef)\n-        self.assertEquals(expVar, obsVar)\n-        self.assertEquals(expReadsRef, obsReadsRef)\n-        self.assertEquals(expReadsVar, obsReadsVar)\n-        self.assertEquals(expVarFreq, obsVarFreq)\n-        self.assertEquals(expStrandsRef, obsStrandsRef)\n-        self.assertEquals(expStrandsVar, obsStrandsVar)\n-        self.assertEquals(expQualRef, obsQualRef)\n-   
     self.assertEquals(expQualVar, obsQualVar)\n-        self.assertEquals(expPValue, obsPValue)\n-        self.assertEquals(expTag, obsTag)\n-        \n-    def test_setAttributesFromString_empty_chrom(self):\n-        line = "\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\tEspeceA\\n"\n-        varscanHit = VarscanHit_WithTag()\n-        try :\n-            varscanHit.setAttributesFromString(line)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        expMessage = "The field Chrom is empty in varscan file in line "\n-        obsMessage = checkerExceptionInstance.msg\n-        self.assertEquals(expMessage, obsMessage)\n-        \n-    def test_setAttributes(self):\n-        lResults = [\'chr1\', \'1804\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n-        lineNumber = 1\n-        \n-        expChrom = "chr1"\n-        expPosition = "1804"\n-        expRef = "T"\n-        expVar = "C"\n-        expReadsRef = "0"\n-        expReadsVar = "1"\n-        expVarFreq = "100%"\n-        expStrandsRef = "0"\n-        expStrandsVar = "1"\n-        expQualRef = "0"\n-        expQualVar = "53"\n-        expPValue = "0.98"\n-        expTag = "EspeceA"\n-        \n-        varscanHit = VarscanHit_WithTag()\n-        varscanHit.setAttributes(lResults, lineNumber)\n-        \n-        obsChrom = varscanHit.getChrom()\n-        obsPosition = varscanHit.getPosition()\n-        obsRef = varscanHit.getRef()\n-        obsVar = varscanHit.getVar()\n-        obsReadsRef = varscanHit.getReadsRef()\n-        obsReadsVar = varscanHit.getReadsVar()\n-        obsVarFreq = varscanHit.getVarFreq()\n-        obsStrandsRef = varscanHit.getStrandsRef()\n-        obsStrandsVar = varscanHit.getStrandsVar()\n-        obsQualRef = varscanHit.getQualRef()\n-        obsQualVar = varscanHit.getQualVar()\n-        obsPValue = varscanHit.getPValue()\n-        obsTag = varscanHit.getTag()\n-        \n-        
self.assertEquals(expChrom, obsChrom)\n-        self.assertEquals(expPosition, obsPosition)\n-        self.assertEquals(expRef, obsRef)\n-        self.assertEquals'..b'q)\n-        self.assertEquals(expStrandsRef, obsStrandsRef)\n-        self.assertEquals(expStrandsVar, obsStrandsVar)\n-        self.assertEquals(expQualRef, obsQualRef)\n-        self.assertEquals(expQualVar, obsQualVar)\n-        self.assertEquals(expPValue, obsPValue)\n-        self.assertEquals(expTag, obsTag)\n-\n-    def test_setAttributes_empty_chrom(self):\n-        lResults = [\'\', \'1804\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n-        lineNumber = 1\n-        \n-        varscanHit = VarscanHit_WithTag()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field Chrom is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributes_empty_position(self):\n-        lResults = [\'chr1\', \'\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n-        lineNumber = 1\n-        \n-        varscanHit = VarscanHit_WithTag()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field Position is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributes_empty_ref(self):\n-        lResults = [\'chr1\', \'1000\', \'\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n- 
       lineNumber = 1\n-        \n-        varscanHit = VarscanHit_WithTag()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field Ref is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributes_empty_cns(self):\n-        lResults = [\'chr1\', \'1000\', \'T\', \'\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'EspeceA\']\n-        lineNumber = 1\n-        \n-        varscanHit = VarscanHit_WithTag()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field Var is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test__eq__notEquals(self):\n-        varscanHit1 = VarscanHit_WithTag()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'34\')\n-        varscanHit1.setRef(\'A\')\n-        varscanHit1.setVar(\'T\')\n-        varscanHit1.setVar(\'EspeceA\')\n-        \n-        varscanHit2 = VarscanHit_WithTag()\n-        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit2.setPosition(\'34\')\n-        varscanHit2.setRef(\'A\')\n-        varscanHit2.setVar(\'T\')\n-        varscanHit2.setVar(\'EspeceB\')\n-        \n-        self.assertFalse(varscanHit1 == varscanHit2)\n-\n-    def test__eq__Equals(self):\n-        varscanHit1 = VarscanHit_WithTag()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'34\')\n-        varscanHit1.setRef(\'A\')\n-        
varscanHit1.setVar(\'T\')\n-        varscanHit1.setVar(\'EspeceA\')\n-        \n-        varscanHit2 = VarscanHit_WithTag()\n-        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit2.setPosition(\'34\')\n-        varscanHit2.setRef(\'A\')\n-        varscanHit2.setVar(\'T\')\n-        varscanHit2.setVar(\'EspeceA\')\n-    \n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_VarscanHit_v2_2_8.py
--- a/commons/core/parsing/test/Test_VarscanHit_v2_2_8.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,290 +0,0 @@\n-import unittest\n-from commons.core.checker.CheckerException import CheckerException\n-from commons.core.parsing.VarscanHit_v2_2_8 import VarscanHit_v2_2_8\n-from commons.core.parsing.VarscanHit import VarscanHit\n-\n-class Test_VarscanHit_v2_2_8(unittest.TestCase):\n-\n-    def test_setAttributesFromString(self):\n-        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n-        \n-        expChrom = "chr1"\n-        expPosition = "1804"\n-        expRef = "T"\n-        expCns = "C"\n-        expReadsRef = "0"\n-        expReadsVar = "1"\n-        expVarFreq = "100%"\n-        expStrandsRef = "0"\n-        expStrandsVar = "1"\n-        expQualRef = "0"\n-        expQualVar = "53"\n-        expPValue = "0.98"\n-        expMapQualRef = "0"\n-        expMapQualVar = "1"\n-        expReadsRefPlus = "0"\n-        expReadsRefMinus = "0"\n-        expReadsVarPlus = "1"\n-        expReadsVarMinus = "0"\n-        expVar = "C"\n-        \n-        varscanHit = VarscanHit_v2_2_8()\n-        varscanHit.setAttributesFromString(line)\n-        \n-        obsChrom = varscanHit.getChrom()\n-        obsPosition = varscanHit.getPosition()\n-        obsRef = varscanHit.getRef()\n-        obsCns = varscanHit.getCns()\n-        obsReadsRef = varscanHit.getReadsRef()\n-        obsReadsVar = varscanHit.getReadsVar()\n-        obsVarFreq = varscanHit.getVarFreq()\n-        obsStrandsRef = varscanHit.getStrandsRef()\n-        obsStrandsVar = varscanHit.getStrandsVar()\n-        obsQualRef = varscanHit.getQualRef()\n-        obsQualVar = varscanHit.getQualVar()\n-        obsPValue = varscanHit.getPValue()\n-        obsMapQualRef = varscanHit.getMapQualRef()\n-        obsMapQualVar = varscanHit.getMapQualVar()\n-        obsReadsRefPlus = varscanHit.getReadsRefPlus()\n-        obsReadsRefMinus = varscanHit.getReadsRefMinus()\n-        obsReadsVarPlus = varscanHit.getReadsVarPlus()\n-        obsReadsVarMinus = 
varscanHit.getReadsVarMinus()\n-        obsVar = varscanHit.getVar()\n-        \n-        self.assertEquals(expChrom, obsChrom)\n-        self.assertEquals(expPosition, obsPosition)\n-        self.assertEquals(expRef, obsRef)\n-        self.assertEquals(expCns, obsCns)\n-        self.assertEquals(expReadsRef, obsReadsRef)\n-        self.assertEquals(expReadsVar, obsReadsVar)\n-        self.assertEquals(expVarFreq, obsVarFreq)\n-        self.assertEquals(expStrandsRef, obsStrandsRef)\n-        self.assertEquals(expStrandsVar, obsStrandsVar)\n-        self.assertEquals(expQualRef, obsQualRef)\n-        self.assertEquals(expQualVar, obsQualVar)\n-        self.assertEquals(expPValue, obsPValue)\n-        self.assertEquals(expMapQualRef, obsMapQualRef)\n-        self.assertEquals(expMapQualVar, obsMapQualVar)\n-        self.assertEquals(expReadsRefPlus, obsReadsRefPlus)\n-        self.assertEquals(expReadsRefMinus, obsReadsRefMinus)\n-        self.assertEquals(expReadsVarPlus, obsReadsVarPlus)\n-        self.assertEquals(expReadsVarMinus, obsReadsVarMinus)\n-        self.assertEquals(expVar, obsVar)\n-        \n-    def test_setAttributesFromString_empty_chrom(self):\n-        line = "\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n-        varscanHit = VarscanHit_v2_2_8()\n-        try :\n-            varscanHit.setAttributesFromString(line)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        expMessage = "The field Chrom is empty in varscan file in line "\n-        obsMessage = checkerExceptionInstance.msg\n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributesFromString_less_than_19_fields(self):\n-        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n-        varscanHit = VarscanHit_v2_2_8()\n-        try :\n-            varscanHit.setAttributesFromString(line)\n-        except CheckerException, e:\n-            
checkerExceptionInstance = e\n-        expMessage = "This varscan line (l.) is not complete"\n-        obsMessage = checkerExceptionInstance.msg\n-        self.a'..b' expMessage = "The field Position is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributes_empty_ref(self):\n-        lResults = [\'chr1\', \'1000\', \'\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\']\n-        lineNumber = 1\n-        \n-        varscanHit = VarscanHit_v2_2_8()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field Ref is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributes_empty_cns(self):\n-        lResults = [\'chr1\', \'1000\', \'T\', \'\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\']\n-        lineNumber = 1\n-        \n-        varscanHit = VarscanHit_v2_2_8()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field Cons is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributes_empty_var(self):\n-        lResults = [\'chr1\', \'1000\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'\']\n-        lineNumber = 1\n-        
\n-        varscanHit = VarscanHit_v2_2_8()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field varAllele is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test__eq__notEquals(self):\n-        varscanHit1 = VarscanHit_v2_2_8()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'32\')\n-        varscanHit1.setRef(\'C\')\n-        varscanHit1.setCns(\'T\')\n-        varscanHit1.setVar(\'T\')\n-        \n-        varscanHit2 = VarscanHit_v2_2_8()\n-        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit2.setPosition(\'34\')\n-        varscanHit2.setRef(\'A\')\n-        varscanHit2.setCns(\'T\')\n-        varscanHit2.setVar(\'T\')\n-        \n-        self.assertFalse(varscanHit1 == varscanHit2)\n-\n-    def test__eq__Equals(self):\n-        varscanHit1 = VarscanHit_v2_2_8()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'34\')\n-        varscanHit1.setRef(\'A\')\n-        varscanHit1.setCns(\'T\')\n-        varscanHit1.setVar(\'T\')\n-        \n-        varscanHit2 = VarscanHit_v2_2_8()\n-        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit2.setPosition(\'34\')\n-        varscanHit2.setRef(\'A\')\n-        varscanHit2.setCns(\'T\')\n-        varscanHit2.setVar(\'T\')\n-        \n-        self.assertTrue(varscanHit1 == varscanHit2)\n-        \n-    def test_convertVarscanHit_v2_2_8_To_VarscanHit(self):\n-        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\n"\n-        iVarscanHit_v2_2_8_WithTag = VarscanHit_v2_2_8()\n-        iVarscanHit_v2_2_8_WithTag.setAttributesFromString(line)\n-        
obsVarcanHit_WithTag = iVarscanHit_v2_2_8_WithTag.convertVarscanHit_v2_2_8_To_VarscanHit()\n-        \n-        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\n"\n-        expVarcanHit_WithTag = VarscanHit()\n-        expVarcanHit_WithTag.setAttributesFromString(line)\n-        \n-        self.assertEquals(expVarcanHit_WithTag, obsVarcanHit_WithTag)\n-\n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_VarscanHit_v2_2_8_WithTag.py
--- a/commons/core/parsing/test/Test_VarscanHit_v2_2_8_WithTag.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,300 +0,0 @@\n-import unittest\n-from commons.core.checker.CheckerException import CheckerException\n-from commons.core.parsing.VarscanHit_v2_2_8_WithTag import VarscanHit_v2_2_8_WithTag\n-from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag\n-\n-class Test_VarscanHit_v2_2_8_WithTag(unittest.TestCase):\n-\n-    def test_setAttributesFromString(self):\n-        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n-        \n-        expChrom = "chr1"\n-        expPosition = "1804"\n-        expRef = "T"\n-        expCns = "C"\n-        expReadsRef = "0"\n-        expReadsVar = "1"\n-        expVarFreq = "100%"\n-        expStrandsRef = "0"\n-        expStrandsVar = "1"\n-        expQualRef = "0"\n-        expQualVar = "53"\n-        expPValue = "0.98"\n-        expMapQualRef = "0"\n-        expMapQualVar = "1"\n-        expReadsRefPlus = "0"\n-        expReadsRefMinus = "0"\n-        expReadsVarPlus = "1"\n-        expReadsVarMinus = "0"\n-        expVar = "C"\n-        expTag = "EspeceA"\n-        \n-        varscanHit = VarscanHit_v2_2_8_WithTag()\n-        varscanHit.setAttributesFromString(line)\n-        \n-        obsChrom = varscanHit.getChrom()\n-        obsPosition = varscanHit.getPosition()\n-        obsRef = varscanHit.getRef()\n-        obsCns = varscanHit.getCns()\n-        obsReadsRef = varscanHit.getReadsRef()\n-        obsReadsVar = varscanHit.getReadsVar()\n-        obsVarFreq = varscanHit.getVarFreq()\n-        obsStrandsRef = varscanHit.getStrandsRef()\n-        obsStrandsVar = varscanHit.getStrandsVar()\n-        obsQualRef = varscanHit.getQualRef()\n-        obsQualVar = varscanHit.getQualVar()\n-        obsPValue = varscanHit.getPValue()\n-        obsMapQualRef = varscanHit.getMapQualRef()\n-        obsMapQualVar = varscanHit.getMapQualVar()\n-        obsReadsRefPlus = varscanHit.getReadsRefPlus()\n-        obsReadsRefMinus = varscanHit.getReadsRefMinus()\n-       
 obsReadsVarPlus = varscanHit.getReadsVarPlus()\n-        obsReadsVarMinus = varscanHit.getReadsVarMinus()\n-        obsVar = varscanHit.getVar()\n-        obsTag = varscanHit.getTag()\n-        \n-        self.assertEquals(expChrom, obsChrom)\n-        self.assertEquals(expPosition, obsPosition)\n-        self.assertEquals(expRef, obsRef)\n-        self.assertEquals(expCns, obsCns)\n-        self.assertEquals(expReadsRef, obsReadsRef)\n-        self.assertEquals(expReadsVar, obsReadsVar)\n-        self.assertEquals(expVarFreq, obsVarFreq)\n-        self.assertEquals(expStrandsRef, obsStrandsRef)\n-        self.assertEquals(expStrandsVar, obsStrandsVar)\n-        self.assertEquals(expQualRef, obsQualRef)\n-        self.assertEquals(expQualVar, obsQualVar)\n-        self.assertEquals(expPValue, obsPValue)\n-        self.assertEquals(expMapQualRef, obsMapQualRef)\n-        self.assertEquals(expMapQualVar, obsMapQualVar)\n-        self.assertEquals(expReadsRefPlus, obsReadsRefPlus)\n-        self.assertEquals(expReadsRefMinus, obsReadsRefMinus)\n-        self.assertEquals(expReadsVarPlus, obsReadsVarPlus)\n-        self.assertEquals(expReadsVarMinus, obsReadsVarMinus)\n-        self.assertEquals(expVar, obsVar)\n-        self.assertEquals(expTag, obsTag)\n-        \n-    def test_setAttributesFromString_empty_chrom(self):\n-        line = "\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n-        varscanHit = VarscanHit_v2_2_8_WithTag()\n-        try :\n-            varscanHit.setAttributesFromString(line)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        expMessage = "The field Chrom is empty in varscan file in line "\n-        obsMessage = checkerExceptionInstance.msg\n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributesFromString_less_than_20_fields(self):\n-        line = 
"chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n-        varscanHit = VarscanHit_v2_2_8_WithTag()\n-        try :\n-            varscanHit.setAttributesFromString(line)\n-       '..b'\'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\', \'EspeceA\']\n-        lineNumber = 1\n-        \n-        varscanHit = VarscanHit_v2_2_8_WithTag()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field Ref is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributes_empty_cns(self):\n-        lResults = [\'chr1\', \'1000\', \'T\', \'\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'C\', \'EspeceA\']\n-        lineNumber = 1\n-        \n-        varscanHit = VarscanHit_v2_2_8_WithTag()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-            checkerExceptionInstance = e\n-        \n-        expMessage = "The field Cons is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test_setAttributes_empty_var(self):\n-        lResults = [\'chr1\', \'1000\', \'T\', \'C\', \'0\', \'1\', \'100%\', \'0\', \'1\', \'0\', \'53\', \'0.98\', \'0\', \'1\', \'0\', \'0\', \'1\', \'0\', \'\', \'EspeceA\']\n-        lineNumber = 1\n-        \n-        varscanHit = VarscanHit_v2_2_8_WithTag()\n-        checkerExceptionInstance = None\n-        try:\n-            varscanHit.setAttributes(lResults, lineNumber)\n-        except CheckerException, e:\n-    
        checkerExceptionInstance = e\n-        \n-        expMessage = "The field varAllele is empty in varscan file in line 1"\n-        obsMessage = checkerExceptionInstance.msg\n-           \n-        self.assertEquals(expMessage, obsMessage)\n-\n-    def test__eq__notEquals(self):\n-        varscanHit1 = VarscanHit_v2_2_8_WithTag()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'34\')\n-        varscanHit1.setRef(\'A\')\n-        varscanHit1.setCns(\'T\')\n-        varscanHit1.setVar(\'T\')\n-        varscanHit1.setVar(\'EspeceA\')\n-        \n-        varscanHit2 = VarscanHit_v2_2_8_WithTag()\n-        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit2.setPosition(\'34\')\n-        varscanHit2.setRef(\'A\')\n-        varscanHit2.setCns(\'T\')\n-        varscanHit2.setVar(\'T\')\n-        varscanHit2.setVar(\'EspeceB\')\n-        \n-        self.assertFalse(varscanHit1 == varscanHit2)\n-\n-    def test__eq__Equals(self):\n-        varscanHit1 = VarscanHit_v2_2_8_WithTag()\n-        varscanHit1.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit1.setPosition(\'34\')\n-        varscanHit1.setRef(\'A\')\n-        varscanHit1.setCns(\'T\')\n-        varscanHit1.setVar(\'T\')\n-        varscanHit1.setVar(\'EspeceA\')\n-        \n-        varscanHit2 = VarscanHit_v2_2_8_WithTag()\n-        varscanHit2.setChrom(\'C02HBa0291P19_LR48\')\n-        varscanHit2.setPosition(\'34\')\n-        varscanHit2.setRef(\'A\')\n-        varscanHit2.setCns(\'T\')\n-        varscanHit2.setVar(\'T\')\n-        varscanHit2.setVar(\'EspeceA\')\n-        \n-        self.assertTrue(varscanHit1 == varscanHit2)\n-        \n-    def test_convertVarscanHit_v2_2_8_WithTag_To_VarscanHit_WithTag(self):\n-        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\t0\\t1\\t0\\t0\\t1\\t0\\tC\\tEspeceA\\n"\n-        iVarscanHit_v2_2_8_WithTag = VarscanHit_v2_2_8_WithTag()\n-        
iVarscanHit_v2_2_8_WithTag.setAttributesFromString(line)\n-        obsVarcanHit_WithTag = iVarscanHit_v2_2_8_WithTag.convertVarscanHit_v2_2_8_WithTag_To_VarscanHit_WithTag()\n-        \n-        line = "chr1\\t1804\\tT\\tC\\t0\\t1\\t100%\\t0\\t1\\t0\\t53\\t0.98\\tEspeceA\\n"\n-        expVarcanHit_WithTag = VarscanHit_WithTag()\n-        expVarcanHit_WithTag.setAttributesFromString(line)\n-        \n-        self.assertEquals(expVarcanHit_WithTag, obsVarcanHit_WithTag)\n-\n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_VarscanToVCF.py
--- a/commons/core/parsing/test/Test_VarscanToVCF.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,61 +0,0 @@
-import unittest
-from commons.core.parsing.VarscanToVCF import VarscanToVCF
-
-class Test_VarscanToVCF(unittest.TestCase):
-    
-    def setUp(self):
-        self._iVarscanToVCF = VarscanToVCF(doClean = True, verbosity = 2)
-    
-#    def test_convertVarscanLineToVCFRecord(self):
-##        Chrom    Position    Ref    Cons    Reads1    Reads2    VarFreq    Strands1    Strands2    Qual1    Qual2    Pvalue           
-##        chr1    10            C        Y        1        1        50%        1            1            68    69    0.49999999999999994
-##     MapQual1    MapQual2    Reads1Plus    Reads1Minus    Reads2Plus    Reads2Minus    VarAllele
-##             1           1            1            0            1            0                T
-#        varscanLine = "chr1\t10\tC\tY\t1\t1\t50%\t1\t1\t68\t69\t0.49999999999999994\t1\t1\t1\t0\t1\t0\tT"
-#        obsRecord = self._iVarscanToVCF._convertVarscanLineToVCFRecord(varscanLine, 1)
-#        
-#        expInfo = {"AF": "50%",
-#                   "DP": 2,
-#                   "MQ": "1"}
-#
-#        expRecord = vcf.model._Record("chr1", 10, "1", "C", "T", 3.010299957, ".", expInfo, ".", None)
-##        CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, sample_indexes, samples=None
-#        
-#        self.assertEqual(expRecord.ID, obsRecord.ID)
-#        self.assertEqual(expRecord.QUAL, obsRecord.QUAL)
-#        self.assertEqual(expRecord.FILTER, obsRecord.FILTER)
-#        self.assertEqual(expRecord.INFO, obsRecord.INFO)
-#        self.assertEqual(expRecord.FORMAT, obsRecord.FORMAT)
-#        self.assertEqual(expRecord, obsRecord)
-    
-    def test_convertVarscanLineToVCFLine(self):
-#        Chrom    Position    Ref    Cons    Reads1    Reads2    VarFreq    Strands1    Strands2    Qual1    Qual2    Pvalue           
-#        chr1    10            C        Y        1        1        50%        1            1            68    69    0.49999999999999994
-#     MapQual1    MapQual2    Reads1Plus    Reads1Minus    Reads2Plus    Reads2Minus    VarAllele
-#             1           1            1            0            1            0                T
-        varscanLine = "chr1\t10\tC\tY\t1\t1\t50%\t1\t1\t68\t69\t0.49999999999999994\t1\t1\t1\t0\t1\t0\tT"
-        obsVCFLine = self._iVarscanToVCF._convertVarscanLineToVCFRecord(varscanLine, 1)
-        
-        expVCFLine = "chr1\t10\t.\tC\tT\t3.010299957\t.\tAF=0.5000;DP=2;RBQ=68;ABQ=69\n"
-        #        CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
-        self.assertEqual(expVCFLine, obsVCFLine)
-        
-    def test_convertVarscanLineToVCFLine_false_VarAllele(self):
-        varscanLine = "chr1\t10\tC\tY\t1\t1\t50%\t1\t1\t68\t69\t0.49999999999999994\t1\t1\t1\t0\t1\t0\tA"
-        obsVCFLine = self._iVarscanToVCF._convertVarscanLineToVCFRecord(varscanLine, 1)
-        
-        expVCFLine = "chr1\t10\t.\tC\tT\t3.010299957\t.\tAF=0.5000;DP=2;RBQ=68;ABQ=69\n"
-        #        CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
-        self.assertEqual(expVCFLine, obsVCFLine)
-        
-    def test_convertVarscanLineToVCFRecord_empty_line(self):
-        obsMsg = ""
-        try:
-            self._iVarscanToVCF._convertVarscanLineToVCFRecord("", 10)
-        except Exception as e:
-            obsMsg = e.msg
-        
-        self.assertEqual("This varscan line (l.10) is not complete", obsMsg)
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_WigParser.py
--- a/commons/core/parsing/test/Test_WigParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,28 +0,0 @@
-from commons.core.utils.FileUtils import FileUtils
-from commons.core.parsing.WigParser import WigParser
-import unittest
-
-class Test_WigParser(unittest.TestCase):
-    
-    def tearDown(self):
-        FileUtils.removeFilesByPattern("data/.chr*.index")
-
-    def test_GetRange1(self):
-        self.parser = WigParser("data/test.wig")
-        outputRange = [0.0, 1.1, 1.2, 0.0, 1.4, 1.5, 0.0, 1.7, 0.0, 1.9, 0.0]
-        self.assertEqual(self.parser.getRange("chr1", 10, 20), outputRange)
-        outputRange = [0.0, 9.5, 9.6, 0.0]
-        self.assertEqual(self.parser.getRange("chrX", 4, 7), outputRange)
-
-    def test_GetRange2(self):
-        self.parser = WigParser("data/test1.wig")
-        outputRange = [0.0, 1.1, 1.2, 0.0, 1.4, 1.5, 0.0, 1.7, 0.0, 1.9, 0.0]
-        self.assertEqual(self.parser.getRange("chr2", 10, 20), outputRange)
-
-    def test_GetRange3(self):
-        self.parser = WigParser("data/test2.wig")
-        outputRange = [1.4, 1.5]
-        self.assertEqual(self.parser.getRange("chr3", 14, 15), outputRange)
-
-if __name__ == '__main__':
-        unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/Test_pilerTAToGrouperMap.py
--- a/commons/core/parsing/test/Test_pilerTAToGrouperMap.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,108 +0,0 @@
-import unittest
-import os
-from commons.core.parsing.PilerTAToGrouperMap import PilerTAToGrouperMap
-from commons.core.utils.FileUtils import FileUtils
-
-class Test_pilerTAToGrouperMap(unittest.TestCase):
-
-    def setUp(self):
-        self._inputGffFileName = "input.gff"
-        self._inputPYRFileName = "input_pyr.gff"
-        self._inputMOTIFFileName = "input_motif.gff"
-        
-        self._obsOutFileName = "output.info"
-        self._obsGrouperFileName = "input_motif.gff.grp"
-        self._obsGrpMapFileName = "input_motif.gff.grp.map"
-        
-        self._expOutFileName = "exp_output.info"
-        self._expGrouperFileName = "exp_motif.gff.grp"
-        self._expGrpMapFileName = "exp_motif.gff.grp.map"
-        
-    def tearDown(self):
-        os.remove(self._inputGffFileName)
-        os.remove(self._inputPYRFileName)
-        os.remove(self._inputMOTIFFileName)
-        
-        os.remove(self._obsOutFileName)
-        os.remove(self._obsGrouperFileName)
-        os.remove(self._obsGrpMapFileName)
-        
-        os.remove(self._expOutFileName)
-        os.remove(self._expGrouperFileName)
-        os.remove(self._expGrpMapFileName)
-
-    def testRun(self):
-        self._writePilerTAFilePYR(self._inputPYRFileName)
-        self._writePilerTAFileMOTIF(self._inputMOTIFFileName)
-        self._writePilerTAGff(self._inputGffFileName)
-        
-        self._writeExpOutputFile(self._expOutFileName)
-        self._writeExpGrouperFile(self._expGrouperFileName)
-        self._writeExpGrouperMapFile(self._expGrpMapFileName)
-        
-        iPilerTAToGrouperMap = PilerTAToGrouperMap(self._inputGffFileName, self._inputPYRFileName,self._inputMOTIFFileName, self._obsOutFileName)
-        iPilerTAToGrouperMap.run()
-        
-        self.assertTrue(FileUtils.are2FilesIdentical(self._expOutFileName, self._obsOutFileName))
-        self.assertTrue(FileUtils.are2FilesIdentical(self._expGrouperFileName, self._obsGrouperFileName))
-        self.assertTrue(FileUtils.are2FilesIdentical(self._expGrpMapFileName, self._obsGrpMapFileName))
-
-
-    def _writePilerTAGff(self, fileName):
-        f = open(fileName, "w")
-        f.write("chunk21\tpiler\thit\t155146\t156020\t0\t+\t.\tTarget chunk21 150519 151392 ; Pile 510 ; Pyramid 0\n")
-        f.write("chunk21\tpiler\thit\t154790\t156023\t0\t+\t.\tTarget chunk21 150519 151751 ; Pile 510 ; Pyramid 0\n")
-        f.write("chunk21\tpiler\thit\t154078\t156023\t0\t+\t.\tTarget chunk21 150519 152463 ; Pile 510 ; Pyramid 0\n")
-        f.write("chunk21\tpiler\thit\t154256\t156023\t0\t+\t.\tTarget chunk21 150519 152285 ; Pile 510 ; Pyramid 0\n")
-        f.write("chunk21\tpiler\thit\t154434\t156023\t0\t+\t.\tTarget chunk21 150519 152107 ; Pile 510 ; Pyramid 0\n")
-        f.close()
-
-    def _writePilerTAFilePYR(self, fileName):
-        f = open(fileName, "w")
-        f.write("chunk21\tpiler\tpyramid\t150519\t156023\t0\t.\t.\tPyramidIndex 0\n")
-        f.write("chunk21\tpiler\tpyramid\t150519\t156023\t0\t.\t.\tPyramidIndex 1\n")
-        f.write("chunk21\tpiler\tpyramid\t165574\t174424\t0\t.\t.\tPyramidIndex 2\n")
-        f.write("chunk21\tpiler\tpyramid\t166301\t174424\t0\t.\t.\tPyramidIndex 3\n")
-        f.write("chunk21\tpiler\tpyramid\t168967\t174424\t0\t.\t.\tPyramidIndex 4\n")
-        f.write("chunk21\tpiler\tpyramid\t170215\t174424\t0\t.\t.\tPyramidIndex 5\n")
-        f.close()
-        
-    def _writePilerTAFileMOTIF(self, fileName):
-        f = open(fileName, "w")
-        f.write("chunk21\tpiler\ttandemmotif\t155843\t156020\t0\t.\t.\tTarget chunk21 151215 151392 ; Pyramid 0\n")
-        f.write("chunk21\tpiler\ttandemmotif\t151215\t151392\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
-        f.write("chunk21\tpiler\ttandemmotif\t151574\t151751\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
-        f.write("chunk21\tpiler\ttandemmotif\t152286\t152463\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
-        f.write("chunk21\tpiler\ttandemmotif\t152108\t152285\t0\t.\t.\tTarget chunk21 155843 156020 ; Pyramid 0\n")
-        f.close()
-        
-    def _writeExpOutputFile(self, fileName):
-        f = open(fileName, "w")
-        f.write("Pile 510\tPyramid 0\n")
-        f.write("\tPyramid 1\n")
-        f.write("\tPyramid 2\n")
-        f.write("\tPyramid 3\n")
-        f.write("\tPyramid 4\n")
-        f.write("\tPyramid 5\n")
-        f.close()
-    
-    def _writeExpGrouperFile(self, fileName):
-        f = open(fileName, "w")
-        f.write("MbS1Gr0Cl510\tchunk21\tpiler\ttandemmotif\t155843\t156020\t0\t.\t.\tTarget chunk21 151215 151392 \tPile 510\tPyramid 0\n")
-        f.write("MbS2Gr0Cl510\tchunk21\tpiler\ttandemmotif\t151215\t151392\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
-        f.write("MbS3Gr0Cl510\tchunk21\tpiler\ttandemmotif\t151574\t151751\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
-        f.write("MbS4Gr0Cl510\tchunk21\tpiler\ttandemmotif\t152286\t152463\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
-        f.write("MbS5Gr0Cl510\tchunk21\tpiler\ttandemmotif\t152108\t152285\t0\t.\t.\tTarget chunk21 155843 156020 \tPile 510\tPyramid 0\n")
-        f.close()
-        
-    def _writeExpGrouperMapFile(self, fileName):
-        f = open(fileName, "w")
-        f.write("MbS1Gr0Cl510\tchunk21\t155843\t156020\n") 
-        f.write("MbS2Gr0Cl510\tchunk21\t151215\t151392\n") 
-        f.write("MbS3Gr0Cl510\tchunk21\t151574\t151751\n") 
-        f.write("MbS4Gr0Cl510\tchunk21\t152286\t152463\n") 
-        f.write("MbS5Gr0Cl510\tchunk21\t152108\t152285\n") 
-        f.close()
-        
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/ExpPotDooblonsSubSNP.csv
--- a/commons/core/parsing/test/data/ExpPotDooblonsSubSNP.csv Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,832 +0,0 @@\n-SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n-Batch_AU247387_SNP_30_10102;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATCACRGAGGACNNGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTYGCTAGCTTGAGGGCGATTGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;1;Sequence;;;9\n-Batch_AU247387_SNP_30_IRELAND;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATTCCAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGT;1;12;15;Sequence;;;7\n-Batch_AU247387_SNP_30_POLAND;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGT;1;12;20;Sequence;;;9\n-Batch_AU247387_SNP_30_VIGOR;A;SNP;30;NNNTATAGCTCCTAACATTCCTGAAGTGA;GATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGT;1;12;23;Sequence;;;9\n-Batch_AU247387_SNP_34_10102;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGAAGAT;ACRGAGGACNNGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTYGCTAGCTTGAGGGCGATTGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;1;Sequence;;;7\n-Batch_AU247387_SNP_34_IRELAND;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGACGAT;CCAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGA
TCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGTTGCT;1;12;15;Sequence;;;10\n-Batch_AU247387_SNP_34_POLAND;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGAAGAT;ACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCA;1;12;20;Sequence;;;7\n-Batch_AU247387_SNP_34_VIGOR;A;SNP;34;NNNTATAGCTCCTAACATTCCTGAAGTGAAGAT;ACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCA;1;12;23;Sequence;;;7\n-Batch_AU247387_SNP_35_10102;A;SNP;35;NNNTATAGCTCCTAACATTCCTGAAGTGAAGATC;CRGAGGACNNGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTYGCTAGCTTGAGGGCGATTGGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;1;Sequence;;;9\n-Batch_AU247387_SNP_35_IRELAND;A;SNP;35;NNNTATAGCTCCTAACATTCCTGAAGTGACGATT;CAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGTTGCTG;1;12;15;Sequence;;;7\n-Batch_AU247387_SNP_35_POLAND;A;SNP;35;NNNTATAGCTCCTAACATTCCTGAAGTGAAGATC;CGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCAT;1;12;20;Sequence;;;9\n-Batch_AU247387_SNP_35_VIGOR;A;SNP;35;NNN
TATAGCTCCTAACA'..b'CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCC;GTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;6\n-Batch_AU247387_SNP_601_CARILLON;A;SNP;601;-----------------------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCRAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTA;GANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;12;9;Sequence;;;10\n-Batch_AU247387_SNP_601_SPAIN;A;SNP;601;-----------------------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTA;GAGAAGTACGACGACAAGGTTGATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;10\n-Batch_AU247387_SNP_601_VIGOR;A;SNP;601;TCTATTTGTTCGCAGGTGATTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTA;GAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;7\n-Batch_AU247387_SNP_613_SPAIN;A;SNP;613;-----------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGA;GACAAGGTTGATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;7\n-Batch_AU247387_SNP_613_VIGOR;A;SNP;613;CAGGTGATTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGA;GACAAGATCGATGCTTTTGGAG
AGAAG;1;12;23;Sequence;;;6\n-Batch_AU247387_SNP_620_SPAIN;A;SNP;620;----------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGACGACAAG;TTGATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;6\n-Batch_AU247387_SNP_620_VIGOR;A;SNP;620;TTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAG;TCGATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;9\n-Batch_AU247387_SNP_622_SPAIN;A;SNP;622;--------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGACGACAAGGT;GATGCTTTTGGTGAGAAG;1;12;21;Sequence;;;10\n-Batch_AU247387_SNP_622_VIGOR;A;SNP;622;GCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGAT;GATGCTTTTGGAGAGAAG;1;12;23;Sequence;;;7\n-Batch_AU247387_SNP_634_SPAIN;A;SNP;634;--CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACATTGGCATATATAGGTAAT------------------TTTAACTTGTGCTGCAACACTTGAGTTCATAACCACCCTAG------TTGTCCATACGAGTTGTGAACTGATGACATCCGTTCTTTTTCCCGAGTGCAGTCTTCGTGGTGCTCTACACGGTGCCAGTTCTGTATGAGAAGTACGACGACAAGGTTGATGCTTTTGG;GAGAAG;1;12;21;Sequence;;;10\n-Batch_AU247387_SNP_634_VIGOR;A;SNP;634;ATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGG;GAGAAG;1;12;23;Sequence;;;9
\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/Wig/chr1.wig
--- a/commons/core/parsing/test/data/Wig/chr1.wig Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,9 +0,0 @@
-fixedStep  chrom=chr1  start=11  step=1
-1.1
-1.2
-fixedStep  chrom=chr1  start=14  step=1
-1.4
-1.5
-variableStep chrom=chr1
-17  1.7
-19  1.9
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/realExpBatchLine.csv
--- a/commons/core/parsing/test/data/realExpBatchLine.csv Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,41 +0,0 @@
-IndividualNumber;Pos5;Pos3;BatchNumber;Sequence
-1;;;1;
-2;;;1;
-3;;;1;
-4;;;1;
-5;;;1;
-6;;;1;
-7;;;1;
-8;;;1;
-9;;;1;
-10;;;1;
-11;;;1;
-12;;;1;
-13;;;1;
-14;;;1;
-15;;;1;
-16;;;1;
-17;;;1;
-18;;;1;
-19;;;1;
-20;;;1;
-21;;;1;
-22;;;1;
-23;;;1;
-24;;;1;
-25;;;1;
-26;;;1;
-27;;;1;
-28;;;1;
-29;;;1;
-30;;;1;
-31;;;1;
-32;;;1;
-33;;;1;
-34;;;1;
-35;;;1;
-36;;;1;
-37;;;1;
-38;;;1;
-39;;;1;
-40;;;1;
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/realExpIndividual.csv
--- a/commons/core/parsing/test/data/realExpIndividual.csv Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,41 +0,0 @@
-IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id
-1;Treesnips_40-4-3;;;;;;;;;;Pinus pinaster;;;;;
-2;Treesnips_40-8-3;;;;;;;;;;Pinus pinaster;;;;;
-3;Treesnips_40-1-2;;;;;;;;;;Pinus pinaster;;;;;
-4;Treesnips_40-14-1;;;;;;;;;;Pinus pinaster;;;;;
-5;Treesnips_40-15-2;;;;;;;;;;Pinus pinaster;;;;;
-6;Treesnips_40-20-2;;;;;;;;;;Pinus pinaster;;;;;
-7;Treesnips_40-25-1;;;;;;;;;;Pinus pinaster;;;;;
-8;Treesnips_41-3-3;;;;;;;;;;Pinus pinaster;;;;;
-9;Treesnips_41-8-1;;;;;;;;;;Pinus pinaster;;;;;
-10;Treesnips_41-1-3;;;;;;;;;;Pinus pinaster;;;;;
-11;Treesnips_41-2-1;;;;;;;;;;Pinus pinaster;;;;;
-12;Treesnips_41-3-2;;;;;;;;;;Pinus pinaster;;;;;
-13;Treesnips_41-6-2;;;;;;;;;;Pinus pinaster;;;;;
-14;Treesnips_41-9-1;;;;;;;;;;Pinus pinaster;;;;;
-15;Treesnips_42-1-3;;;;;;;;;;Pinus pinaster;;;;;
-16;Treesnips_42-8-2;;;;;;;;;;Pinus pinaster;;;;;
-17;Treesnips_42-1-2;;;;;;;;;;Pinus pinaster;;;;;
-18;Treesnips_42-2-1;;;;;;;;;;Pinus pinaster;;;;;
-19;Treesnips_42-2-2;;;;;;;;;;Pinus pinaster;;;;;
-20;Treesnips_42-8-1;;;;;;;;;;Pinus pinaster;;;;;
-21;Treesnips_42-9-2;;;;;;;;;;Pinus pinaster;;;;;
-22;Treesnips_43-4-3;;;;;;;;;;Pinus pinaster;;;;;
-23;Treesnips_43-5-3;;;;;;;;;;Pinus pinaster;;;;;
-24;Treesnips_43-1-1;;;;;;;;;;Pinus pinaster;;;;;
-25;Treesnips_43-2-1;;;;;;;;;;Pinus pinaster;;;;;
-26;Treesnips_43-7-2;;;;;;;;;;Pinus pinaster;;;;;
-27;Treesnips_43-9-3;;;;;;;;;;Pinus pinaster;;;;;
-28;Treesnips_43-10-2;;;;;;;;;;Pinus pinaster;;;;;
-29;Treesnips_44-3-3;;;;;;;;;;Pinus pinaster;;;;;
-30;Treesnips_44-6-2;;;;;;;;;;Pinus pinaster;;;;;
-31;Treesnips_44-3-1;;;;;;;;;;Pinus pinaster;;;;;
-32;Treesnips_44-5-2;;;;;;;;;;Pinus pinaster;;;;;
-33;Treesnips_44-7-1;;;;;;;;;;Pinus pinaster;;;;;
-34;Treesnips_44-10-2;;;;;;;;;;Pinus pinaster;;;;;
-35;Treesnips_45-5-3;;;;;;;;;;Pinus pinaster;;;;;
-36;Treesnips_45-8-3;;;;;;;;;;Pinus pinaster;;;;;
-37;Treesnips_45-1-1;;;;;;;;;;Pinus pinaster;;;;;
-38;Treesnips_45-4-1;;;;;;;;;;Pinus pinaster;;;;;
-39;Treesnips_45-7-1;;;;;;;;;;Pinus pinaster;;;;;
-40;Treesnips_45-9-1;;;;;;;;;;Pinus pinaster;;;;;
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/realExpSequences.fsa
--- a/commons/core/parsing/test/data/realExpSequences.fsa Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
->PpHDZ31_ref
-GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAAAGGTAATGAGTGCAATTATTGTGTTTTGCTATTCAGTAATGATTATTTTGTTTCCGAAGGAAGGGATGCTCATGCAAGTTTTCTATTCAGGTTGCAGAGATTCTTAAAGATCGCCCATCTTGGCTTCGTGATTGTCGTTGCCTTGATGTTTTGACTGCGTTTCCTACTGGAAATGGTGGAACAATCGAGCTTTTATACATGCAGGTTTGCTATTCTCTCTGAAATTGATTCTGATAAGTTGCCATAAGAGGTCAAAAATTAGCAAAATCAGATTTATCTTTTTTTTTTTTTTTTTTGTATGATGTGGACTGCAGACATATGCCGCCACTACTTTAGCTTCTGCTAGAGACTTCTGGACTCTGAGATACACAACAGTGTTGGAAGATGGCAGTCTTGTGGTATGTGATAACTGAACAATGACACATGCTTTCATTAATCCCTTTATTTTGTGAGCACAACTGGATTTTCTTCCTTGTTTTTGCAGTAGTGGGGTTTTGCTAATTATAGCTTATCTATGATGTTCTGTAAGGTTTGTGAAAGGTCCTTGAGTGGTACTCAGGGTGGTCCAAGCATACCGCCAGTGCAGCATTTTGTGAGAGCAGAAATGCTTCCCAGTGGATATTTGATACAACCTTGTGAAGGTGGTGGTTCCATAATCCGTATTGTTGATCACATGGATCTAGAGGTACATGCTAGTTGTTGATGGCTAGAAGCTGCAATGTAGTTTATACAATTAAATTCCCAGAGTAGCTATTCTAAGATGGGCTGATCTTTTCATTGATTTGATTATTGCTATTCAGCCATGGAGTGTGCCTGAGGTTTTACGACCACTATATGAATCGTCCACTGTACTTGCCCAGAAAATGACAATTGCAGTAAGGACACCTTTAATGCCATTGTGCAGATTGTATTACAATTCTTCTAAGATTTCCACTGACTGAAATCTTCATGATCAGGCATTGCGTCGATTACGCCAAATTGCACAGGAAGCCACAGGTGAAGTAGTTTTTGGTTGGGGAAGACAGCCAGCTGTTCTGCGAACATTTAGCCAGAGGTTAAGCAGGTAATGTGACTACTGCAGGATTATATCTTCTCCCATATTTGAACCATGATGATTGTGTCTAATAGACCTGTTTTTAAAAATGCAGGGGTTTCAACGAGGCCGTGAATGGATTTACAGATGATGGGTGGTCATTGATGGGTAGTGATGGAGTGGAGGATGTCACTATTGCTATCAATTCATCTCCAAACAAACATTTTGCCTACCAAGTTAATGCTTCTAATGGGCTAACAACTCTTGGTGGTGGCATCCTTTGTGCAAAGGCATCCATGCTCTTACAGGTTCTCAAGCTAGTTATTTAAAAAAAATGTAAACAACATAATTTTATGCAATAATTTTAGAATGCATCTTGGGAGTCTGGAATACTTGTTTCTGAGTTCCGAGTCTTGTTTGATAGAGGAACTGATGACGTGTAATGTAAATACAGAATGTGCCTCCAGCATTACTTGTACGTTTCTTGCGCGAGCACCGATCAGAGTGGGCAGATTCCAACATTGATGCCTATTCTGCAGCTGCTTTAAAATCAAGTCCTTATAGTGTTCCAGGATCAAGA
GCAGGGGGCTTTTCAGGGAGTCAAGTAATCCTTCCCTTGGCACATACTGTGGAACATGAGGAGGTGGGGAGTGGTTACTGAGATGCCTGGTTTTGTATTTTGTTGCCTTCAAACTGCATTGGGATGCTTTTCAATATTTTTCCTGGTGTTTTTGGTTCTATATTTTGTTCAAATGTTTTCCTCTCTGTTGGTTTATACAATTTTGAAGCTGAAATAAATGTAACTGCAGTTCTTAGAGGTCATTAAGCTGGAAGGTCATGGCCTTACTCAGGAGGAAGCTGTCCTGTCCAGAGATATGTTTCTCTTACAGGTATCTTGTATTGCCAAAGTTACTTTCTATACCAATGATTGTGCTAGTGTATACTTTTTAAGGTTTATTGTTTAATGTTAACATTATCAACAACTTTGATGGGCAGCTTTGCAGTGGAATTGATGAACATGCAGCTGGAGCATGTGCTGAACTTGTTTTTGCACCAATTGATGAATCCTTTGCTGATGATGCTCCTTTGCTTCCTTCTGGGTTCAGGGTTATTCCTTTGGAATCAAGAACAGTTGAGTATATTCTGCAAACGTTTATGGCATCTAGAATTGATTTTTCATCTATGCTAAAATATCATTCAAAACAACTGGCATCTGGTGCTGCATTACGTATTTATTTCTTGGAGCTTGAAGAAATGAATATATATGATGCAGGATGGATCTGGGGGTCCCAACCGCACACTGGACTTGGCTTCTGCTCTGGAGGTTGGATCAACTGGAACTAGAACGTCTGGTGATTCTGGCACCAACTCGAATCTGAGATCTGTGTTGACTATTGCATTCCAGTTTACTTATGAGAGCCACTCGCGAGAAAATGTGGCAGCTATGGCTCGTCAATATGTGCGTAGTGTTGTAGCATCTGTCCAGAGGGTTGCCATGGCATTAGCTCCTTCTCGACTGAATTCACATGTTGGCCCAAGGCCACCTCCTGGGACTCCAGAAGCACTTACTCTTGCCCGTTGGATTTGTCAGAGCTACAGGTAAATAGGAGGCTTGCATTCAAGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGTCGCAAAAGTA
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/realExpSubSNP.csv
--- a/commons/core/parsing/test/data/realExpSubSNP.csv Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,799 +0,0 @@\n-SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n-INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-4-3;A;SNP;136;NNNNNNNNNNNNNNNNNNGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;1;Sequence;;;3\n-INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-8-3;A;SNP;136;NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;2;Sequence;;;3\n-INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-1-2;A;SNP;136;NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTGTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;3;Sequence;;;1\n-INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-14-1;A;SNP;136;NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTGTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCA
CGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;4;Sequence;;;1\n-INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-15-2;A;SNP;136;GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;5;Sequence;;;3\n-INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-20-2;A;SNP;136;GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTGTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;6;Sequence;;;1\n-INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_40-25-1;A;SNP;136;GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;7;Sequence;;;3\n-INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_41-3-3;A;SNP;136;NNNNNNNNNNNNNNNNNNNNNNNNNNNNTTTAGCTTTCCAAATTATGGGAAACATTATATTGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGCAGAGGAGACCTTGAC;GAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCAGATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTTGCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCCGGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTGCGGTCTTGTAGGATTAGATCCTACAA;1;1;8;Sequence;;;3\n-INRA_Pinus_pinaster_HDZ31-1_SNP_136_Treesnips_41-8-1;A;SN
P;136;GCTAG'..b'NRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_44-5-2;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGNNNNNNNNNNN;1;1;32;Sequence;;;1\n-INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_44-7-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;1;33;Sequence;;;3\n-INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_44-10-2;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGTNNNNNNNNNN;1;1;34;Sequence;;;3\n-INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-5-3;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAANNNNNNNNNNNNNN;1;
1;35;Sequence;;;3\n-INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-8-3;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGNNNNNNNNNNNN;1;1;36;Sequence;;;1\n-INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-4-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTACACATAGGGGTGGATCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAA-TTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN;1;1;38;Sequence;;;3\n-INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-7-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTGGACAAAATTCTGGATGAAAATGGTCGCAAAAGTA;1;1;39;Sequence;;;1\n-INRA_Pinus_pinaster_HDZ31-1_SNP_3291_Treesnips_45-9-1;A;SNP;3291;AGGCTCTTATGTTTTGCCATTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACAGACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGAAACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACATACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCT;GTTGTGAAGTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCAAATCAAGCAGGGTTGGACATGCTGGAAACGACANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
N;1;1;40;Sequence;;;1\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/real_multifasta_input.fasta
--- a/commons/core/parsing/test/data/real_multifasta_input.fasta Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,2419 +0,0 @@\n->PpHDZ31_ref\n-GCTAGCCCCGCTGGGTACGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATAT\n-TGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGC\n-AGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCA\n-GATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTT\n-GCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCC\n-GGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTG\n-CGGTCTTGTAGGATTAGATCCTACAAAGGTAATGAGTGCAATTATTGTGTTTTGCTATTC\n-AGTAATGATTATTTTGTTTCCGAAGGAAGGGATGCTCATGCAAGTTTTCTATTCAGGTTG\n-CAGAGATTCTTAAAGATCGCCCATCTTGGCTTCGTGATTGTCGTTGCCTTGATGTTTTGA\n-CTGCGTTTCCTACTGGAAATGGTGGAACAATCGAGCTTTTATACATGCAGGTTTGCTATT\n-CTCTCTGAAATTGATTCTGATAAGTTGCCATAAGAGGTCAAAAATTAGCAAAATCAGATT\n-TATCTTTTTTTTTTTTTTTTTTGTATGATGTGGACTGCAGACATATGCCGCCACTACTTT\n-AGCTTCTGCTAGAGACTTCTGGACTCTGAGATACACAACAGTGTTGGAAGATGGCAGTCT\n-TGTGGTATGTGATAACTGAACAATGACACATGCTTTCATTAATCCCTTTATTTTGTGAGC\n-ACAACTGGATTTTCTTCCTTGTTTTTGCAGTAGTGGGGTTTTGCTAATTATAGCTTATCT\n-ATGATGTTCTGTAAGGTTTGTGAAAGGTCCTTGAGTGGTACTCAGGGTGGTCCAAGCATA\n-CCGCCAGTGCAGCATTTTGTGAGAGCAGAAATGCTTCCCAGTGGATATTTGATACAACCT\n-TGTGAAGGTGGTGGTTCCATAATCCGTATTGTTGATCACATGGATCTAGAGGTACATGCT\n-AGTTGTTGATGGCTAGAAGCTGCAATGTAGTTTATACAATTAAATTCCCAGAGTAGCTAT\n-TCTAAGATGGGCTGATCTTTTCATTGATTTGATTATTGCTATTCAGCCATGGAGTGTGCC\n-TGAGGTTTTACGACCACTATATGAATCGTCCACTGTACTTGCCCAGAAAATGACAATTGC\n-AGTAAGGACACCTTTAATGCCATTGTGCAGATTGTATTACAATTCTTCTAAGATTTCCAC\n-TGACTGAAATCTTCATGATCAGGCATTGCGTCGATTACGCCAAATTGCACAGGAAGCCAC\n-AGGTGAAGTAGTTTTTGGTTGGGGAAGACAGCCAGCTGTTCTGCGAACATTTAGCCAGAG\n-GTTAAGCAGGTAATGTGACTACTGCAGGATTATATCTTCTCCCATATTTGAACCATGATG\n-ATTGTGTCTAATAGACCTGTTTTTAAAAATGCAGGGGTTTCAACGAGGCCGTGAATGGAT\n-TTACAGATGATGGGTGGTCATTGATGGGTAGTGATGGAGTGGAGGATGTCACTATTGCTA\n-TCAATTCATCTCCAAACAAACATTTTGCCTACCAAGTTAATGCTTCTAATGGGCTAACAA\n-CTCTTGGTGGTGGCATCCTTTGTGCAAAGGCATCCATGCTCTTACAGGTTCTCAAGCTAG\n-TTATTTAAAAAAAA-TGTAAACAACATAATTTTATGCAATAATTTTAGAATGCATCTTGG\n-GAGTCTGGAATACTTGTTTCTGAGTTCCGAGTCTTGTTTGATAGAGGAACTGATGACGTG\n-TAATGTAAA
TACAGAATGTGCCTCCAGCATTACTTGTACGTTTCTTGCGCGAGCACCGAT\n-CAGAGTGGGCAGATTCCAACATTGATGCCTATTCTGCAGCTGCTTTAAAATCAAGTCCTT\n-ATAGTGTTCCAGGATCAAGAGCAGGGGGCTTTTCAGGGAGTCAAGTAATCCTTCCCTTGG\n-CACATACTGTGGAACATGAGGAGGTGGGGAGTGGTTACTGAGATGCCTGGTTTTGTATTT\n-TGTTGCCTTCAAACTGCATTGGGATGCTTTTCAATATTTTTCCTGGTGTTTTTGGTTCTA\n-TATTTTGTTCAAATGTTTTCCTCTCTGTTGGTTTATACAATTTTGAAGCTGAAATAAATG\n-TAACTGCAGTTCTTAGAGGTCATTAAGCTGGAAGGTCATGGCCTTACTCAGGAGGAAGCT\n-GTCCTGTCCAGAGATATGTTTCTCTTACAGGTATCTTGTATTGCCAAAGTTACTTTCTAT\n-ACCAATGATTGTGCTAGTGTATACTTTTTAAGGTTTATTGTTTAATGTTAACATTATCAA\n-CAACTTTGATGGGCAGCTTTGCAGTGGAATTGATGAACATGCAGCTGGAGCATGTGCTGA\n-ACTTGTTTTTGCACCAATTGATGAATCCTTTGCTGATGATGCTCCTTTGCTTCCTTCTGG\n-GTTCAGGGTTATTCCTTTGGAATCAAGAACAGTTGAGTATATTCTGCAAACGTTTATGGC\n-ATCTAGAATTGATTTTTCATCTATGCTAAAATATCATTCAAAACAACTGGCATCTGGTGC\n-TGCATTACGTATTTATTTCTTGGAGCTTGAAGAAATGAATATATATGATGCAGGATGGAT\n-CTGGGGGTCCCAACCGCACACTGGACTTGGCTTCTGCTCTGGAGGTTGGATCAACTGGAA\n-CTAGAACGTCTGGTGATTCTGGCACCAACTCGAATCTGAGATCTGTGTTGACTATTGCAT\n-TCCAGTTTACTTATGAGAGCCACTCGCGAGAAAATGTGGCAGCTATGGCTCGTCAATATG\n-TGCGTAGTGTTGTAGCATCTGTCCAGAGGGTTGCCATGGCATTAGCTCCTTCTCGACTGA\n-ATTCACATGTTGGCCCAAGGCCACCTCCTGGGACTCCAGAAGCACTTACTCTTGCCCGTT\n-GGATTTGTCAGAGCTACAGGTAAATAGGAGGCTTGCATTCAAGGCTCTTATGTTTTGCCA\n-TTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACA\n-GACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGA\n-AACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACAT\n-ACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAA\n-GTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCA\n-AATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTG\n-GACAAAATTCTGGATGAAAATGGTCGCAAAAGTA\n->Treesnips_40-4-3\n-NNNNNNNNNNNNNNNNNNGTGGCATTGTTTTAGCTTTCCAAATTATGGGAAACATTATAT\n-TGTTTTTGTAGCTAGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGC\n-AGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCA\n-GATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTT\n-GCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCC\n
-GGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTG\n-CGGT'..b'A\n-TTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACA\n-GACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGA\n-AACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACAT\n-ACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAA\n-GTTACTAAATCTGGCCTATTGCTTGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCA\n-AATCAAGCAGGGTTGGACATGCTGGAAACGACATTGGTTGCATTGCAAGATATATCTTTG\n-GACAAAATTCTGGATGAAAATGGTCGCAAAAGTA\n->Treesnips_45-9-1\n-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n-NNNNNNNNNNNNNNGGTATAAAAGGACTGTATTTTTCTATTCCAGACTCCTGTCTATAGC\n-AGAGGAGACCTTGACAGAGTTCCTTTCAAAGGCTAAAGGAGCTGCTGTCGATTGGGTCCA\n-GATGCCTGGGATGAAGGTTTGTGTTCCCAAGATCCTCTTCTAAAAATAGATCATGTGCTT\n-GCTTGAATTATTAATCTATGGTGTATTAAGATGCCTGAGCGTGTTTTTGCAGCCTGGTCC\n-GGATTCGATTGGTATTGTAGCTATTTCGAATACTTGTAATGGAGTAGCTGCACGTGCTTG\n-CGGTCTTGTAGGATTAGATCCTACAAAGGTAATGAGTGCAATTATTGTGTTTTGCTATTC\n-AGTAATGATTATTTTGTTTCCGAAGGAAGGGATGCTCATGCAAGTTTTCTATTCAGGTTG\n-CAGAGATTCTTAAAGATCGCCCATCTTGGCTTCGTGATTGTCGTTGCCTTGATGTTTTGA\n-CTGCGTTTCCTACTGGAAATGGTGGAACAATCGAGCTTTTATACATGCAGGTTTGCTATT\n-CTCTCTGAAATTGATTCTGATAAGTTGCCATAAGAGGTCAAAAATTAGCAAAATCAGATT\n-TATCTTTTTTTTTTTTTTTTTTGTATGATGTGGACTGCAGACATATGCCGCCACTACTTT\n-AGCTTCTGCTAGAGACTTCTGGACTCTGAGATACACAACAGTGTTGGAAGATGGCAGTCT\n-TGTGGTATGTGATAACTGAACAATGACACATGCTTTCATTAATCCCTTTATTTTGTGAGC\n-ACAACTGGATTTTCTTCCTTGTTTTTGCAGTAGTGGGGTTTTGCTAATTATAGCTTATCT\n-ATGATGTTCTGTAAGGTTTGTGAAAGGTCCTTGAGTGGTACTCAGGGTGGTCCAAGCATA\n-CCGCCAGTGCAGCATTTTGTGAGAGCAGAAATGCTTCCCAGTGGATATTTGATACAACCT\n-TGTGAAGGTGGTGGTTCCATAATCCGTATTGTTGATCACATGGATCTAGAGGTACATGCT\n-AGTTGTTGATGGCTAGAAGCTGCAATGTAGTTTATACAATTAAATTCCCAGAGTAGCTAT\n-TCTAAGATGGGCTGATCTTTTCATTGATTTGATTATTGCTATTCAGCCATGGAGTGTGCC\n-TGAGGTTTTACGACCACTATATGAATCGTCCACTGTACTTGCCCAGAAAATGACAATTGC\n-AGTAAGGACACCTTTAATGCCATTGTGCAGATTGTATTACAATTCTTCTAAGATTTCCAC\n-TGACTGAAATCTTCATGATCAGGCATTGCGTCGATTACGCCAAATTGCACAGGAAGCCAC\n-AGGTGAAGTAGTTTTTGGTTGGGGAAGACAGCCAGCTGT
TCTGCGAACATTTAGCCAGAG\n-GTTAAGCAGGTAATGTGACTACTGCAGGATTATATCTTCTCCCATATTTGAACCATGATG\n-ATTGTGTCTAATAGACCTGTTTTTAAAAATGCAGGGGTTTCAACGAGGCCGTGAATGGAT\n-TTACAGATGATGGGTGGTCATTGATGGGTAGTGATGGAGTGGAGGATGTCACTATTGCTA\n-TCAATTCATCTCCAAACAAACATTTTGCCTACCAAGTTAATGCTTCTAATGGGCTAACAA\n-CTCTTGGTGGTGGCATCCTTTGTGCAAAGGCATCCATGCTCTTACAGGTTCTCAAGCTAG\n-TTATTTAAAAAAAA-TGTAAACAACATAATTTTATGCAATAATTTTAGAATGCATCTTGG\n-GAGTCTGGAATACTTGTTTCTGAGTTCCGAGTCTTGTTTGATAGAGGAACTGATGACGTG\n-TAATGTAAATACAGAATGTGCCTCCAGCATTACTTGTACGTTTCTTGCGCGAGCACCGAT\n-CAGAGTGGGCAGATTCCAACATTGATGCCTATTCTGCAGCTGCTTTAAAATCAAGTCCTT\n-ATAGTGTTCCAGGATCAAGAGCAGGGGGCTTTTCAGGGAGTCAAGTAATCCTTCCCTTGG\n-CACATACTGTGGAACATGAGGAGGTGGGGAGTGGTTACTGAGATGCCTGGTTTTGTATTT\n-TGTTGCCTTCAAACTGCATTGGGATGCTTTTCAATATTTTTCCTGGTGTTTTTGGTTCTA\n-TATTTTGTTCAAATGTTTTCCTCTCTGTTGGTTTATACAATTTTGAAGCTGAAATAAATG\n-TAACTGCAGTTCTTAGAGGTCATTAAGCTGGAAGGTCATGGCCTTACTCAGGAGGAAGCT\n-GTCCTGTCCAGAGATATGTTTCTCTTACAGGTATCTTGTATTGCCAAAGTTACTTTCTAT\n-ACCAATGATTGTGCTAGTGTATACTTTTTAAGGTTTATTGTTTAATGTTAACATTATCAA\n-CAACTTTGATGGGCAGCTTTGCAGTGGAATTGATGAACATGCAGCTGGAGCATGTGCTGA\n-ACTTGTTTTTGCACCAATTGATGAATCCTTTGCTGATGATGCTCCTTTGCTTCCTTCTGG\n-GTTCAGGGTTATTCCTTTGGAATCAAGAACAGTTGAGTATATTCTGCAAACGTTTATGGC\n-ATCTAGAATTGATTTTTCATCTATGCTAAAATATCATTCAAAACAACTGGCATCTGGTGC\n-TGCATTACGTATTTATTTCTTGGAGCTTGAAGAAATGAATATATATGATGCAGGATGGAT\n-CTGGGGGTCCCAACCGCACACTGGACTTGGCTTCTGCTCTGGAGGTTGGATCAACTGGAA\n-CTAGAACGTCTGGTGATTCTGGCACCAACTCGAATCTGAGATCTGTGTTGACTATTGCAT\n-TCCAGTTTACTTATGAGAGCCACTCGCGAGAAAATGTGGCAGCTATGGCTCGTCAATATG\n-TGCGTAGTGTTGTAGCATCTGTCCAGAGGGTTGCCATGGCATTAGCTCCTTCTCGACTGA\n-ATTCACATGTTGGCCCAAGGCCACCTCCTGGGACTCCAGAAGCACTTACTCTTGCCCGTT\n-GGATTTGTCAGAGCTACAGGTAAATAGGAGGCTTGCATTCAAGGCTCTTATGTTTTGCCA\n-TTCTTTATTTCTTAATTTTGAAATATTTTGTACTGAGAGCTGAATGCAAGTTTTTGGACA\n-GACTCCACATAGGTGTGGACCTGTTTCGAGCTGATTGTGAAGCCAGTGAGTCTGTACTGA\n-AACTACTTTGGCACCATTCAGATGCAATCATGTGCTGTTCTGTGAAGGTATCTATTACAT\n-ACAAAATTCTGAAGAAGTATAGCACTTTGGATACCTGCCTTATATTTTTCTGGTTGTGAA\n-GTTACTAAATCTGGCCTATTGCT
TGTGAATATGCAGGCGTTGCCTGTTTTTACATTTGCA\n-AATCAAGCAGGGTTGGACATGCTGGAAACGACANNNNNNNNNNNNNNNNNNNNNNNNNNN\n-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/sampleForTestVarscanToVCF.varscan
--- a/commons/core/parsing/test/data/sampleForTestVarscanToVCF.varscan Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,108 +0,0 @@
-Chrom Position Ref Cons Reads1 Reads2 VarFreq Strands1 Strands2 Qual1 Qual2 Pvalue MapQual1 MapQual2 Reads1Plus Reads1Minus Reads2Plus Reads2Minus VarAllele
-chr1 10759 C Y 2 1 33.33% 1 1 65 47 0.98 1 1 0 2 0 1 T
-chr1 12438 C S 1 1 50% 1 1 62 42 0.98 1 1 0 1 0 1 G
-chr1 17432 A M 2 1 33.33% 1 1 55 37 0.98 1 1 0 2 1 0 C
-chr1 20391 A W 2 1 33.33% 1 1 56 37 0.98 1 1 2 0 1 0 T
-chr1 21207 T Y 2 1 33.33% 1 1 55 41 0.98 1 1 2 0 1 0 C
-chr1 26057 T K 11 2 15.38% 2 1 60 37 0.98 1 1 7 4 2 0 G
-chr1 36838 T Y 2 1 33.33% 1 1 61 36 0.98 1 1 2 0 0 1 C
-chr1 37751 A M 3 1 25% 1 1 65 42 0.98 1 1 0 3 0 1 C
-chr1 43500 C S 3 1 25% 2 1 65 40 0.98 1 1 2 1 0 1 G
-chr1 50481 A W 2 1 33.33% 1 1 60 40 0.98 1 1 0 2 1 0 T
-chr1 106849 C S 3 1 25% 2 1 65 40 0.98 1 1 2 1 1 0 G
-chr1 108726 T W 2 1 33.33% 1 1 50 41 0.98 1 1 0 2 0 1 A
-chr1 114204 G S 4 2 33.33% 2 2 60 39 0.98 1 1 2 2 1 1 C
-chr1 115030 A M 3 1 25% 2 1 57 41 0.98 1 1 2 1 1 0 C
-chr1 116173 A R 7 2 22.22% 2 1 58 39 0.98 1 1 5 2 2 0 G
-chr1 118433 G S 6 1 14.29% 2 1 64 50 0.98 1 1 4 2 1 0 C
-chr1 119042 A R 5 1 16.67% 2 1 55 51 0.98 1 1 2 3 1 0 G
-chr1 139219 A R 1 1 50% 1 1 65 46 0.98 1 1 1 0 0 1 G
-chr1 140710 A R 9 2 18.18% 2 1 63 41 0.98 1 1 3 6 0 2 G
-chr1 144419 T Y 3 1 25% 2 1 63 39 0.98 1 1 1 2 1 0 C
-chr1 146099 G S 2 1 33.33% 2 1 64 38 0.98 1 1 1 1 0 1 C
-chr1 146435 T Y 11 7 38.89% 2 1 60 24 0.98 1 1 4 7 7 0 C
-chr1 147232 A W 2 1 33.33% 2 1 64 39 0.98 1 1 1 1 0 1 T
-chr1 158703 A M 2 1 33.33% 2 1 60 55 0.98 1 1 1 1 1 0 C
-chr1 166732 A W 4 2 33.33% 2 2 62 24 0.98 1 1 2 2 1 1 T
-chr1 179887 C M 3 1 25% 2 1 64 39 0.98 1 1 1 2 1 0 A
-chr1 185971 A R 3 1 25% 2 1 55 40 0.98 1 1 2 1 0 1 G
-chr1 211074 A M 3 1 25% 1 1 65 40 0.98 1 1 3 0 0 1 C
-chr1 219573 G S 1 1 50% 1 1 61 41 0.98 1 1 1 0 0 1 C
-chr1 229396 C S 2 1 33.33% 2 1 57 39 0.98 1 1 1 1 0 1 G
-chr1 236388 T K 8 2 20% 2 1 52 32 0.98 1 1 2 6 0 2 G
-chr1 245990 G S 5 1 16.67% 2 1 61 46 0.98 1 1 3 2 1 0 C
-chr1 249155 C S 2 1 33.33% 2 1 62 37 0.98 1 1 1 1 1 0 G
-chr1 261257 T K 3 1 25% 1 1 60 39 0.98 1 1 0 3 0 1 G
-chr1 274692 A R 2 1 33.33% 2 1 63 39 0.98 1 1 1 1 0 1 G
-chr1 283468 G S 3 1 25% 2 1 63 45 0.98 1 1 2 1 1 0 C
-chr1 284288 T W 3 1 25% 2 1 64 39 0.98 1 1 2 1 1 0 A
-chr1 286983 T Y 2 1 33.33% 1 1 64 37 0.98 1 1 0 2 1 0 C
-chr1 287378 C M 2 1 33.33% 2 1 65 37 0.98 1 1 1 1 0 1 A
-chr1 302928 A R 1 1 50% 1 1 64 38 0.98 1 1 0 1 1 0 G
-chr1 305952 C M 2 1 33.33% 1 1 65 38 0.98 1 1 0 2 0 1 A
-chr1 307932 T Y 4 1 20% 2 1 65 42 0.98 1 1 1 3 0 1 C
-chr1 317422 G S 3 1 25% 1 1 57 40 0.98 1 1 0 3 0 1 C
-chr1 321480 A W 2 1 33.33% 1 1 64 38 0.98 1 1 2 0 0 1 T
-chr1 322307 A M 1 1 50% 1 1 65 37 0.98 1 1 1 0 0 1 C
-chr1 328326 G S 5 1 16.67% 2 1 62 52 0.98 1 1 4 1 1 0 C
-chr1 333138 T K 3 2 40% 1 2 63 24 0.98 1 1 0 3 1 1 G
-chr1 333388 T Y 4 1 20% 2 1 64 43 0.98 1 1 2 2 1 0 C
-chr1 335592 T K 2 1 33.33% 1 1 58 39 0.98 1 1 2 0 1 0 G
-chr1 336572 C S 1 1 50% 1 1 58 38 0.98 1 1 0 1 0 1 G
-chr1 347396 T K 5 1 16.67% 2 1 62 52 0.98 1 1 2 3 1 0 G
-chr1 359080 T K 4 1 20% 2 1 61 42 0.98 1 1 1 3 0 1 G
-chr1 360223 A W 5 1 16.67% 2 1 54 52 0.98 1 1 2 3 0 1 T
-chr1 361047 T K 5 1 16.67% 2 1 62 50 0.98 1 1 2 3 0 1 G
-chr1 366048 A M 2 1 33.33% 2 1 65 39 0.98 1 1 1 1 0 1 C
-chr1 368105 A R 3 1 25% 2 1 55 40 0.98 1 1 2 1 0 1 G
-chr1 373782 T W 3 1 25% 2 1 64 41 0.98 1 1 2 1 1 0 A
-chr1 378159 G K 2 1 33.33% 2 1 63 41 0.98 1 1 1 1 0 1 T
-chr1 383945 C S 8 1 11.11% 2 1 58 57 0.98 1 1 2 6 1 0 G
-chr1 389461 A R 1 1 50% 1 1 65 36 0.98 1 1 1 0 1 0 G
-chr1 396860 G K 4 1 20% 1 1 65 43 0.98 1 1 0 4 0 1 T
-chr1 397170 A R 1 1 50% 1 1 65 36 0.98 1 1 1 0 1 0 G
-chr1 399939 A M 5 1 16.67% 2 1 63 46 0.98 1 1 1 4 0 1 C
-chr1 400733 G S 3 1 25% 1 1 65 41 0.98 1 1 3 0 0 1 C
-chr1 401165 A R 3 1 25% 2 1 64 41 0.98 1 1 1 2 0 1 G
-chr1 406774 T K 3 1 25% 2 1 58 39 0.98 1 1 2 1 0 1 G
-chr1 417293 C Y 2 1 33.33% 1 1 65 55 0.98 1 1 2 0 1 0 T
-chr1 417723 G S 2 1 33.33% 1 1 65 49 0.98 1 1 2 0 0 1 C
-chr1 420308 C M 1 1 50% 1 1 65 36 0.98 1 1 1 0 0 1 A
-chr1 435579 C Y 3 1 25% 1 1 64 40 0.98 1 1 0 3 0 1 T
-chr1 437183 G R 2 1 33.33% 1 1 65 39 0.98 1 1 0 2 0 1 A
-chr1 437194 A W 3 1 25% 1 1 64 41 0.98 1 1 0 3 0 1 T
-chr1 438866 G S 6 1 14.29% 2 1 62 52 0.98 1 1 5 1 0 1 C
-chr1 446237 T Y 3 1 25% 2 1 65 39 0.98 1 1 2 1 1 0 C
-chr1 446308 A R 2 1 33.33% 1 1 50 38 0.98 1 1 2 0 1 0 G
-chr1 452322 A M 1 1 50% 1 1 65 36 0.98 1 1 0 1 1 0 C
-chr1 462721 T K 1 1 50% 1 1 58 41 0.98 1 1 0 1 1 0 G
-chr1 477145 C S 2 1 33.33% 2 1 64 39 0.98 1 1 1 1 1 0 G
-chr1 493772 G S 3 1 25% 2 1 65 40 0.98 1 1 1 2 1 0 C
-chr1 498962 C M 2 1 33.33% 2 1 65 41 0.98 1 1 1 1 1 0 A
-chr1 510532 T Y 4 1 20% 2 1 64 53 0.98 1 1 2 2 1 0 C
-chr1 516369 T Y 1 1 50% 1 1 34 40 0.98 1 1 1 0 1 0 C
-chr1 523631 G K 3 1 25% 2 1 64 40 0.98 1 1 1 2 0 1 T
-chr1 524680 C Y 2 1 33.33% 1 1 65 41 0.98 1 1 0 2 0 1 T
-chr1 525898 T K 3 1 25% 2 1 62 49 0.98 1 1 2 1 1 0 G
-chr1 526118 A M 5 1 16.67% 2 1 61 50 0.98 1 1 2 3 0 1 C
-chr1 535762 C Y 3 1 25% 1 1 65 42 0.98 1 1 3 0 1 0 T
-chr1 543235 G K 1 1 50% 1 1 45 41 0.98 1 1 1 0 1 0 T
-chr1 550086 T Y 3 1 25% 1 1 50 41 0.98 1 1 3 0 1 0 C
-chr1 550508 A R 3 1 25% 2 1 55 40 0.98 1 1 1 2 0 1 G
-chr1 551143 G S 2 1 33.33% 1 1 65 39 0.98 1 1 0 2 1 0 C
-chr1 552924 A R 6 2 25% 2 2 62 38 0.98 1 1 4 2 1 1 G
-chr1 553541 A R 7 1 12.5% 2 1 65 52 0.98 1 1 4 3 0 1 G
-chr1 560806 T Y 5 1 16.67% 2 1 65 49 0.98 1 1 2 3 0 1 C
-chr1 562736 C S 5 1 16.67% 1 1 64 52 0.98 1 1 0 5 1 0 G
-chr1 563224 T Y 2 1 33.33% 1 1 51 39 0.98 1 1 2 0 1 0 C
-chr1 564217 T W 3 1 25% 2 1 62 40 0.98 1 1 1 2 0 1 A
-chr1 567288 C Y 2 1 33.33% 2 1 65 40 0.98 1 1 1 1 1 0 T
-chr1 569652 T Y 1 1 50% 1 1 48 42 0.98 1 1 1 0 0 1 C
-chr1 570280 G K 7 1 12.5% 2 1 60 53 0.98 1 1 1 6 0 1 T
-chr1 582185 T Y 1 1 50% 1 1 63 40 0.98 1 1 1 0 1 0 C
-chr1 582453 G S 1 1 50% 1 1 65 38 0.98 1 1 0 1 0 1 C
-chr1 583477 T K 2 1 33.33% 2 1 62 39 0.98 1 1 1 1 1 0 G
-chr1 584179 G K 3 1 25% 2 1 65 41 0.98 1 1 2 1 0 1 T
-chr1 589074 G S 2 1 33.33% 2 1 65 36 0.98 1 1 1 1 0 1 C
-chr1 596641 C S 2 1 33.33% 1 1 65 40 0.98 1 1 2 0 0 1 G
-chr1 599263 G K 2 1 33.33% 1 1 60 38 0.98 1 1 2 0 0 1 T
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/test.wig
--- a/commons/core/parsing/test/data/test.wig Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,12 +0,0 @@
-fixedStep  chrom=chr1  start=11  step=1
-1.1
-1.2
-fixedStep  chrom=chr1  start=14  step=1
-1.4
-1.5
-variableStep chrom=chr1
-17  1.7
-19  1.9
-variableStep chrom=chrX
-5  9.5
-6  9.6
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/test1.wig
--- a/commons/core/parsing/test/data/test1.wig Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,11 +0,0 @@
-fixedStep  chrom=chr2  start=9  step=1
-0
-0
-1.1
-1.2
-fixedStep  chrom=chr2  start=14  step=1
-1.4
-1.5
-variableStep chrom=chr2
-17  1.7
-19  1.9
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/test2.wig
--- a/commons/core/parsing/test/data/test2.wig Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-fixedStep  chrom=chr3  start=14  step=1
-1.4
-1.5
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/testBedParser1.bed
--- a/commons/core/parsing/test/data/testBedParser1.bed Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-track name=reads description="Reads" useScore=0 visibility=full offset=0
-arm_X 1000 3000 test1.1 1000 + 1000 3000 0 2 100,1000, 0,1000,
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/testCoordsParser.coords
--- a/commons/core/parsing/test/data/testCoordsParser.coords Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,19 +0,0 @@
-/home/urgi/genome_alignment/data/banks/arabidopsis/lyrata/ara_lyra_sca1.fa /home/urgi/genome_alignment/data/banks/arabidopsis/thaliana/ara_thal_chr1.fa
-NUCMER
-
-    [S1]     [E1]  |     [S2]     [E2]  |  [LEN 1]  [LEN 2]  |  [% IDY]  | [TAGS]
-=====================================================================================
-       1     6251  |   421251   415029  |     6251     6223  |    89.03  | scaffold_1 gi|240254421:1-30427671
-    9127    11947  |   414945   412123  |     2821     2823  |    90.45  | scaffold_1 gi|240254421:1-30427671
-   12201    12953  |   411933   411173  |      753      761  |    82.56  | scaffold_1 gi|240254421:1-30427671
-   13086    20401  |   411034   403760  |     7316     7275  |    88.56  | scaffold_1 gi|240254421:1-30427671
-   20482    20686  |   403573   403369  |      205      205  |    94.66  | scaffold_1 gi|240254421:1-30427671
-   32288    32623  |   402639   402280  |      336      360  |    76.52  | scaffold_1 gi|240254421:1-30427671
-   32936    33572  |   401974   401308  |      637      667  |    79.80  | scaffold_1 gi|240254421:1-30427671
-   33748    35013  |   401256   400080  |     1266     1177  |    82.77  | scaffold_1 gi|240254421:1-30427671
-   35456    44084  |   399895   391566  |     8629     8330  |    86.23  | scaffold_1 gi|240254421:1-30427671
-   44401    45265  |   391569   390737  |      865      833  |    90.40  | scaffold_1 gi|240254421:1-30427671
-   45374    46243  |   390633   389755  |      870      879  |    71.70  | scaffold_1 gi|240254421:1-30427671
-   46366    48958  |   389607   387128  |     2593     2480  |    82.32  | scaffold_1 gi|240254421:1-30427671
-   55079    55160  |   369603   369683  |       82       81  |    93.90  | scaffold_1 gi|240254421:1-30427671
-   55407    56537  |   369910   371016  |     1131     1107  |    81.69  | scaffold_1 gi|240254421:1-30427671
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/testCoordsParser_showcoord.coords
--- a/commons/core/parsing/test/data/testCoordsParser_showcoord.coords Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,5 +0,0 @@
-/home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A1_scaffolds.fa /home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A2_scaffolds.fa
-NUCMER
-
-[S1] [E1] [S2] [E2] [LEN 1] [LEN 2] [% IDY] [LEN R] [LEN Q] [COV R] [COV Q] [FRM] [TAGS]
-296 2292 1 2001 1997 2001 98.30 175930 60273 1.14 3.32 1 1 mivi_sl_A1_scaffold00001 mivi_sl_A2_scaffold00003
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/testCoordsParser_showcoord_promer.coords
--- a/commons/core/parsing/test/data/testCoordsParser_showcoord_promer.coords Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,5 +0,0 @@
-/home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A1_scaffolds.fa /home/fungisex/Work2011_2012/Gnc/Compare_Genome/SLA1_SLA2/Mivi_sl_A2_scaffolds.fa
-PROMER
-
-[S1] [E1] [S2] [E2] [LEN 1] [LEN 2] [% IDY] [% SIM] [% STP] [LEN R] [LEN Q] [COV R] [COV Q] [FRM] [TAGS]
-1229    291    939    1    939    939    94.25    97.12    3.04    175930    60273    0.53    1.56    -3    -1    mivi_sl_A1_scaffold00001    mivi_sl_A2_scaffold00003
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/parsing/test/data/testGffParser1.gff3
--- a/commons/core/parsing/test/data/testGffParser1.gff3 Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,7 +0,0 @@
-arm_X test test_transcript 1000 2000 1 + . ID=id1-1;Name=test1;field=value1
-arm_X test test_exon 1000 2000 1 + . ID=id1-1-exon1;Name=test1-exon1;Parent=id1-1
-arm_X test test_transcript 10000 20000 1 - . ID=id2-1;Name=test2;field=value2
-arm_X test test_exon 10000 10100 1 - . ID=id2-1-exon1;Name=test2-exon1;Parent=id2-1
-arm_X test test_exon 10500 20000 1 - . ID=id2-1-exon2;Name=test2-exon2;Parent=id2-1
-arm_X test test_transcript 1000 2000 1 + . ID=test1.1-1;Name=test1.1
-arm_X test test_exon 1000 2000 1 + . ID=test1.1-1-exon1;Name=test1.1-exon1;Parent=test1.1-1
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/AlignedBioseqDB.py
--- a/commons/core/seq/AlignedBioseqDB.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,440 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import sys\n-from commons.core.seq.BioseqDB import BioseqDB\n-from commons.core.seq.Bioseq import Bioseq\n-from commons.core.coord.Align import Align\n-from commons.core.coord.Range import Range\n-from commons.core.stat.Stat import Stat\n-from math import log\n-\n-\n-## Multiple Sequence Alignment Representation   \n-#   \n-#\n-class AlignedBioseqDB( BioseqDB ):\n-    \n-    def __init__( self, name="" ):\n-        BioseqDB.__init__( self, name )\n-        seqLength = self.getLength()\n-        if self.getSize() > 1:\n-            for bs in self.db[1:]:\n-                if bs.getLength() != seqLength:\n-                    print "ERROR: aligned sequences have different length"\n-                    \n-                    \n-    ## Get length of the alignment\n-    # \n-    # @return length\n-    # @warning name before migration was \'length\'\n-    #\n-    def getLength( self ):\n-        length = 0\n-        if self.db != []:\n-            length = self.db[0].getLength()\n-        return length\n-    \n-    \n-    ## Get the true length of a given sequence (without gaps)\n-    #\n-    # @param header string header of the sequence to analyze\n-    # @return length integer\n-    # @warning  name before migration was \'true_length\'\n-    #\n-    def getSeqLengthWithoutGaps( self, header ):\n-        bs = self.fetch( header )\n-        count = 0\n-        for pos in xrange(0,len(bs.sequence)):\n-            if bs.sequence[pos] != "-":\n-                count += 1\n-        return count\n-    \n-    def cleanMSA( self ):\n-        #TODO: Refactoring\n-        """clean the MSA"""\n-        i2del = []\n-\n-        # for each sequence in the MSA\n-        for seqi in xrange(0,self.getSize()):\n-            if seqi in i2del:\n-                continue\n-            #define it as the reference\n-            ref = 
self.db[seqi].sequence\n-            refHeader = self.db[seqi].header\n-            # for each following sequence\n-            for seq_next in xrange(seqi+1,self.getSize()):\n-                if seq_next in i2del:\n-                    continue\n-                keep = 0\n-                # for each position along the MSA\n-                for posx in xrange(0,self.getLength()):\n-                    seq = self.db[seq_next].sequence\n-                    if seq[posx] != \'-\' and ref[posx] != \'-\':\n-                        keep = 1\n-                        break\n-                seqHeader = self.db[s'..b'urn 0.0\n-        else:\n-            freq = nbOcc / float(nbNt)\n-            return - freq * log(freq) / log(2) \n-        \n-        \n-    ## Save the multiple alignment as a matrix with \'0\' if gap, \'1\' otherwise\n-    #\n-    def saveAsBinaryMatrix( self, outFile ):\n-        outFileHandler = open( outFile, "w" )\n-        for bs in self.db:\n-            string = "%s" % ( bs.header )\n-            for nt in bs.sequence:\n-                if nt != "-":\n-                    string += "\\t%i" % ( 1 )\n-                else:\n-                    string += "\\t%i" % ( 0 )\n-            outFileHandler.write( "%s\\n" % ( string ) )\n-        outFileHandler.close()\n-        \n-        \n-    ## Return a list of Align instances corresponding to the aligned regions (without gaps)\n-    #\n-    # @param query string header of the sequence considered as query\n-    # @param subject string header of the sequence considered as subject\n-    #\n-    def getAlignList( self, query, subject ):\n-        lAligns = []\n-        alignQ = self.fetch( query ).sequence\n-        alignS = self.fetch( subject ).sequence\n-        createNewAlign = True\n-        indexAlign = 0\n-        indexQ = 0\n-        indexS = 0\n-        while indexAlign < len(alignQ):\n-            if alignQ[ indexAlign ] != "-" and alignS[ indexAlign ] != "-":\n-                indexQ += 1\n-       
         indexS += 1\n-                if createNewAlign:\n-                    iAlign = Align( Range( query, indexQ, indexQ ),\n-                                    Range( subject, indexS, indexS ),\n-                                    0,\n-                                    int( alignQ[ indexAlign ] == alignS[ indexAlign ] ),\n-                                    int( alignQ[ indexAlign ] == alignS[ indexAlign ] ) )\n-                    lAligns.append( iAlign )\n-                    createNewAlign = False\n-                else:\n-                    lAligns[-1].range_query.end += 1\n-                    lAligns[-1].range_subject.end += 1\n-                    lAligns[-1].score += int( alignQ[ indexAlign ] == alignS[ indexAlign ] )\n-                    lAligns[-1].identity += int( alignQ[ indexAlign ] == alignS[ indexAlign ] )\n-            else:\n-                if not createNewAlign:\n-                    lAligns[-1].identity = 100 * lAligns[-1].identity / lAligns[-1].getLengthOnQuery()\n-                    createNewAlign = True\n-                if alignQ[ indexAlign ] != "-":\n-                    indexQ += 1\n-                elif alignS[ indexAlign ] != "-":\n-                    indexS += 1\n-            indexAlign += 1\n-        if not createNewAlign:\n-            lAligns[-1].identity = 100 * lAligns[-1].identity / lAligns[-1].getLengthOnQuery()\n-        return lAligns\n-    \n-    \n-    def removeGaps(self):\n-        for iBs in self.db:\n-            iBs.removeSymbol( "-" )\n-    \n-    ## Compute mean per cent identity for MSA. \n-    # First sequence in MSA is considered as reference sequence. 
\n-    #\n-    #        \n-    def computeMeanPcentIdentity(self):\n-        seqRef = self.db[0]\n-        sumPcentIdentity = 0\n-\n-        for seq in self.db[1:]:\n-            pcentIdentity = self._computePcentIdentityBetweenSeqRefAndCurrentSeq(seqRef, seq) \n-            sumPcentIdentity = sumPcentIdentity + pcentIdentity\n-        \n-        nbSeq = len(self.db[1:])\n-        meanPcentIdentity = round (sumPcentIdentity/nbSeq)\n-        \n-        return meanPcentIdentity\n-\n-    def _computePcentIdentityBetweenSeqRefAndCurrentSeq(self, seqRef, seq):\n-            indexOnSeqRef = 0\n-            sumIdentity = 0\n-            for nuclSeq in seq.sequence:\n-                nuclRef = seqRef.sequence[indexOnSeqRef]\n-            \n-                if nuclRef != "-" and nuclRef == nuclSeq:\n-                    sumIdentity = sumIdentity + 1\n-                indexOnSeqRef = indexOnSeqRef + 1   \n-            \n-            return float(sumIdentity) / float(seqRef.getLength()) * 100       \n-\n- \n-\n-\n-    \n-    \n-    \n-    \n-    \n-    \n-    \n-    \n-    \n-    \n-\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/Bioseq.py
--- a/commons/core/seq/Bioseq.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,735 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import sys\n-import string\n-import re\n-import random\n-import cStringIO\n-from commons.core.coord.Map import Map\n-from commons.core.checker.RepetException import RepetException\n-\n-DNA_ALPHABET_WITH_N = set( [\'A\',\'T\',\'G\',\'C\',\'N\'] )\n-IUPAC = set([\'A\',\'T\',\'G\',\'C\',\'U\',\'R\',\'Y\',\'M\',\'K\',\'W\',\'S\',\'B\',\'D\',\'H\',\'V\',\'N\'])\n-\n-\n-## Record a sequence with its header\n-#\n-class Bioseq( object ):\n-    \n-    header = ""\n-    sequence = ""\n-    \n-    ## constructor\n-    #\n-    # @param name the header of sequence\n-    # @param seq sequence (DNA, RNA, protein)\n-    #\n-    def __init__( self, name="", seq="" ):\n-        self.header = name\n-        self.sequence = seq\n-        \n-        \n-    ## Equal operator\n-    #        \n-    def __eq__( self, o ):\n-        if self.header==o.header and self.sequence==o.sequence:\n-            return True\n-        return False\n-    \n-    \n-    ## overload __repr__\n-    #\n-    def __repr__( self ):\n-        return "%s;%s" % ( self.header, self.sequence )\n-    \n-    \n-    ## set attribute header\n-    #\n-    # @param header a string\n-    #\n-    def setHeader( self, header ):\n-        self.header = header\n-        \n-        \n-    ## get attribute header\n-    #\n-    # @return header\n-    def getHeader(self):\n-        return self.header\n-    \n-    \n-    ## set attribute sequence\n-    #\n-    # @param sequence a string\n-    #\n-    def setSequence( self, sequence ):\n-        self.sequence = sequence\n-        \n-        \n-    def getSequence(self):\n-        return self.sequence\n-        \n-    ## reset\n-    #\n-    def reset( self ):\n-        self.setHeader( "" )\n-        self.setSequence( "" )\n-        \n-        \n-    ## Test if bioseq is empty\n-    #\n-    def isEmpty( self ):\n-        return 
self.header == "" and self.sequence == ""\n-    \n-    \n-    ## Reverse the sequence\n-    #\n-    def reverse( self ):\n-        tmp = self.sequence\n-        self.sequence = tmp[::-1]\n-        \n-        \n-    ## Turn the sequence into its complement\n-    #  Force upper case letters\n-    #  @warning: old name in pyRepet.Bioseq realComplement\n-    #\n-    def complement( self ):\n-        complement = ""\n-        self.upCase()\n-        for i in xrange(0,len(self.sequence),1):\n-            if self.sequence[i] == "A":\n-                complement += "T"\n-            elif self.sequence[i] == "T":\n-                complement += "A"\n-            elif self.s'..b'etLMapWhithoutGap( self ):\n-        lMaps = []\n-        countSite = 1\n-        countSubseq = 1\n-        inGap = False\n-        startMap = -1\n-        endMap = -1\n-\n-        # initialize with the first site\n-        if self.sequence[0] == "-":\n-            inGap = True\n-        else:\n-            startMap = countSite\n-\n-        # for each remaining site\n-        for site in self.sequence[1:]:\n-            countSite += 1\n-\n-            # if it is a gap\n-            if site == "-":\n-\n-                # if this is the beginning of a gap, record the previous subsequence\n-                if inGap == False:\n-                    inGap = True\n-                    endMap = countSite - 1\n-                    lMaps.append( Map( "%s_subSeq%i" % (self.header,countSubseq), self.header, startMap, endMap ) )\n-                    countSubseq += 1\n-\n-            # if it is NOT a gap\n-            if site != "-":\n-\n-                # if it is the end of a gap, begin the next subsequence\n-                if inGap == True:\n-                    inGap = False\n-                    startMap = countSite\n-\n-                # if it is the last site\n-                if countSite == self.getLength():\n-                    endMap = countSite\n-                    lMaps.append( Map( "%s_subSeq%i" 
% (self.header,countSubseq), self.header, startMap, endMap ) )\n-\n-        return lMaps\n-    \n-    \n-    ## get the percentage of GC\n-    #\n-    # @return a percentage\n-    # \n-    def getGCpercentage( self ):\n-        tmpSeq = self.getSeqWithOnlyATGCN()\n-        nbGC = tmpSeq.count( "G" ) + tmpSeq.count( "C" )\n-        return 100 * nbGC / float( self.getLength() )\n-    \n-    ## get the percentage of GC of a sequence without counting N in sequence length\n-    #\n-    # @return a percentage\n-    # \n-    def getGCpercentageInSequenceWithoutCountNInLength(self):\n-        tmpSeq = self.getSeqWithOnlyATGCN()\n-        nbGC = tmpSeq.count( "G" ) + tmpSeq.count( "C" )\n-        return 100 * nbGC / float( self.getLength() - self.countNt("N") )\n-    \n-    ## get the 5 prime subsequence of a given length at the given position \n-    #\n-    # @param position integer\n-    # @param flankLength integer subsequence length\n-    # @return a sequence string\n-    # \n-    def get5PrimeFlank(self, position, flankLength):\n-        if(position == 1):\n-            return ""\n-        else:\n-            startOfFlank = 1\n-            endOfFlank = position -1\n-        \n-            if((position - flankLength) > 0):\n-                startOfFlank = position - flankLength\n-            else:\n-                startOfFlank = 1\n-            \n-            return self.subseq(startOfFlank, endOfFlank).sequence\n-            \n-            \n-    ## get the 3 prime subsequence of a given length at the given position \n-    #  In the case of indels, the polymorphism length can be specified\n-    #\n-    # @param position integer\n-    # @param flankLength integer subsequence length\n-    # @param polymLength integer polymorphism length\n-    # @return a sequence string\n-    # \n-    def get3PrimeFlank(self, position, flankLength, polymLength = 1):\n-        if((position + polymLength) > len( self.sequence )):\n-            return ""\n-        else:\n-            
startOfFlank = position + polymLength\n-         \n-            if((position+polymLength+flankLength) > len( self.sequence )):\n-                endOfFlank =  len( self.sequence )\n-            else:\n-                endOfFlank =  position+polymLength+flankLength-1\n-        \n-            return self.subseq(startOfFlank, endOfFlank).sequence\n-    \n-    \n-    def _createWordList(self,size,l=[\'A\',\'T\',\'G\',\'C\']):\n-        if size == 1 :\n-            return l\n-        else:\n-            l2 = []\n-            for i in l:\n-                for j in [\'A\',\'T\',\'G\',\'C\']:\n-                    l2.append( i + j )\n-        return self._createWordList(size-1,l2)\n-    \n-    \n-    def removeSymbol( self, symbol ):\n-        tmp = self.sequence.replace( symbol, "" )\n-        self.sequence = tmp\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/Bioseq.pyc
b
Binary file commons/core/seq/Bioseq.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/BioseqDB.py
--- a/commons/core/seq/BioseqDB.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,461 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import sys\n-import re\n-from commons.core.seq.Bioseq import Bioseq\n-from commons.core.stat.Stat import Stat\n-\n-\n-## Handle a collection of a Bioseq (header-sequence) \n-#\n-class BioseqDB( object ):\n-    \n-    def __init__( self, name="" ):\n-        self.idx = {}\n-        self.idx_renamed = {}\n-        self.db = []\n-        self.name = name\n-        if name != "":\n-            faFile = open( name )\n-            self.read( faFile )\n-            faFile.close()\n-        self.mean_seq_lgth = None\n-        self.stat = Stat()\n-        \n-        \n-    ## Equal operator\n-    #\n-    def __eq__( self, o ):\n-        selfSize = self.getSize()\n-        if selfSize != o.getSize():\n-            return False\n-        nbEqualInstances = 0\n-        for i in self.db:\n-            atLeastOneIsEqual = False\n-            for j in o.db:\n-                if i == j:\n-                    atLeastOneIsEqual = True\n-                    continue\n-            if atLeastOneIsEqual:\n-                nbEqualInstances += 1\n-        if nbEqualInstances == selfSize:\n-            return True\n-        return False\n-    \n-    \n-    ## Change the name of the BioseqDB\n-    #\n-    # @param name the BioseqDB name\n-    # \n-    def setName(self, name):\n-        self.name = name\n-        \n-        \n-    ## Record each sequence of the input file as a list of Bioseq instances\n-    #\n-    # @param faFileHandler handler of a fasta file\n-    #\n-    def read( self, faFileHandler ):\n-        while True:\n-            seq = Bioseq()\n-            seq.read( faFileHandler )\n-            if seq.sequence == None:\n-                break\n-            self.add( seq )\n-            \n-            \n-    ## Write all Bioseq of BioseqDB in a formatted fasta file (60 character long)\n-    #\n-    # @param faFileHandler file 
handler of a fasta file\n-    #\n-    def write( self, faFileHandler ):\n-        for bs in self.db:\n-            bs.writeABioseqInAFastaFile( faFileHandler )\n-            \n-            \n-    ## Write all Bioseq of BioseqDB in a formatted fasta file (60 character long)\n-    #\n-    # @param outFaFileName file name of fasta file\n-    # @param mode \'write\' or \'append\'\n-    #\n-    def save( self, outFaFileName, mode="w" ):\n-        outFaFile = open( outFaFileName, mode )\n-        self.write( outFaFile )\n-        outFaFile.close()\n-        \n-        \n-    ## Read a formatted fasta file and l'..b'on of wished Bioseq header\n-    # @param inFileName name of fasta file in which we want extract the BioseqDB\n-    #\n-    def extractPatternOfFile(self, pattern, inFileName):\n-        if pattern=="" :\n-            return\n-        srch=re.compile(pattern)\n-        file_db=open(inFileName)\n-        numseq=0\n-        nbsave=0\n-        while 1:\n-            seq=Bioseq()\n-            seq.read(file_db)\n-            if seq.sequence==None:\n-                break\n-            numseq+=1\n-            m=srch.search(seq.header)\n-            if m:\n-                self.add(seq)\n-                nbsave+=1\n-        file_db.close()\n-        \n-        \n-    ## Extract a sub BioseqDB from the instance with all Bioseq header containing the specified pattern\n-    #\n-    # @param pattern regular expression of wished Bioseq header\n-    #\n-    # @return a BioseqDB\n-    #\n-    def getByPattern(self,pattern):\n-        if pattern=="" :\n-            return\n-        iBioseqDB=BioseqDB()\n-        srch=re.compile(pattern)\n-        for iBioseq in self.db:\n-            if srch.search(iBioseq.header):\n-                iBioseqDB.add(iBioseq)\n-        return iBioseqDB\n-    \n-    \n-    ## Extract a sub BioseqDB from the instance with all Bioseq header not containing the specified pattern\n-    #\n-    # @param pattern regular expression of not wished Bioseq 
header\n-    #\n-    # @return a BioseqDB\n-    #\n-    def getDiffFromPattern(self,pattern):\n-        if pattern=="" :\n-            return\n-        iBioseqDB=BioseqDB()\n-        srch=re.compile(pattern)\n-        for iBioseq in self.db:\n-            if not srch.search(iBioseq.header):\n-                iBioseqDB.add(iBioseq)\n-        return iBioseqDB\n-    \n-    #TODO: to run several times to remove all concerned sequences when big data. How to fix it ?\n-    ## Remove from the instance all Bioseq which header contains the specified pattern\n-    #\n-    # @param pattern regular expression of not wished Bioseq header\n-    #\n-    def rmByPattern(self,pattern):\n-        if pattern=="" :\n-            return\n-        srch=re.compile(pattern)\n-        for seq in self.db:\n-            if srch.search(seq.header):\n-                self.db.remove(seq)     \n-                \n-                \n-    ## Copy a part from another BioseqDB in the BioseqDB if Bioseq have got header containing the specified pattern\n-    # \n-    # @warning this method is called extractPattern in pyRepet.seq.BioseqDB\n-    #\n-    # @param pattern regular expression of wished Bioseq header\n-    # @param sourceBioseqDB the BioseqDB from which we want extract Bioseq\n-    #\n-    def addBioseqFromABioseqDBIfHeaderContainPattern(self, pattern, sourceBioseqDB):\n-        if pattern=="" :\n-            return\n-        srch=re.compile(pattern)\n-        for seq in sourceBioseqDB.db:\n-            m=srch.search(seq.header)\n-            if m:\n-                self.add(seq)   \n-                \n-                \n-    ## Up-case the sequence characters in all sequences\n-    # \n-    def upCase( self ):\n-        for bs in self.db:\n-            bs.upCase()\n-            \n-            \n-    ## Split each gapped Bioseq in a list and store all in a dictionary\n-    #\n-    # @return a dict, keys are bioseq headers, values are list of Map instances \n-    #\n-    def 
getDictOfLMapsWithoutGaps( self ):\n-        dSeq2Maps = {}\n-\n-        for bs in self.db:\n-            dSeq2Maps[ bs.header ] = bs.getLMapWhithoutGap()\n-\n-        return dSeq2Maps\n-\n-    ## Give the list of the sequence length in the bank\n-    #\n-    # @return an list\n-    #\n-    def getListOfSequencesLength( self ):\n-        lLength = []\n-        for iBioseq in self.db:\n-            lLength.append(iBioseq.getLength())\n-\n-        return lLength\n-    \n-    ## Return sequence length for a list of sequence header\n-    #\n-    def getSeqLengthByListOfName( self, lHeaderName ):\n-        lseqLength=[]\n-        for headerName in lHeaderName: \n-            lseqLength.append(self.getSeqLength( headerName ))\n-        return lseqLength\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/BioseqUtils.py
--- a/commons/core/seq/BioseqUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,296 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import math\n-import re\n-from commons.core.seq.Bioseq import Bioseq\n-\n-## Static methods for sequences manipulation\n-#\n-class BioseqUtils(object):\n-    \n-    ## Translate a nucleotide sequence\n-    #\n-    # @param bioSeqInstanceToTranslate a bioseq instance to translate\n-    # @param phase a integer : 1 (default), 2 or 3\n-    # \n-    def translateSequence(bioSeqInstanceToTranslate, phase=1):\n-        pep = ""\n-        #length = math.floor((len(self.sequence)-phase-1)/3)*3\n-        length = int( math.floor( ( len(bioSeqInstanceToTranslate.sequence )-( phase-1 ) )/3 )*3 )\n-        #We need capital letters !\n-        bioSeqInstanceToTranslate.upCase() \n-        sequence = bioSeqInstanceToTranslate.sequence                \n-        for i in xrange(phase-1,length,3):\n-            if (sequence[i:i+3] == "TTT" or sequence[i:i+3] == "TTC"):\n-                pep = pep + "F"\n-            elif ( sequence[i:i+3] == "TTA" or sequence[i:i+3] == "TTG" ):\n-                pep = pep + "L"\n-            elif ( sequence[i:i+2] == "CT" ):\n-                pep = pep + "L"\n-            elif ( sequence[i:i+3] == "ATT" or sequence[i:i+3] == "ATC" or sequence[i:i+3] == "ATA" ):\n-                pep = pep + "I"\n-            elif ( sequence[i:i+3] == "ATG" ):\n-                pep = pep + "M"\n-            elif ( sequence[i:i+2] == "GT" ):\n-                pep = pep + "V"\n-            elif ( sequence[i:i+2] == "TC" ) :\n-                pep = pep + "S"\n-            elif ( sequence[i:i+2] == "CC" ) :\n-                pep = pep + "P"\n-            elif ( sequence[i:i+2] == "AC" ) :\n-                pep = pep + "T"\n-            elif ( sequence[i:i+2] == "GC" ) :\n-                pep = pep + "A"\n-            elif ( sequence[i:i+3] == "TAT" or sequence[i:i+3] == "TAC" ) :\n-                pep = pep + "Y"\n-     
       elif ( sequence[i:i+3] == "TAA" or sequence[i:i+3] == "TAG" ) :\n-                pep = pep + "*"\n-            elif ( sequence[i:i+3] == "CAT" or sequence[i:i+3] == "CAC" ) :\n-                pep = pep + "H"\n-            elif ( sequence[i:i+3] == "CAA" or sequence[i:i+3] == "CAG" ) :\n-                pep = pep + "Q"\n-            elif ( sequence[i:i+3] == "AAT" or sequence[i:i+3] == "AAC" ) :\n-                pep = pep + "N"\n-            elif ( sequence[i:i+3] == "AAA" or sequence[i:i+3] == "AAG" ) :\n-                pep = pep + "K"\n-            elif ( se'..b'\n-    writeBioseqListIntoFastaFile = staticmethod( writeBioseqListIntoFastaFile )\n-    \n-    ## read in a fasta file and create a list of bioseq instances\n-    #\n-    # @param fileName string\n-    # @return a list of bioseq\n-    #\n-    def extractBioseqListFromFastaFile( fileName ):\n-        file = open( fileName )\n-        lBioseq = []\n-        currentHeader = ""\n-        while currentHeader != None:\n-            bioseq = Bioseq()\n-            bioseq.read(file)\n-            currentHeader = bioseq.header\n-            if currentHeader != None:\n-                lBioseq.append(bioseq)\n-        return lBioseq\n-    \n-    extractBioseqListFromFastaFile = staticmethod( extractBioseqListFromFastaFile )\n-    \n-    ## Give the length of a sequence search by name\n-    #\n-    # @param lBioseq a list of bioseq instances\n-    # @param seqName string\n-    # @return an integer\n-    #\n-    def getSeqLengthWithSeqName( lBioseq, seqName ):\n-        length = 0\n-        for bioseq in lBioseq:\n-            if bioseq.header == seqName:\n-                length = bioseq.getLength()\n-                break        \n-        return length\n-\n-    getSeqLengthWithSeqName = staticmethod( getSeqLengthWithSeqName )\n-\n-    def _translateInPositiveFrames( bioSeqInstanceToTranslate ):\n-        seq1 = bioSeqInstanceToTranslate.copyBioseqInstance()\n-        
BioseqUtils.setFrameInfoOnHeader(seq1, 1)\n-        BioseqUtils.translateSequence(seq1, 1)\n-        seq2 = bioSeqInstanceToTranslate.copyBioseqInstance()\n-        BioseqUtils.setFrameInfoOnHeader(seq2, 2)\n-        BioseqUtils.translateSequence(seq2, 2)\n-        seq3 = bioSeqInstanceToTranslate.copyBioseqInstance()\n-        BioseqUtils.setFrameInfoOnHeader(seq3, 3)\n-        BioseqUtils.translateSequence(seq3, 3)\n-        return [seq1, seq2, seq3]\n-    \n-    _translateInPositiveFrames = staticmethod( _translateInPositiveFrames )\n-    \n-    def _translateInNegativeFrames(bioSeqInstanceToTranslate):\n-        seq4 = bioSeqInstanceToTranslate.copyBioseqInstance()\n-        seq4.reverseComplement()\n-        BioseqUtils.setFrameInfoOnHeader(seq4, 4)\n-        BioseqUtils.translateSequence(seq4, 1)\n-        seq5 = bioSeqInstanceToTranslate.copyBioseqInstance()\n-        seq5.reverseComplement()\n-        BioseqUtils.setFrameInfoOnHeader(seq5, 5)\n-        BioseqUtils.translateSequence(seq5, 2)\n-        seq6 = bioSeqInstanceToTranslate.copyBioseqInstance()\n-        seq6.reverseComplement()\n-        BioseqUtils.setFrameInfoOnHeader(seq6, 6)\n-        BioseqUtils.translateSequence(seq6, 3)\n-        return [seq4, seq5, seq6]\n-    \n-    _translateInNegativeFrames = staticmethod( _translateInNegativeFrames )\n-    \n-    \n-    ## Return a dictionary which keys are sequence headers and values sequence lengths.\n-    #\n-    def getLengthPerSeqFromFile( inFile ):\n-        dHeader2Length = {}\n-        inFileHandler = open( inFile, "r" )\n-        while True:\n-            iBs = Bioseq()\n-            iBs.read( inFileHandler )\n-            if iBs.sequence == None:\n-                break\n-            dHeader2Length[ iBs.header ] = iBs.getLength()\n-        inFileHandler.close()\n-        return dHeader2Length\n-    \n-    getLengthPerSeqFromFile = staticmethod( getLengthPerSeqFromFile )\n-    \n-    \n-    ## Return the list of Bioseq instances, these being 
sorted in decreasing length\n-    #\n-    def getBioseqListSortedByDecreasingLength( lBioseqs ):\n-        return sorted( lBioseqs, key=lambda iBs: ( iBs.getLength() ), reverse=True )\n-    \n-    getBioseqListSortedByDecreasingLength = staticmethod( getBioseqListSortedByDecreasingLength )\n-    \n-    \n-    ## Return the list of Bioseq instances, these being sorted in decreasing length (without gaps)\n-    #\n-    def getBioseqListSortedByDecreasingLengthWithoutGaps( lBioseqs ):\n-        return sorted( lBioseqs, key=lambda iBs: ( len(iBs.sequence.replace("-","")) ), reverse=True )\n-    \n-    getBioseqListSortedByDecreasingLengthWithoutGaps = staticmethod( getBioseqListSortedByDecreasingLengthWithoutGaps )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/ClusterConsensusCollection.py
--- a/commons/core/seq/ClusterConsensusCollection.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,66 +0,0 @@
-import re
-from commons.core.seq.BioseqDB import BioseqDB
-
-## Record a collection of bioseqDB representing cluster consensus
-#
-class ClusterConsensusCollection(object):
-
-    ## constructor
-    #
-    # @param clusterFileName string name of file containing the cluster of consensus
-    #
-    def __init__(self, clusterFileName):
-        self._clusterFileName = clusterFileName
-        self._lClusterConsensus = []
-
-    def __eq__(self, o):
-        return self._clusterFileName == o._clusterFileName and self._lClusterConsensus == o._lClusterConsensus
-
-    def getLClusterConsensus(self):
-        return self._lClusterConsensus
-    
-    def fillCollection(self):
-        iBioseqDBAllCluster = BioseqDB()
-        fClusterFile = open(self._clusterFileName, "r")
-        iBioseqDBAllCluster.read(fClusterFile)
-        fClusterFile.close()
-        lHeader = iBioseqDBAllCluster.getHeaderList()
-        firstHeader = lHeader[0]
-        previousClusterName, seqHeader = self._getClusterNameAndSeqHeader(firstHeader)
-        clusterConsensus = BioseqDB()
-        clusterConsensus.setName(previousClusterName)
-        self._addBioseqInClusterConsensus(iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus)
-        for header in lHeader[1:]:
-            clusterName, seqHeader = self._getClusterNameAndSeqHeader(header)
-            if clusterName != previousClusterName:
-                self._lClusterConsensus.append(clusterConsensus)
-                previousClusterName = clusterName
-                clusterConsensus = BioseqDB()
-                clusterConsensus.setName(previousClusterName)
-            self._addBioseqInClusterConsensus(iBioseqDBAllCluster, header, seqHeader, clusterConsensus)
-        self._lClusterConsensus.append(clusterConsensus)
-                
-    def _getClusterNameAndSeqHeader(self, header):
-        m = re.match("(\D*)(\d+)Mb\d+\s.*", header)
-        clusterNumber = m.group(2)
-        clusterName = m.group(1) + clusterNumber
-        lPartsHeaderheader = header.split(" ")
-        seqHeader = lPartsHeaderheader[1]
-        return clusterName, seqHeader
-
-    def _addBioseqInClusterConsensus(self, iBioseqDBAllCluster, firstHeader, seqHeader, clusterConsensus):
-        ibioseq = iBioseqDBAllCluster.fetch(firstHeader)
-        ibioseq.setHeader(seqHeader)
-        clusterConsensus.add(ibioseq)
-        
-    def getNumClusterForAConsensus(self, seqName):
-        nbCluster = 1
-        for bioseqDB in self._lClusterConsensus:
-            if seqName in bioseqDB.getHeaderList():
-                return nbCluster
-            nbCluster += 1
-            
-    def getNumConsensusInCluster(self, numCluster):
-        return self._lClusterConsensus[numCluster - 1].getSize()
-
-    
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/FastaUtils.py
--- a/commons/core/seq/FastaUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,1197 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import os\n-import sys\n-import string\n-import math\n-import shutil\n-import re\n-import glob\n-from operator import itemgetter\n-from commons.core.seq.BioseqDB import BioseqDB\n-from commons.core.seq.Bioseq import Bioseq\n-from commons.core.coord.MapUtils import MapUtils\n-from commons.core.coord.Range import Range\n-from commons.core.checker.CheckerUtils import CheckerUtils\n-from commons.core.launcher.LauncherUtils import LauncherUtils\n-from commons.core.coord.ConvCoord import ConvCoord\n-from commons.core.parsing.FastaParser import FastaParser\n-\n-\n-## Static methods for fasta file manipulation\n-#\n-class FastaUtils( object ):\n-    \n-    ## Count the number of sequences in the input fasta file\n-    #\n-    # @param inFile name of the input fasta file\n-    #\n-    # @return integer number of sequences in the input fasta file\n-    #\n-    @staticmethod\n-    def dbSize( inFile ):\n-        nbSeq = 0\n-        inFileHandler = open( inFile, "r" )\n-        line = inFileHandler.readline()\n-        while line:\n-            if line[0] == ">":\n-                nbSeq = nbSeq + 1\n-            line = inFileHandler.readline()\n-        inFileHandler.close()\n-        \n-        return nbSeq\n-    \n-    \n-    ## Compute the cumulative sequence length in the input fasta file\n-    #\n-    # @param inFile handler of the input fasta file\n-    #\n-    @staticmethod\n-    def dbCumLength( inFile ):\n-        cumLength = 0\n-        line = inFile.readline()\n-        while line:\n-            if line[0] != ">":\n-                cumLength += len(string.rstrip(line))\n-            line = inFile.readline()\n-    \n-        return cumLength\n-    \n-    \n-    ## Return a list with the length of each sequence in the input fasta file\n-    #\n-    # @param inFile string name of the input fasta file\n-    #\n-    
@staticmethod\n-    def dbLengths( inFile ):\n-        lLengths = []\n-        inFileHandler = open( inFile, "r" )\n-        currentLength = 0\n-        line = inFileHandler.readline()\n-        while line:\n-            if line[0] == ">":\n-                if currentLength != 0:\n-                    lLengths.append( currentLength )\n-                currentLength = 0\n-            else:\n-                currentLength += len(line[:-1])\n-            line = inFileHandler.readline()\n-        lLengths.append( currentLength )\n-        inFileHandler.close()\n-        return lLengths\n-    \n-    \n-   '..b'f:\n-            line = f.readline()\n-            while line:\n-                lineWithoutLastChar = line.rstrip()\n-                lHeaders = lineWithoutLastChar.split("\\t")\n-                clusterId += 1\n-                if verbosity > 0:\n-                    print "%i sequences in cluster %i" % (len(lHeaders), clusterId)\n-                memberId = 0\n-                for header in lHeaders:\n-                    memberId += 1\n-                    dHeader2ClusterClusterMember[header] = (clusterId, memberId)\n-                line = f.readline()\n-            if verbosity > 0:\n-                print "%i clusters" % clusterId\n-        return dHeader2ClusterClusterMember, clusterId\n-    \n-    @staticmethod\n-    def convertClusteredFastaFileToMapFile(fastaFileNameFromClustering, outMapFileName = ""):\n-        """\n-        Write a map file from fasta output of clustering tool.\n-        Warning: only works if input fasta headers are formated like LTRharvest fasta output.\n-        """\n-        if not outMapFileName:\n-            outMapFileName = "%s.map" % (os.path.splitext(fastaFileNameFromClustering)[0])\n-        \n-        fileDb = open(fastaFileNameFromClustering , "r")\n-        fileMap = open(outMapFileName, "w")\n-        seq = Bioseq()\n-        numseq = 0\n-        while 1:\n-            seq.read(fileDb)\n-            if seq.sequence == 
None:\n-                break\n-            numseq = numseq + 1\n-            ID = seq.header.split(\' \')[0].split(\'_\')[0]\n-            chunk = seq.header.split(\' \')[0].split(\'_\')[1]\n-            start = seq.header.split(\' \')[-1].split(\',\')[0][1:]\n-            end = seq.header.split(\' \')[-1].split(\',\')[1][:-1]\n-            line = \'%s\\t%s\\t%s\\t%s\' % (ID, chunk, start, end)\n-            fileMap.write(line + "\\n")\n-    \n-        fileDb.close()\n-        fileMap.close()\n-        print "saved in %s" % outMapFileName\n-\n-    @staticmethod\n-    def writeNstreches(fastaFileName, nbN = 2, outFileName = "", outFormat = "map"):\n-        outFormat = outFormat.lower()\n-        if outFormat in ["gff", "gff3"]:\n-            outFormat = "gff3"\n-        else:\n-            outFormat = "map"\n-            \n-        lTNstretches = []\n-        if nbN != 0:\n-            iBSDB = BioseqDB(fastaFileName)\n-            for iBS in iBSDB.db:\n-                nbNFound = 0\n-                start = 1\n-                pos = 1\n-                lastPos = 0\n-                \n-                while pos <= iBS.getLength():\n-                    if nbNFound == 0:\n-                        start = pos\n-                        \n-                    while pos <= iBS.getLength() and iBS.getNtFromPosition(pos).lower() in [\'n\', \'x\']:\n-                        nbNFound += 1\n-                        pos += 1\n-                        lastPos = pos\n-                    \n-                    if pos - lastPos >= nbN:\n-                        if nbNFound >= nbN:\n-                            lTNstretches.append((iBS.getHeader(), start, lastPos - 1))\n-                        nbNFound = 0\n-                    pos += 1\n-                \n-                if nbNFound >= nbN:\n-                    lTNstretches.append((iBS.getHeader(), start, lastPos - 1))\n-    \n-            lTNstretches.sort(key = itemgetter(0, 1, 2))\n-        \n-        if outFileName == 
"":\n-            outFileName = "%s_Nstretches.%s" % (os.path.splitext(os.path.split(fastaFileName)[1])[0], outFormat)\n-        \n-        with open(outFileName, "w") as fH:\n-            if outFormat == "gff3":\n-                fH.write("##gff-version 3\\n")\n-            for item in lTNstretches:\n-                seq = item[0]\n-                start = item[1]\n-                end = item[2]\n-                if outFormat == "gff3":\n-                    fH.write("%s\\tFastaUtils\\tN_stretch\\t%s\\t%s\\t.\\t.\\t.\\tName=N_stretch_%s-%s\\n" % (seq, start, end, start, end))\n-                else:\n-                    fH.write("N_stretch\\t%s\\t%s\\t%s\\n" % (seq, start, end))\n-                \n-                \n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/__init__.pyc
b
Binary file commons/core/seq/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/test/TestClusterConsensusCollection.py
--- a/commons/core/seq/test/TestClusterConsensusCollection.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,67 +0,0 @@
-import unittest
-import os
-from commons.core.seq.ClusterConsensusCollection import ClusterConsensusCollection
-from commons.core.seq.Bioseq import Bioseq
-from commons.core.seq.BioseqDB import BioseqDB
-
-class TestClusterConsensusCollection(unittest.TestCase):
-
-    def setUp(self):
-        self._clusterSequencesFileName = "clusterSequences.fa"
-        self._ClusterConsensusCollection = ClusterConsensusCollection(self._clusterSequencesFileName)
-        self._createClusterConsensusFile()
-
-    def tearDown(self):
-        os.remove(self._clusterSequencesFileName)
-        
-    def test_fillCollection(self):
-        expClusterConsensusCollection = ClusterConsensusCollection(self._clusterSequencesFileName)
-        expClusterConsensusCollection._clusterFileName = self._clusterSequencesFileName
-        bioseq1 = Bioseq("seq1", "ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT")
-        bioseq2 = Bioseq("seq2", "ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT")
-        bioseq3 = Bioseq("seq3", "ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT")
-        iBioseqDB1 = BioseqDB()
-        iBioseqDB2 = BioseqDB()
-        iBioseqDB1.setData([bioseq1, bioseq2])
-        iBioseqDB2.setData([bioseq3])
-        expClusterConsensusCollection._lClusterConsensus = [iBioseqDB1, iBioseqDB2]
-        self._ClusterConsensusCollection.fillCollection()
-        self.assertEqual(expClusterConsensusCollection, self._ClusterConsensusCollection)
-        
-    def test_getNumClusterForAConsensus_for_seq2(self):
-        self._ClusterConsensusCollection.fillCollection()
-        expClusterNumber = 1
-        obsClusterNumber = self._ClusterConsensusCollection.getNumClusterForAConsensus ("seq2")
-        self.assertEqual(expClusterNumber, obsClusterNumber)
-        
-    def test_getNumClusterForAConsensus_for_seq3(self):
-        self._ClusterConsensusCollection.fillCollection()
-        expClusterNumber = 2
-        obsClusterNumber = self._ClusterConsensusCollection.getNumClusterForAConsensus ("seq3")
-        self.assertEqual(expClusterNumber, obsClusterNumber)
-        
-    def test_getNumConsensusInCluster_1(self):
-        self._ClusterConsensusCollection.fillCollection()
-        expConsensusNumber = 2
-        obsConsensusNumber = self._ClusterConsensusCollection.getNumConsensusInCluster (1)
-        self.assertEqual(expConsensusNumber, obsConsensusNumber)
-        
-    def test_getNumConsensusInCluster_2(self):
-        self._ClusterConsensusCollection.fillCollection()
-        expConsensusNumber = 1
-        obsConsensusNumber = self._ClusterConsensusCollection.getNumConsensusInCluster (2)
-        self.assertEqual(expConsensusNumber, obsConsensusNumber)
-    
-    def _createClusterConsensusFile(self):
-        fCluster = open(self._clusterSequencesFileName, "w")
-        fCluster.write(">BlastclustCluster1Mb1 seq1\n")
-        fCluster.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\n")
-        fCluster.write(">BlastclustCluster1Mb2 seq2\n")
-        fCluster.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\n")
-        fCluster.write(">BlasterGrouperCluster3Mb1 seq3\n")
-        fCluster.write("ACCAAAGACACTAGAATAACAAGATGCGTAACGCCATACGATTTTTTGGCACACTATTTT\n")
-        fCluster.close()
-
-
-if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/test/TestSuite_seq.py
--- a/commons/core/seq/test/TestSuite_seq.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-import unittest
-import sys
-import Test_AlignedBioseqDB
-import Test_Bioseq
-import Test_BioseqDB
-import Test_BioseqUtils
-import Test_FastaUtils
-
-
-def main():
-    
-        TestSuite_seq = unittest.TestSuite()
-        
-        TestSuite_seq.addTest( unittest.makeSuite( Test_AlignedBioseqDB.Test_AlignedBioseqDB, "test" ) )
-        TestSuite_seq.addTest( unittest.makeSuite( Test_Bioseq.Test_Bioseq, "test" ) )
-        TestSuite_seq.addTest( unittest.makeSuite( Test_BioseqDB.Test_BioseqDB, "test" ) )
-        TestSuite_seq.addTest( unittest.makeSuite( Test_BioseqUtils.Test_BioseqUtils, "test" ) )
-        TestSuite_seq.addTest( unittest.makeSuite( Test_FastaUtils.Test_FastaUtils, "test" ) )
-        
-        runner = unittest.TextTestRunner( sys.stderr, 2, 2 )
-        runner.run( TestSuite_seq )
-
-      
-if __name__ == "__main__":
-    main()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/test/Test_AlignedBioseqDB.py
--- a/commons/core/seq/test/Test_AlignedBioseqDB.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,773 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import sys\n-import os\n-import time\n-from commons.core.seq.AlignedBioseqDB import AlignedBioseqDB\n-from commons.core.seq.Bioseq import Bioseq\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.coord.Align import Align\n-from commons.core.coord.Range import Range\n-from commons.core.stat.Stat import Stat\n-\n-\n-class Test_AlignedBioseqDB( unittest.TestCase ):\n-    \n-    def setUp( self ):\n-        self._i = AlignedBioseqDB()\n-        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n-        \n-        \n-    def tearDown( self ):\n-        self._i = None\n-        self._uniqId = ""\n-        \n-        \n-    def test_getLength(self):\n-        iAlignedBioseqDB = AlignedBioseqDB()\n-\n-        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        iAlignedBioseqDB.setData([iBioseq1])\n-        \n-        expLenght = 29\n-        obsLength = iAlignedBioseqDB.getLength() \n-\n-        self.assertEquals(expLenght, obsLength)\n-        \n-        \n-    def test_getSeqLengthWithoutGaps( self ):\n-        iAlignedBioseqDB = AlignedBioseqDB()\n-        iAlignedBioseqDB.add( Bioseq( "seq3",\n-                                      "AGCG-GACGATGCAGCAT--GCGAATGA--CGAT" ) )\n-        expLenght = 29\n-        obsLength = iAlignedBioseqDB.getSeqLengthWithoutGaps( "seq3" )\n-        \n-        self.assertEquals(expLenght, obsLength)\n-        \n-        \n-    def test_getListOccPerSite(self):\n-        iBioseq1 = Bioseq( "seq1", "AGAAA")\n-        iBioseq2 = Bioseq( "seq2", "TCAAG")\n-        iBioseq3 = Bioseq( "seq3", "GGTAC")\n-        iBioseq4 = Bioseq( "seq4", "CCTTA")\n-        \n-        iAlignedBioseqDB = AlignedBioseqDB()\n-        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3, iBioseq4])\n-\n-        expList = [\n-          
      \n-                {"A":1, "T":1, "G":1, "C":1},\n-\n-                {"G":2, "C":2},\n-                   \n-                {"A":2, "T":2 },\n-                \n-                {"A":3, "T":1 },   \n-                \n-                {"A":2, "G":1, "C":1}\n-            ]\n-                \n-        obsList = iAlignedBioseqDB.getListOccPerSite()\n-       \n-        self.assertEquals(expList, obsList)\n-        \n-        \n-    def test_getListOccPerSite_with_none_sequence(self):\n-        iBioseq1 = Bioseq( "seq1", "AGAAA")\n-        iBioseq2 = Bioseq( "seq2", "TCAAG")\n-        iBi'..b'\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------\n-#>BlastclustCluster2Mb2_chunk7 (dbseq-nr 1) [99136,100579]\n-#GTAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATA\n-#ATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATA\n-#ATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATC\n-#ATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATA\n-#ATCATAATAATCATAATAATCATAATAATCATAATAATAATAATAATCATAATCATAATC\n-#ATAATAAGCGATAAAAAAATTAAAAAATAAAAATTAAAACCCACTGCAATCACGTTGGAC\n-#GGCGAGTCACAGACGTCAGAATAGTGGTGCGTAAATCCAACGCCGAGAAGAATTACTTCA\n-#AGAAGGTTTTTATTGAACTTCTTTATTCGGATATCAGTTTAAGACTAAAAATTAATAATC\n-#ATAAT---AATCATAATAATCATAATAATCATAATAATCATAATAAT-------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#-----------------------------------------------CATA-ATAATCAT\n-#AATAAT--CATAATAATCATA-ATAATCATAATAATCATAATAATCATAATAATCATAAT\n-#AATCATAATAATCATAATAATCATAA----TAATCATAATAATCATAATAATCATAATAA\n-#------------------------------------------------------------\n-#-----------------------------------
-------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------TCATAA-TAATCATAATAATCGTAA---TAATCATAA----TAATCATAATAAT\n-#CATAATAATCATAA-TAAT----CAT-----AATAATCAT-----AATAATCATAATAAT\n-#CATAATAATCATAATAATCATAATAATCATAATAATCATAAT-AA-TCAT--AA--TAAT\n-#-----CATAATAATCATAATAA--TCA----TAATAATC---AT---AATAATCATAATA\n-#-AT---CATAATAATCATAATAATC-----------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#-----------------------------------ATAATAATCATAAT-AATCA-----\n-#TAATAA------TCATAAT----AATCATAAT-AATCATAATAA-TCA-TAATAATCATA\n-#ATAATCATAATAATCATAATAATAATAATAATCATAATCATAATCATAATAAGCATAAAA\n-#AAAT--------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#------------------------------------------------------------\n-#TAAAAAATAAAAATTAAAACCCACTGCAA---TCACGTTGGACGGCGAGTC
ACAGACGTC\n-#A-GAAT-AGTGGTGCGTAAATCCAACGCCGAGAAGAATTACTTCAAGAAGGTTTTTATTG\n-#AACTTCTTTATTCGGATATCAGTTTAAGACTAAAAATTAATAATCATAAT---AATCATA\n-#ATAA---TCA-TAATAATCAT-AATAATCATAATAATCATAA-----TAA-TCATA-ATA\n-#ATCATAATAATCATAATAA--TCATAATA-ATCA-TAATAATCATAATAATCATAATCAT\n-#CATAATAATCATAATAAT--CATAA-T-------AATC--ATAATAATCATAATAATCAT\n-#AATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAAT\n-#CATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAATAATCATAAT\n-#AATCATAATAAT\n-\n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_AlignedBioseqDB ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/test/Test_Bioseq.py
--- a/commons/core/seq/test/Test_Bioseq.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,1051 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import os\n-import sys\n-from commons.core.seq.Bioseq import Bioseq \n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.coord.Map import Map\n-from commons.core.checker.RepetException import RepetException\n-\n-\n-class Test_Bioseq( unittest.TestCase ):\n-    \n-    def setUp(self):\n-        self._bs = Bioseq()\n-\n-\n-    def test_isEmpty_True(self):\n-        self._bs.setHeader( "" )\n-        self._bs.setSequence( "" )\n-        exp = True\n-        obs = self._bs.isEmpty()\n-        self.assertEquals( exp, obs )\n-\n-        \n-    def test_isEmpty_False(self):\n-        self._bs.setHeader( "seq1" )\n-        self._bs.setSequence( "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        exp = False\n-        obs = self._bs.isEmpty()\n-        self.assertEquals( exp, obs )\n-        \n-        \n-    def test___eq__(self):\n-        self._bs.setHeader( "seq1" )\n-        self._bs.setSequence( "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        obs = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        self.assertEquals( self._bs, obs )\n-        \n-        \n-    def test___ne__Header(self):\n-        self._bs.setHeader( "seq2" )\n-        self._bs.setSequence( "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        obs = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        self.assertNotEquals( self._bs, obs )\n-        \n-        \n-    def test___ne__Sequence(self):\n-        self._bs.setHeader( "seq1" )\n-        self._bs.setSequence( "GGACGATGCAGCATGCGAATGACGAT" )\n-        obs = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        self.assertNotEquals( self._bs, obs )\n-        \n-        \n-    def test_reverse(self):\n-        self._bs.setHeader( "seq1" )\n-        self._bs.setSequence( "TGCGGA" )\n-        exp = "AGGCGT"\n-        self._bs.reverse()\n-        obs = 
self._bs.sequence\n-        self.assertEqual( obs, exp )\n-        \n-        \n-    def test_complement(self):\n-        self._bs.setHeader( "seq1" )\n-        self._bs.setSequence( "TGCGGA" )\n-        exp = "ACGCCT"\n-        self._bs.complement()\n-        obs = self._bs.sequence\n-        self.assertEqual( obs, exp )\n-        \n-        \n-    def test_complement_with_unknown_symbol(self):\n-        self._bs.setHeader( "seq1" )\n-        self._bs.setSequence( "TGCGGAFMRWTYSKVHDBN" )\n-        exp = "ACGCCTNKYWARSMBDHVN"\n-        self._bs.complement()\n-        obs = self._bs.sequence\n-        '..b'       bioseq = Bioseq()\n-        bioseq.sequence = "ATGCNRATGCN\\rATGCAAT\\rTATA\\r"\n-        bioseq.checkEOF()\n-        obsSequence = bioseq.sequence\n-        expSequence = "ATGCNRATGCNATGCAATTATA"\n-        \n-        self.assertEquals(expSequence, obsSequence)\n-        \n-        \n-    def test_getLMapWhithoutGap(self):\n-        iBioseq = Bioseq()\n-        iBioseq.header = "header"\n-        iBioseq.sequence = "ATGC-RA-GCT"\n-        obsLMap = iBioseq.getLMapWhithoutGap()\n-        expLMap = [Map( "header_subSeq1", "header", 1, 4 ), Map( "header_subSeq2", "header", 6, 7 ), Map( "header_subSeq3", "header", 9, 11 )]\n-        \n-        self.assertEquals(expLMap, obsLMap)\n-        \n-        \n-    def test_getLMapWhithoutGap_seqStartsWithGap(self):\n-        iBioseq = Bioseq()\n-        iBioseq.header = "header"\n-        iBioseq.sequence = "-TGC-RA-GCT"\n-        obsLMap = iBioseq.getLMapWhithoutGap()\n-        expLMap = [Map( "header_subSeq1", "header", 2, 4 ), Map( "header_subSeq2", "header", 6, 7 ), Map( "header_subSeq3", "header", 9, 11 )]\n-        \n-        self.assertEquals(expLMap, obsLMap)\n-        \n-        \n-    def test_getLMapWhithoutGap_seqEndsWithGap(self):\n-        iBioseq = Bioseq()\n-        iBioseq.header = "header"\n-        iBioseq.sequence = "ATGC-RA-GC-"\n-        obsLMap = iBioseq.getLMapWhithoutGap()\n-        expLMap = 
[Map( "header_subSeq1", "header", 1, 4 ), Map( "header_subSeq2", "header", 6, 7 ), Map( "header_subSeq3", "header", 9, 10 )]\n-        \n-        self.assertEquals(expLMap, obsLMap)\n-        \n-    def test_getGCpercentage_onlyATGC( self ):\n-        iBs = Bioseq( "seq", "TGCAGCT" )\n-        exp = 100 * 4 / 7.0\n-        obs = iBs.getGCpercentage()\n-        self.assertEqual( exp, obs )\n-        \n-    def test_getGCpercentageInSequenceWithoutCountNInLength( self ):\n-        iBs = Bioseq( "seq", "TGCAGCTNNNNN" )\n-        exp = 100 * 4 / 7.0\n-        obs = iBs.getGCpercentageInSequenceWithoutCountNInLength()\n-        self.assertEqual( exp, obs )    \n-        \n-    def test_get5PrimeFlank(self):\n-        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n-        position = 7\n-        obsFlank = bs.get5PrimeFlank(position, 3)\n-        expFlank = "TTT"\n-        self.assertEquals(expFlank, obsFlank)\n-        \n-    def test_get5PrimeFlank_flank_length_truncated(self):\n-        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n-        position = 7\n-        obsFlank = bs.get5PrimeFlank(position, 15)\n-        expFlank = "AACTTT"\n-        self.assertEquals(expFlank, obsFlank)\n-        \n-    def test_get5PrimeFlank_flank_of_first_base(self):\n-        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n-        position = 1\n-        obsFlank = bs.get5PrimeFlank(position, 15)\n-        expFlank = ""\n-        self.assertEquals(expFlank, obsFlank)                \n-   \n-    def test_get3PrimeFlank(self):\n-        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n-        position = 7\n-        obsFlank = bs.get3PrimeFlank(position, 3)\n-        expFlank = "CAG"\n-        self.assertEquals(expFlank, obsFlank)\n-        \n-    def test_get3PrimeFlank_flank_length_truncated(self):\n-        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n-        position = 7\n-        obsFlank = bs.get3PrimeFlank(position, 15)\n-        expFlank = "CAGAA"\n-        self.assertEquals(expFlank, obsFlank)\n-        \n-    
def test_get3PrimeFlank_flank_of_last_base(self):\n-        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n-        position = 12\n-        obsFlank = bs.get3PrimeFlank(position, 15)\n-        expFlank = ""\n-        self.assertEquals(expFlank, obsFlank)\n-        \n-    def test_get3PrimeFlank_polymLength_different_of_1(self):\n-        bs = Bioseq( "line1", "AACTTTCCAGAA" )\n-        position = 7\n-        obsFlank = bs.get3PrimeFlank(position, 3, 2)\n-        expFlank = "AGA"\n-        self.assertEquals(expFlank, obsFlank) \n-        \n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_Bioseq ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/test/Test_BioseqDB.py
--- a/commons/core/seq/test/Test_BioseqDB.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,974 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import os\n-import time\n-from commons.core.seq.BioseqDB import BioseqDB\n-from commons.core.seq.Bioseq import Bioseq\n-from commons.core.utils.FileUtils import FileUtils\n-from commons.core.coord.Map import Map\n-\n-\n-class Test_BioseqDB( unittest.TestCase ):\n-    \n-    def setUp( self ):\n-        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S") , os.getpid() )\n-        \n-        \n-    def tearDown( self ):\n-        if os._exists("dummyBioseqDB.fa"):\n-            os.remove("dummyBioseqDB.fa")\n-            \n-            \n-    def test__eq__(self):\n-        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n-        expBioseqDB = BioseqDB()\n-        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )\n-        \n-        iBioseq3 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        iBioseq4 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n-        obsBioseqDB = BioseqDB()\n-        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n-        \n-        self.assertEquals( expBioseqDB, obsBioseqDB )\n-        \n-        \n-    def test__eq__instances_with_different_header(self):\n-        iBioseq1 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n-        expBioseqDB = BioseqDB()\n-        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )\n-        \n-        iBioseq3 = Bioseq( "seq3", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        iBioseq4 = Bioseq( "seq4", "GCGATGCGATCGATGCGATAGCA" )\n-        obsBioseqDB = BioseqDB()\n-        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n-        \n-        self.assertNotEquals( expBioseqDB, obsBioseqDB )\n-        \n-        \n-    def test__eq__instances_with_different_sequences(self):\n-        iBioseq1 = 
Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        iBioseq2 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCA" )\n-        expBioseqDB = BioseqDB()\n-        expBioseqDB.setData( [ iBioseq1, iBioseq2 ] )\n-        \n-        iBioseq3 = Bioseq( "seq1", "AGCGGACGATGCAGCATGCGAATGACGAT" )\n-        iBioseq4 = Bioseq( "seq2", "GCGATGCGATCGATGCGATAGCATATATATATATATATATATATAT" )\n-        obsBioseqDB = BioseqDB()\n-        obsBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n-        \n-        self.assertNotEquals( expBioseqDB, obsBioseqDB )\n-        \n-        \n-    def test__eq__instance'..b'9, iBioseq10, iBioseq11] )\n-       \n-        obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("consensus.*", inBioseqDB)\n-        self.assertEquals(expBioseqDB, obsBioseqDB)\n-        \n-        \n-    def test_addBioseqFromABioseqDBIfHeaderContainPattern_with_no_existing_pattern (self):\n-        iBioseq1 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")\n-        iBioseq2 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")\n-        iBioseq3 = Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")\n-        iBioseq4 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")\n-        obsBioseqDB = BioseqDB()\n-        obsBioseqDB.setData( [ iBioseq1, iBioseq2, iBioseq3, iBioseq4] )\n-        \n-        iBioseq5 = Bioseq("Sequence4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")\n-        iBioseq6 = Bioseq("consensus6","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")\n-        inBioseqDB = BioseqDB()\n-        inBioseqDB.setData( [ iBioseq5, iBioseq6 ])\n-\n-        iBioseq7 = Bioseq("consensus4","GAGATGGCTCATGGAGTACCGCGAGTGCGGTACCTATGGCCCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGCGAGATGGCTCATGGAGTACCGC")\n-        iBioseq8 = Bioseq("consensus1","TGCCTGAGATGGCTCATGGAGTACCTGCCT")\n-        iBioseq9 = Bioseq("consensus7","TGCCTTGCATGACTGCATGGAGTACCTGCCTG")\n-     
   iBioseq10 = Bioseq("consensus11","TGCCTGATGGCTCATGGAGTACCTGCCT")\n-        \n-        expBioseqDB = BioseqDB()\n-        expBioseqDB.setData( [ iBioseq7, iBioseq8, iBioseq9, iBioseq10] )\n-       \n-        obsBioseqDB.addBioseqFromABioseqDBIfHeaderContainPattern("noExistingPattern", inBioseqDB)\n-        self.assertEquals(expBioseqDB, obsBioseqDB)\n-        \n-        \n-    def test_upCase (self):\n-        iBioseq1 = Bioseq("consensus4","atgacGatgca")\n-        iBioseq2 = Bioseq("consensus1","atgcgaT")\n-        obsBioseqDB = BioseqDB()\n-        obsBioseqDB.setData( [ iBioseq1, iBioseq2 ] )\n-        iBioseq3 = Bioseq("consensus4","ATGACGATGCA")\n-        iBioseq4 = Bioseq("consensus1","ATGCGAT")\n-        expBioseqDB = BioseqDB()\n-        expBioseqDB.setData( [ iBioseq3, iBioseq4 ] )\n-        obsBioseqDB.upCase()\n-        self.assertEquals(expBioseqDB, obsBioseqDB)\n-        \n-        \n-    def test_getMap(self):\n-        iBioseq1 = Bioseq("header1","ATGC-RA-GCT")\n-        iBioseq2 = Bioseq("header2","-TGC-RA-GCT")\n-        iBioseq3 = Bioseq("header3","ATGC-RA-GC-")\n-\n-        iAlignedBioseqDB = BioseqDB()\n-        iAlignedBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])\n-        \n-        obsDict = iAlignedBioseqDB.getDictOfLMapsWithoutGaps()\n-        \n-        expLMap1 = [Map( "header1_subSeq1", "header1", 1, 4 ), Map( "header1_subSeq2", "header1", 6, 7 ), Map( "header1_subSeq3", "header1", 9, 11 )]\n-        expLMap2 = [Map( "header2_subSeq1", "header2", 2, 4 ), Map( "header2_subSeq2", "header2", 6, 7 ), Map( "header2_subSeq3", "header2", 9, 11 )]\n-        expLMap3 = [Map( "header3_subSeq1", "header3", 1, 4 ), Map( "header3_subSeq2", "header3", 6, 7 ), Map( "header3_subSeq3", "header3", 9, 10 )]    \n-        \n-        expDict = {\n-                   "header1": expLMap1,\n-                   "header2": expLMap2,\n-                   "header3": expLMap3\n-                   } \n-        \n-        self.assertEquals(expDict, 
obsDict)\n-\n-    def test_getSeqLengthByListOfName(self):\n-        iBioseq1 = Bioseq("header1","ATGC-RA-GCT")\n-        iBioseq2 = Bioseq("header2","-TGC-RAR")\n-        iBioseq3 = Bioseq("header3","ATGC")\n-\n-        iBioseqDB = BioseqDB()\n-        iBioseqDB.setData([iBioseq1, iBioseq2, iBioseq3])\n-        \n-        expList =  [11, 4]\n-        obsList = iBioseqDB.getSeqLengthByListOfName(["header1", "header3"])\n-        \n-        self.assertEquals( expList, obsList )        \n-        \n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_BioseqDB ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/test/Test_BioseqUtils.py
--- a/commons/core/seq/test/Test_BioseqUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,498 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import unittest\n-import os\n-from commons.core.seq.Bioseq import Bioseq\n-from commons.core.seq.BioseqUtils import BioseqUtils\n-from commons.core.utils.FileUtils import FileUtils\n-\n-\n-class Test_BioseqUtils( unittest.TestCase ):\n-    \n-    def test_translateSequence_one_nt( self ):\n-        bioseq = Bioseq()\n-        bioseq.sequence = "G"\n-        BioseqUtils.translateSequence(bioseq, 1)\n-        expSequence = ""\n-        obsSequence = bioseq.sequence\n-        self.assertEqual(expSequence, obsSequence)\n-        \n-        \n-    def test_translateSequence_frame1( self ):\n-        bioseq = Bioseq()\n-        bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"\n-        BioseqUtils.translateSequence(bioseq, 1)\n-        expSequence = "XGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n-        obsSequence = bioseq.sequence\n-        self.assertEqual(expSequence, obsSequence)\n-        \n-        \n-    def test_translateSequence_frame2( self ):\n-        bioseq = Bioseq()\n-        bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"\n-        BioseqUtils.translateSequence(bioseq, 2)\n-        expSequence = "VASS*SVYDHNDFT*VSRGSD*STI*CE*SL"\n-        obsSequence = bioseq.sequence\n-        self.assertEqual(expSequence, obsSequence)\n-        \n-        \n-    def test_translateSequence_frame3( self ):\n-        bioseq = Bioseq()\n-        bioseq.sequence = "NGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"\n-        BioseqUtils.translateSequence(bioseq, 3)\n-        expSequence = "WLLVDQFMITMISRRCLVAPTNQQYNASRA*"\n-        obsSequence = bioseq.sequence\n-        self.assertEqual(expSequence, obsSequence)\n-        \n-        \n-    def 
test_setFrameInfoOnHeader(self):\n-        bioseq = Bioseq()\n-        bioseq.header = "header1 description1 description2"\n-        BioseqUtils.setFrameInfoOnHeader(bioseq,1)\n-        expHeader = "header1_1 description1 description2"\n-        obsHeader = bioseq.header\n-        self.assertEquals(expHeader,obsHeader)\n-        \n-        \n-    def test_setFrameInfoOnHeader_header_without_space(self):\n-        bioseq = Bioseq()\n-        bioseq.header = "header"\n-        BioseqUtils.setFrameInfoOnHeader(bioseq,1)\n-        expHeader = "header_1"\n-        obsHeader = bioseq.header\n-  '..b'   bioseq2.header = "header2"\n-        bioseq2.sequence = "ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT"\n-        \n-        lBioseq = [bioseq1, bioseq2]\n-        \n-        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header1 description")\n-        expLength = 31\n-        \n-        self.assertEquals( expLength, obsLength)\n-        \n-        \n-    def test_getSeqLengthWithSeqName_second_item ( self ):\n-        bioseq1 = Bioseq()\n-        bioseq1.header = "header1 description"\n-        bioseq1.sequence = "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n-        \n-        bioseq2 = Bioseq()\n-        bioseq2.header = "header2"\n-        bioseq2.sequence = "ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT"\n-        \n-        lBioseq = [bioseq1, bioseq2]\n-        \n-        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")\n-        expLength = 44\n-        \n-        self.assertEquals( expLength, obsLength)\n-        \n-        \n-    def test_getSeqLengthWithSeqName_empty_list ( self ):\n-        lBioseq = []\n-        \n-        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")\n-        expLength = 0\n-        \n-        self.assertEquals( expLength, obsLength)\n-        \n-        \n-    def test_getSeqLengthWithSeqName_empty_sequence ( self ):\n-        bioseq1 = Bioseq()\n-        bioseq1.header = "header1 description"\n-      
  bioseq1.sequence = "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n-        \n-        bioseq2 = Bioseq()\n-        bioseq2.header = "header2"\n-        bioseq2.sequence = ""\n-        \n-        lBioseq = [bioseq1, bioseq2]\n-        \n-        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header2")\n-        expLength = 0\n-        \n-        self.assertEquals( expLength, obsLength)\n-        \n-        \n-    def test_getSeqLengthWithSeqName_sequence_unknown ( self ):\n-        bioseq1 = Bioseq()\n-        bioseq1.header = "header1 description"\n-        bioseq1.sequence = "CGF*LISL*SQ*FHVGVSWLRLINNIMRVEL"\n-        \n-        bioseq2 = Bioseq()\n-        bioseq2.header = "header2"\n-        bioseq2.sequence = "ATGCGTGCGTAAATGCGTATGCGTATGCGTTCGCGAATGCGTGT"\n-        \n-        lBioseq = [bioseq1, bioseq2]\n-        \n-        obsLength = BioseqUtils.getSeqLengthWithSeqName(lBioseq, "header3")\n-        expLength = 0\n-        \n-        self.assertEquals( expLength, obsLength)\n-        \n-        \n-    def test_getLengthPerSeqFromFile( self ):\n-        inFile = "dummyInFile"\n-        inFileHandler = open( inFile, "w" )\n-        inFileHandler.write( ">seq1\\nAGCGATGCAGCTA\\n" )\n-        inFileHandler.write( ">seq2\\nGCGATGCGCATCGACGCGA\\n" )\n-        inFileHandler.close()\n-        \n-        dExp = { "seq1": 13, "seq2": 19 }\n-        \n-        dObs = BioseqUtils.getLengthPerSeqFromFile( inFile )\n-        \n-        self.assertEqual( dExp, dObs )\n-        \n-        os.remove( inFile )\n-        \n-        \n-    def test_getBioseqListSortedByDecreasingLength( self ):\n-        lBioseqs = [ Bioseq( "TE2", "ACC" ),\n-                    Bioseq( "TE3", "TA" ),\n-                    Bioseq( "TE1", "AGCG" ) ]\n-        lExp = [ Bioseq( "TE1", "AGCG" ),\n-                Bioseq( "TE2", "ACC" ),\n-                Bioseq( "TE3", "TA" ) ]\n-        lObs = BioseqUtils.getBioseqListSortedByDecreasingLength( lBioseqs )\n-        self.assertEquals( lExp, lObs 
)\n-        \n-        \n-    def test_getBioseqListSortedByDecreasingLengthWithoutGaps( self ):\n-        lBioseqs = [ Bioseq( "TE2", "-ACC-" ),\n-                    Bioseq( "TE3", "TA---" ),\n-                    Bioseq( "TE1", "-AGCG" ) ]\n-        lExp = [ Bioseq( "TE1", "-AGCG" ),\n-                Bioseq( "TE2", "-ACC-" ),\n-                Bioseq( "TE3", "TA---" ) ]\n-        lObs = BioseqUtils.getBioseqListSortedByDecreasingLengthWithoutGaps( lBioseqs )\n-        self.assertEquals( lExp, lObs )\n-        \n-        \n-test_suite = unittest.TestSuite()\n-test_suite.addTest( unittest.makeSuite( Test_BioseqUtils ) )\n-if __name__ == "__main__":\n-    unittest.TextTestRunner(verbosity=2).run( test_suite )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/test/Test_FastaUtils.py
--- a/commons/core/seq/test/Test_FastaUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,1694 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-from commons.core.seq.FastaUtils import FastaUtils\n-from commons.core.seq.test.Utils_for_T_FastaUtils import Utils_for_T_FastaUtils\n-from commons.core.utils.FileUtils import FileUtils\n-import glob\n-import os\n-import shutil\n-import unittest\n-\n-\n-class Test_FastaUtils( unittest.TestCase ):\n-    \n-        \n-    def test_dbSize_for_empty_file(self):\n-        fileName = "dummyFastaFile.fa"\n-        Utils_for_T_FastaUtils._createFastaFile_for_empty_file(fileName)\n-        \n-        obsNb = FastaUtils.dbSize( fileName )\n-        \n-        expNb = 0\n-        os.remove(fileName)\n-        self.assertEquals(expNb, obsNb)\n-        \n-        \n-    def test_dbSize_one_sequence(self):\n-        fileName = "dummyFastaFile.fa"\n-        Utils_for_T_FastaUtils._createFastaFile_one_sequence(fileName)\n-        \n-        obsNb = FastaUtils.dbSize( fileName )\n-        \n-        expNb = 1\n-        os.remove(fileName)\n-        self.assertEquals(expNb, obsNb)\n-        \n-        \n-    def test_dbSize_four_sequences(self):\n-        fileName = "dummyFastaFile.fa"\n-        Utils_for_T_FastaUtils._createFastaFile_four_sequences(fileName)\n-        \n-        obsNb = FastaUtils.dbSize( fileName )\n-        \n-        expNb = 4\n-        os.remove(fileName)\n-        self.assertEquals(expNb, obsNb)\n-        \n-        \n-    def test_dbChunks(self):\n-        inFileName = "dummyBigSeqFastaFile.fa"\n-        expChunksFileName = \'exp\' + inFileName +\'_chunks.fa\'\n-        expChunksMapFileName = \'exp\' + inFileName +\'_chunks.map\'\n-        expCutFileName = \'exp\' + inFileName +\'_cut\'\n-        expNStretchFileName = \'exp\' + inFileName +\'.Nstretch.map\'\n-        Utils_for_T_FastaUtils._createFastaFile_big_sequence(inFileName)\n-        
Utils_for_T_FastaUtils._createFastaFile_of_Chunks(expChunksFileName)\n-        Utils_for_T_FastaUtils._createMapFile_of_Chunks(expChunksMapFileName)\n-        Utils_for_T_FastaUtils._createFastaFile_of_cut(expCutFileName)\n-        Utils_for_T_FastaUtils._createFastaFile_of_Nstretch(expNStretchFileName)\n-        \n-        FastaUtils.dbChunks(inFileName, \'60\', \'10\', \'11\', \'\', False, 0)\n-        \n-        obsChunksFileName = inFileName +\'_chunks.fa\'\n-        obsChunksMapFileName = inFileName +\'_chunks.map\'\n-        obsCutFileName = inFileName +\'_cut\'\n-        obsNStretchFileName = inFileName +\'.Nstretch.map\'\n-        \n-        self.assertTrue(FileUtils.are2'..b'")\n-            f.write("TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\\n")\n-            f.write("TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\\n")\n-            f.write("NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATANN\\n")\n-            \n-        FastaUtils.writeNstreches(fileName, 0)\n-        obsFileName = "%s_Nstretches.map" % os.path.splitext(os.path.split(fileName)[1])[0]\n-        \n-        expFileName = "expNstretches.map"\n-        with open(expFileName, "w") as f:\n-            pass\n-        \n-        self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName))\n-        \n-        os.remove(obsFileName)\n-        os.remove(expFileName)\n-        os.remove(fileName)\n-        \n-    def test_getNstreches_2_GFF(self):\n-        fileName = "dummy.fa"\n-        with open(fileName, "w") as f:\n-            f.write(">seq2\\n")\n-            f.write("NNNNxxNNnNTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n-            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n-            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n-            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n-            
f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n-            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\\n")\n-            f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\\n")\n-            f.write("TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\\n")\n-            f.write("\\n")\n-            f.write("TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\\n")\n-            f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\\n")\n-            f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\\n")\n-            f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\\n")\n-            f.write("\\n")\n-            f.write(">seq1\\n")\n-            f.write("AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\\n")\n-            f.write("TTGCGGATCATxxxxxxxxxxxxxxxTTTGTTTGAACAACCGACAATGCGACCAATTTCA\\n")\n-            f.write("GCGTAGGTTTTACCTTCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAA\\n")\n-            f.write("TGCTTTCCGCGACCCATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAA\\n")\n-            f.write("AACCTTTAATACAACTCCTTNNTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACT\\n")\n-            f.write("CCTATTAATTTTATTCAGCAAATACGTGTTCAGTGCTATTTTTGTNTACCGCCTCATTTC\\n")\n-            f.write("\\n")\n-            f.write("GCGCACTTTTGCAGCAAGTGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAA\\n")\n-            f.write("TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\\n")\n-            f.write("TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\\n")\n-            f.write("NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATAxx\\n")\n-            f.write("\\n")\n-            f.write("")\n-            \n-        FastaUtils.writeNstreches(fileName, 2, outFormat = "gff")\n-        obsFileName = "%s_Nstretches.gff3" % os.path.splitext(os.path.split(fileName)[1])[0]\n-        \n-        expFileName = "expNstretches.gff3"\n-        with open(expFileName, 
"w") as f:\n-            f.write("##gff-version 3\\n")\n-            f.write("seq1\\tFastaUtils\\tN_stretch\\t72\\t86\\t.\\t.\\t.\\tName=N_stretch_72-86\\n")\n-            f.write("seq1\\tFastaUtils\\tN_stretch\\t261\\t262\\t.\\t.\\t.\\tName=N_stretch_261-262\\n")\n-            f.write("seq1\\tFastaUtils\\tN_stretch\\t510\\t542\\t.\\t.\\t.\\tName=N_stretch_510-542\\n")\n-            f.write("seq1\\tFastaUtils\\tN_stretch\\t592\\t593\\t.\\t.\\t.\\tName=N_stretch_592-593\\n")\n-            f.write("seq2\\tFastaUtils\\tN_stretch\\t1\\t10\\t.\\t.\\t.\\tName=N_stretch_1-10\\n")\n-            \n-        self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName))\n-        \n-        os.remove(obsFileName)\n-        os.remove(expFileName)\n-        os.remove(fileName)\n-            \n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/seq/test/Utils_for_T_FastaUtils.py
--- a/commons/core/seq/test/Utils_for_T_FastaUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,857 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-class Utils_for_T_FastaUtils( object ):\n-    \n-    def _createFastaFile_for_empty_file(fileName):\n-        f = open(fileName, \'w\')\n-        f.write("")\n-        f.close()\n-        \n-    _createFastaFile_for_empty_file = staticmethod ( _createFastaFile_for_empty_file )\n-    \n-    \n-    def _createFastaFile_one_sequence(fileName):\n-        f = open(fileName, \'w\')\n-        f.write(">seq 1\\n")\n-        f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n-        f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n-        f.write("ATATTCG\\n")\n-        f.close()\n-        \n-    _createFastaFile_one_sequence = staticmethod ( _createFastaFile_one_sequence )\n-    \n-    \n-    def createFastaFile_twoSequences( fileName ):\n-        f = open( fileName, "w" )\n-        f.write( ">seq 1\\n" )\n-        f.write( "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n" )\n-        f.write( "ATATTCG\\n" )\n-        f.write( ">seq 2\\n" )\n-        f.write( "ATATTCTTTCATCGATCGATCGGCGGCTATATGCTAGTGACGAAGCTAGTGTGAGTAGTA\\n" )\n-        f.write( "ATATTCG\\n" )\n-        f.close()\n-        \n-    createFastaFile_twoSequences = staticmethod ( createFastaFile_twoSequences )\n-    \n-    \n-    def createFastaFile_seq_1( fileName ):\n-        f = open( fileName, "w" )\n-        f.write( ">seq 1\\n" )\n-        f.write( "ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n" )\n-        f.write( "ATATTCG\\n" )\n-        f.close()\n-        \n-    createFastaFile_seq_1 = staticmethod( createFastaFile_seq_1 )\n-    \n-    \n-    def createFastaFile_seq_2( fileName ):\n-        f = open( fileName, "w" )\n-        f.write( ">seq 2\\n" )\n-        f.write( "ATATTCTTTCATCGATCGATCGGCGGCTATATGCTAGTGACGAAGCTAGTGTGAGTAGTA\\n" )\n-        f.write( "ATATTCG\\n" 
)\n-        f.close()\n-        \n-    createFastaFile_seq_2 = staticmethod( createFastaFile_seq_2 )\n-    \n-    \n-    def _createFastaFile_sequence_without_header(fileName):\n-        f = open(fileName, \'w\')\n-        f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n-        f.write("ATATTCGCGCATCGATCGATCGGCGGCTATATGCTAGTCAGCTAGCTAGTGTGAGTAGTA\\n")\n-        f.write("ATATTCG\\n")\n-        f.close()\n-        \n-    _createFastaFile_sequence_without_header = staticmethod ( _createFastaFile_sequence_without_header )\n-    \n-        \n-    def _createFastaFile_four_sequences'..b'CCTCGATGAAATGGTCGCG\\n")\n-        f.write("CGCGTACGATAATGCGGGCCTGGCTCACGGATGCGCGCCTTTCCCTATCGTCAGTCACGC\\n")\n-        f.write("AAATGTAGGCTTCCATCTGGAACGCTGCTTGATGGCCTAAGAATGGGCCGTCACGGAACA\\n")\n-        f.write("GCTCACCGCCTGCAGACACGAACGGCCGTGGCGGTCATGGAAGGATCTGAACGTGTCGCC\\n")\n-        f.write("CCATACGATTGACGAAGAGATGTAAGCTCCCTTGGTA\\n")\n-        f.close()\n-    \n-    _createFastaFile_three_sequences_with_ORFs = staticmethod ( _createFastaFile_three_sequences_with_ORFs )\n-    \n-    \n-    def _createFastaFile_three_sequences_with_ORFs_expected(fileName):  \n-        f = open(fileName, \'w\') \n-        f.write("ORF|1|662\\tMivi_sl_Blaster_Grouper_1_Map_3\\t307\\t969\\n")\n-        f.write("ORF|-3|254\\tMivi_sl_Blaster_Grouper_1_Map_3\\t793\\t539\\n")\n-        f.write("ORF|2|197\\tMivi_sl_Blaster_Grouper_1_Map_3\\t356\\t553\\n")\n-        f.write("ORF|3|176\\tMivi_sl_Blaster_Grouper_1_Map_3\\t288\\t464\\n")\n-        f.write("ORF|-1|176\\tMivi_sl_Blaster_Grouper_1_Map_3\\t786\\t610\\n")\n-        f.write("ORF|3|143\\tMivi_sl_Blaster_Grouper_1_Map_3\\t672\\t815\\n")\n-        f.write("ORF|1|131\\tMivi_sl_Blaster_Grouper_1_Map_3\\t175\\t306\\n")\n-        f.write("ORF|-2|131\\tMivi_sl_Blaster_Grouper_1_Map_3\\t797\\t666\\n")\n-        f.write("ORF|2|128\\tMivi_sl_Blaster_Grouper_1_Map_3\\t167\\t295\\n")\n-        
f.write("ORF|-2|119\\tMivi_sl_Blaster_Grouper_1_Map_3\\t242\\t123\\n")\n-        f.write("ORF|1|464\\tMivi_sl_Blaster_Grouper_2_Map_3\\t304\\t768\\n")\n-        f.write("ORF|3|305\\tMivi_sl_Blaster_Grouper_2_Map_3\\t669\\t974\\n")\n-        f.write("ORF|-3|251\\tMivi_sl_Blaster_Grouper_2_Map_3\\t1094\\t843\\n")\n-        f.write("ORF|-2|245\\tMivi_sl_Blaster_Grouper_2_Map_3\\t531\\t286\\n")\n-        f.write("ORF|-3|224\\tMivi_sl_Blaster_Grouper_2_Map_3\\t791\\t567\\n")\n-        f.write("ORF|-2|215\\tMivi_sl_Blaster_Grouper_2_Map_3\\t1098\\t883\\n")\n-        f.write("ORF|2|197\\tMivi_sl_Blaster_Grouper_2_Map_3\\t353\\t550\\n")\n-        f.write("ORF|3|173\\tMivi_sl_Blaster_Grouper_2_Map_3\\t288\\t461\\n")\n-        f.write("ORF|-1|173\\tMivi_sl_Blaster_Grouper_2_Map_3\\t1087\\t914\\n")\n-        f.write("ORF|-1|143\\tMivi_sl_Blaster_Grouper_2_Map_3\\t310\\t167\\n")\n-        f.write("ORF|3|626\\tMivi_sl_Blaster_Grouper_3_Map_3\\t141\\t767\\n")\n-        f.write("ORF|2|434\\tMivi_sl_Blaster_Grouper_3_Map_3\\t164\\t598\\n")\n-        f.write("ORF|3|365\\tMivi_sl_Blaster_Grouper_3_Map_3\\t768\\t1133\\n")\n-        f.write("ORF|-3|359\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1514\\t1155\\n")\n-        f.write("ORF|-1|320\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1879\\t1559\\n")\n-        f.write("ORF|3|272\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1299\\t1571\\n")\n-        f.write("ORF|-2|248\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1503\\t1255\\n")\n-        f.write("ORF|1|236\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1576\\t1812\\n")\n-        f.write("ORF|-1|227\\tMivi_sl_Blaster_Grouper_3_Map_3\\t1423\\t1196\\n")\n-        f.write("ORF|-3|227\\tMivi_sl_Blaster_Grouper_3_Map_3\\t368\\t141\\n")\n-        f.close()\n-\n-    _createFastaFile_three_sequences_with_ORFs_expected = staticmethod ( _createFastaFile_three_sequences_with_ORFs_expected )\n-    \n-    \n-    def _createLinkFile_four_sequences_with_new_headers(fileName):\n-        f = open(fileName, \'w\')\n-        f.write("seq 
1\\tReconCluster1Mb155 chunk183 {Fragment} 1..5506\\t1\\t127\\n")\n-        f.write("seq 2\\tMbQ3Gr2Cl0 chunk440 {Fragment} 2678..3645\\t1\\t307\\n")\n-        f.write("seq 3\\tMbS2Gr2Cl0 chunk622 {Fragment} 104..1078\\t1\\t427\\n")\n-        f.write("seq 4\\tPilerCluster3.574Mb796 chunk0117 {Fragment} 51582..50819\\t1\\t307\\n")\n-        \n-    _createLinkFile_four_sequences_with_new_headers = staticmethod ( _createLinkFile_four_sequences_with_new_headers )\n-    \n-    \n-    def _createLinkFile_four_sequences_same_headers(fileName):\n-        f = open(fileName, \'w\')\n-        f.write("seq 1\\tseq 1\\t1\\t127\\n")\n-        f.write("seq 2\\tseq 2\\t1\\t307\\n")\n-        f.write("seq 3\\tseq 3\\t1\\t427\\n")\n-        f.write("seq 4\\tseq 4\\t1\\t307\\n")\n-        \n-    _createLinkFile_four_sequences_same_headers = staticmethod ( _createLinkFile_four_sequences_same_headers )\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/utils/FileUtils.py
--- a/commons/core/utils/FileUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,445 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import os\n-import glob\n-import shutil\n-import sys\n-import re\n-import math\n-try:\n-    import hashlib\n-except:\n-    pass\n-\n-\n-class FileUtils( object ):\n-    \n-    ## Return the number of lines in the given file\n-    #\n-    def getNbLinesInSingleFile( fileName ):\n-        fileHandler = open( fileName, "r" )\n-        lines = fileHandler.readlines()\n-        fileHandler.close()\n-        if (len(lines)>0 and lines[-1]== "\\n"):\n-            return (len(lines)-1)\n-        else :\n-            return len(lines)\n-    \n-    getNbLinesInSingleFile = staticmethod( getNbLinesInSingleFile )\n-    \n-    ## Return the number of lines in the files in the given list\n-    #\n-    def getNbLinesInFileList( lFileNames ):\n-        count = 0\n-        for fileName in lFileNames:\n-            count += FileUtils.getNbLinesInSingleFile( fileName )\n-        return count\n-    \n-    getNbLinesInFileList = staticmethod( getNbLinesInFileList )\n-    \n-    ## Return True if the given file exists, False otherwise\n-    #\n-    def isRessourceExists( fileName ):\n-        return os.path.exists( fileName )\n-    \n-    isRessourceExists = staticmethod( isRessourceExists )\n-    \n-    ## Return True if the given file is empty, False otherwise\n-    #\n-    def isEmpty( fileName ):\n-        return 0 == FileUtils.getNbLinesInSingleFile( fileName )\n-    \n-    isEmpty = staticmethod( isEmpty )\n-    \n-    ## Return True if both files are identical, False otherwise\n-    #\n-    def are2FilesIdentical( file1, file2 ):\n-        tmpFile = "diff_%s_%s" % ( os.path.basename(file1), os.path.basename(file2) )\n-        cmd = "diff %s %s >> %s" % ( file1, file2, tmpFile )\n-        returnStatus = os.system( cmd )\n-        if returnStatus != 0:\n-            print "WARNING: \'diff\' returned \'%i\'" % returnStatus\n-        
    os.remove( tmpFile )\n-            return False\n-        if FileUtils.isEmpty( tmpFile ):\n-            os.remove( tmpFile )\n-            return True\n-        else:\n-            os.remove( tmpFile )\n-            return False\n-        \n-    are2FilesIdentical = staticmethod( are2FilesIdentical )\n-    \n-    ## Return a string with all the content of the files in the given list\n-    #\n-    def getFileContent( lFiles ):\n-        content = ""\n-        lFiles.sort()\n-        for fileName in lFiles:\n-            currentFile = open( fileName, "r" )\n-            content += currentFile.re'..b'   \n-    ## Give the list of file names found in the given directory\n-    #\n-    # @param dirPath string absolute path of the given directory\n-    #\n-    # @return lFilesInDir list of file names\n-    #\n-    def getFileNamesList( dirPath, patternFileFilter = ".*" ):\n-        lFilesInDir = []\n-        lPaths = glob.glob( dirPath + "/*" )\n-        for ressource in lPaths:\n-            if os.path.isfile( ressource ):\n-                fileName = os.path.basename( ressource )\n-                if re.match(patternFileFilter, fileName):\n-                    lFilesInDir.append( fileName )\n-        return lFilesInDir\n-    \n-    getFileNamesList = staticmethod( getFileNamesList )\n-    \n-    ## Return the MD5 sum of a file\n-    #\n-    def getMd5SecureHash( inFile ):\n-        if "hashlib" in sys.modules:\n-            md5 = hashlib.md5()\n-            inFileHandler = open( inFile, "r" )\n-            while True:\n-                line = inFileHandler.readline()\n-                if line == "":\n-                    break\n-                md5.update( line )\n-            inFileHandler.close()\n-            return md5.hexdigest()\n-        else:\n-            return ""\n-        \n-    getMd5SecureHash = staticmethod( getMd5SecureHash )\n-    \n-    ## Cat all files of a given directory\n-    #\n-    # @param dir string directory name\n-    # @param 
outFileName string output file name\n-    #\n-    def catFilesOfDir(dir, outFileName):\n-        lFiles = FileUtils.getFileNamesList(dir)\n-        lFile2 = []\n-        for file in lFiles:\n-            lFile2.append(dir + "/" + file)\n-        FileUtils.catFilesFromList(lFile2, outFileName)\n-        \n-    catFilesOfDir = staticmethod(catFilesOfDir)\n-    \n-    ## Return True if size file > 0 octet\n-    #\n-    # @param fileName string file name\n-    #\n-    def isSizeNotNull(fileName):\n-        size = os.path.getsize(fileName)\n-        if size > 0:\n-            return True\n-        return False\n-        \n-    isSizeNotNull = staticmethod(isSizeNotNull)\n-    \n-    ## Split one file into N Files by lines\n-    #\n-    # @param fileName string file name\n-    # @param N int number of files to create\n-    # \n-    @staticmethod\n-    def splitFileIntoNFiles(fileName, N):\n-        nbLine = FileUtils.getNbLinesInSingleFile(fileName)\n-        nbLinesInEachFile = nbLine\n-        if N > nbLine:\n-            N = nbLine\n-        if N != 0:\n-            nbLinesInEachFile = math.ceil(float(nbLine) / N)\n-        else:\n-            N = 1\n-        filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))\n-        fileHandler = open(fileName, "r")\n-        for i in range(1,N+1):\n-            with open("%s-%s%s" %(filePrefix, i, fileExt), "w") as f:\n-                j = 0\n-                while j < nbLinesInEachFile:\n-                    j += 1\n-                    f.write(fileHandler.readline())\n-        fileHandler.close()            \n-            \n-    ## Split one file into files of N lines\n-    #\n-    # @param fileName string input file name\n-    # @param N int lines number per files\n-    # \n-    @staticmethod\n-    def splitFileAccordingToLineNumber(fileName, N):\n-        filePrefix, fileExt = os.path.splitext(os.path.basename(fileName))\n-        with open(fileName) as inF:\n-            fileNb = 1\n-            line = 
inF.readline()\n-            if not line or N == 0:\n-                outFileName = "%s-%s%s" %(filePrefix, fileNb, fileExt)\n-                f = open(outFileName, "wb")\n-                shutil.copyfileobj(open(fileName, "rb"), f)\n-                f.close()\n-            else:\n-                while line:\n-                    outFileName = "%s-%s%s" %(filePrefix, fileNb, fileExt)\n-                    with open(outFileName, "w") as outF:\n-                        lineNb = 1\n-                        while lineNb <= N and line:\n-                            outF.write(line)\n-                            line = inF.readline()\n-                            lineNb += 1\n-                    fileNb += 1\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/utils/PipelineStepFTests.py
--- a/commons/core/utils/PipelineStepFTests.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,83 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-import sys
-import os
-import shutil
-from commons.core.utils.FileUtils import FileUtils
-
-class PipelineStepFTests(object):
-    
-    def __init__(self, pipelineName, packageDir, workingDir, projectName, config = "", clean = True):
-        self._pipelineName = pipelineName
-        self._packageDir = packageDir
-        self._workingDir = workingDir
-        self._projectName = projectName
-        self._clean = clean
-        self._configFileName = config
-        
-    def run(self):
-        self.launchStep()
-        self.assertStep()
-
-#    def replaceInFile(self, fileName, oldPattern, newPattern, newFileName = ""):
-#        if newFileName == "":
-#            newFileName = "%s.new" % fileName
-#        f = open(newFileName, "w")
-#        for line in fileinput.input(fileName, inplace=1):
-#            newLine = line.replace(oldPattern, newPattern)
-#            f.write(newLine)
-#        f.close()
-#        fileinput.close()
-
-    def _checkIfFileExist(self, fileName):
-        if not FileUtils.isRessourceExists(fileName):
-            print "%s do not exists\n" % fileName
-            return False
-        return True
-        
-    def _printMessageAndClean(self, msg):
-        print "%s in %s functional test\n" % (msg, self._pipelineName)
-        sys.stdout.flush()
-        os.chdir("../")
-        if self._clean:
-            shutil.rmtree(self._workingDir)
-                
-    def _areTwoFilesIdenticalByScript( self, expFileName, obsFileName, scriptName):
-        cmd = "%s -v 1 -r %s -t %s 2>/dev/null" % (scriptName, expFileName, obsFileName)
-        log = os.system(cmd)
-        print
-        sys.stdout.flush()
-        if log != 0:
-            return False
-        else:
-            return True
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/utils/RepetConfigParser.py
--- a/commons/core/utils/RepetConfigParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,38 +0,0 @@
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-from ConfigParser import ConfigParser
-
-
-class RepetConfigParser(ConfigParser):
-
-    def optionxform(self, optionstr):
-        return optionstr
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/utils/RepetOptionParser.py
--- a/commons/core/utils/RepetOptionParser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,79 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Class overriding optparse.OptionParser default epilog formatter.
-The resulting epilog display format is the same as if the corresponding string was printed. 
-"""
-
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-from optparse import OptionParser
-from optparse import BadOptionError
-from optparse import OptionValueError
-SUPPRESS_USAGE = "SUPPRESS"+"USAGE"
-
-class RepetOptionParser(OptionParser):
-
-    def parse_args(self, args=None, values=None):
-        rargs = self._get_args(args)
-        if not rargs:
-            rargs = ["-h"]
-        if values is None:
-            values = self.get_default_values()
-        self.rargs = rargs
-        self.largs = largs = [] 
-        self.values = values
-        try: 
-            self._process_args(largs, rargs, values)
-        except (BadOptionError, OptionValueError), err: 
-            self.error(str(err))
-        args = largs + rargs
-        return self.check_values(values, args)
-
-    def set_usage(self, usage):
-        if not usage or usage is SUPPRESS_USAGE:
-            self.usage = None
-        elif usage.lower().startswith("usage: "):
-            self.usage = usage[7:]
-        else:
-            self.usage = usage
-    
-    def format_epilog(self, formatter):
-        if self.epilog != None:
-            return self.epilog
-        else :
-            return ""
-    
-    def format_description(self, formatter):
-        if self.description != None:
-            return self.description
-        else :
-            return ""
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/utils/RepetOptionParser.pyc
b
Binary file commons/core/utils/RepetOptionParser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/utils/__init__.pyc
b
Binary file commons/core/utils/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/utils/test/TestSuite_utils.py
--- a/commons/core/utils/test/TestSuite_utils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,50 +0,0 @@
-#!/usr/bin/env python
-
-# Copyright INRA (Institut National de la Recherche Agronomique)
-# http://www.inra.fr
-# http://urgi.versailles.inra.fr
-#
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software.  You can  use, 
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info". 
-#
-# As a counterpart to the access to the source code and  rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty  and the software's author,  the holder of the
-# economic rights,  and the successive licensors  have only  limited
-# liability. 
-#
-# In this respect, the user's attention is drawn to the risks associated
-# with loading,  using,  modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean  that it is complicated to manipulate,  and  that  also
-# therefore means  that it is reserved for developers  and  experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or 
-# data to be ensured and,  more generally, to use and operate it in the 
-# same conditions as regards security. 
-#
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-
-
-import unittest
-import sys
-import Test_FileUtils
-
-
-def main():
-    
-    TestSuite_utils = unittest.TestSuite() 
-    
-    TestSuite_utils.addTest( unittest.makeSuite( Test_FileUtils.Test_FileUtils, "test" ) )
-    
-    runner = unittest.TextTestRunner(sys.stderr, 2, 2)
-    runner.run( TestSuite_utils )
-    
-    
-if __name__ == "__main__":
-    main()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/utils/test/Test_FileUtils.py
--- a/commons/core/utils/test/Test_FileUtils.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,883 +0,0 @@\n-# Copyright INRA (Institut National de la Recherche Agronomique)\n-# http://www.inra.fr\n-# http://urgi.versailles.inra.fr\n-#\n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software.  You can  use, \n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info". \n-#\n-# As a counterpart to the access to the source code and  rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty  and the software\'s author,  the holder of the\n-# economic rights,  and the successive licensors  have only  limited\n-# liability. \n-#\n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading,  using,  modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean  that it is complicated to manipulate,  and  that  also\n-# therefore means  that it is reserved for developers  and  experienced\n-# professionals having in-depth computer knowledge. Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or \n-# data to be ensured and,  more generally, to use and operate it in the \n-# same conditions as regards security. 
\n-#\n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-\n-\n-import os\n-import sys\n-import unittest\n-import time\n-import shutil\n-from commons.core.utils.FileUtils import FileUtils\n-\n-\n-class Test_FileUtils( unittest.TestCase ):\n-    \n-    def setUp( self ):\n-        self._uniqId = "%s_%s" % ( time.strftime("%Y%m%d%H%M%S"), os.getpid() )\n-        \n-    def tearDown( self ):\n-        self._uniqId = ""\n-        \n-    def test_getNbLinesInSingleFile_non_empty( self ):\n-        file = "dummyFile_%s" % ( self._uniqId )\n-        f = open( file, "w" )\n-        f.write( "line1\\n" )\n-        f.write( "line2\\n" )\n-        f.write( "line3" )\n-        f.close()\n-        exp = 3\n-        obs = FileUtils.getNbLinesInSingleFile( file )\n-        self.assertEquals( exp, obs )\n-        os.remove( file )\n-        \n-    def test_getNbLinesInSingleFile_non_empty_endEmptyLine( self ):\n-        file = "dummyFile_%s" % ( self._uniqId )\n-        f = open( file, "w" )\n-        f.write( "line1\\n" )\n-        f.write( "line2\\n" )\n-        f.write( "line3\\n" )\n-        f.write( "\\n" )\n-        f.close()\n-        exp = 3\n-        obs = FileUtils.getNbLinesInSingleFile( file )\n-        self.assertEquals( exp, obs )\n-        os.remove( file )\n-        \n-    def test_getNbLinesInSingleFile_empty( self ):\n-        file = "dummyFile_%s" % ( self._uniqId )\n-        os.system( "touch %s" % ( file ) )\n-        exp = 0\n-        obs = FileUtils.getNbLinesInSingleFile( file )\n-        self.assertEquals( exp, obs )\n-        os.remove( file )\n-        \n-    def test_getNbLinesInFileList_non_empty( self ):\n-        f = open("dummy1.txt", "w")\n-        f.write("line1\\n")\n-        f.write("line2\\n")\n-        f.write("line3")\n-        f.close()\n-        f = open("dummy2.txt", "w")\n-        f.write("line1\\n")\n-        f.write("line2\\n")\n-        
f.write("line3")\n-        f.close()\n-        f = open("dummy3.txt", "w")\n-        f.write("line1\\n")\n-        f.write("line2\\n")\n-        f.write("line3")\n-        f.close()\n-        lFiles = [ "dummy1.txt", "dummy2.txt", "dummy3.txt" ]\n-        exp = 9\n-        obs = FileUtils.getNbLinesInFileList( lFiles )\n-        self.assertEqual( exp, obs )\n-        for f in lFiles:\n-            os.remove( f )\n-            \n-    def test_catFilesByPattern( self ):\n-        f = open("dummy1.txt", "w")\n-        f.write("line11\\n")\n-        f.write("line12\\n")\n-        f.write("line13")\n-        f.close()\n-    '..b'ead()\n-        \n-        self.assertEqual(exp1, obs1)\n-        self.assertFalse(FileUtils.isRessourceExists("dummy-2.txt"))\n-        FileUtils.removeFilesByPattern("dummy*")\n-            \n-    def test_splitFileAccordingToLineNumber_more_maxLines_than_lines(self):\n-        inputFile = "dummy.txt"\n-        obsFile1 = "dummy-1.txt"\n-        \n-        f = open(inputFile, "w")\n-        f.write("line1\\n")\n-        f.write("line2\\n")\n-        f.write("line3\\n")\n-        f.close()\n-\n-        exp1 = "line1\\nline2\\nline3\\n"\n-        \n-        FileUtils.splitFileAccordingToLineNumber(inputFile, 10)\n-        \n-        obs1 = open(obsFile1).read()\n-\n-        self.assertEqual(exp1, obs1)\n-        self.assertFalse(FileUtils.isRessourceExists("dummy-2.txt"))\n-        FileUtils.removeFilesByPattern("dummy*")\n-            \n-    def test_splitFileAccordingToLineNumber_empty_file(self):\n-        inputFile = "dummy.txt"\n-        obsFile1 = "dummy-1.txt"\n-\n-        os.system( "touch %s" % ( inputFile ) )\n-\n-        exp1 = ""\n-        \n-        FileUtils.splitFileAccordingToLineNumber(inputFile, 10)\n-        \n-        obs1 = open(obsFile1).read()\n-        \n-        self.assertEqual(exp1, obs1)\n-        self.assertFalse(FileUtils.isRessourceExists("dummy-2.txt"))\n-        FileUtils.removeFilesByPattern("dummy*")\n-         
   \n-    def test_splitFileAccordingToLineNumber_0_lines(self):\n-        inputFile = "dummy.txt"\n-        obsFile1 = "dummy-1.txt"\n-        \n-        f = open(inputFile, "w")\n-        f.write("line1\\n")\n-        f.write("line2\\n")\n-        f.write("line3\\n")\n-        f.close()\n-\n-        exp1 = "line1\\nline2\\nline3\\n"\n-        \n-        FileUtils.splitFileAccordingToLineNumber(inputFile, 0)\n-        \n-        obs1 = open(obsFile1).read()\n-        \n-        self.assertEqual(exp1, obs1)\n-        self.assertFalse(FileUtils.isRessourceExists("dummy-2.txt"))\n-        FileUtils.removeFilesByPattern("dummy*")\n-    \n-    def _writeFile( self, fileName ):\n-        inFile = open(fileName, \'w\')\n-        inFile.write(">Sequence_de_reference\\n")\n-        inFile.write("ATTTTGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n-        inFile.write(">Lignee1_mismatch\\n")\n-        inFile.write("ATTTTGCAGACTTATTCGAG-----GCCATTGCT\\n")\n-        inFile.write(">Lignee2_insertion\\n")\n-        inFile.write("ATTTTGCAGTCTTATTCGAGATTACGCCATTGCT\\n")\n-        inFile.write(">Lignee3_deletion\\n")\n-        inFile.write("A---TGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n-        inFile.close()      \n-        \n-    def _writeFileWithEmptyLine( self, fileName ):\n-        fileWithEmptyLine = open(fileName, \'w\')\n-        fileWithEmptyLine.write(">Sequence_de_reference\\n")\n-        fileWithEmptyLine.write("ATTTTGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n-        fileWithEmptyLine.write("\\n\\n")\n-        fileWithEmptyLine.write(">Lignee1_mismatch\\n")\n-        fileWithEmptyLine.write("ATTTTGCAGACTTATTCGAG-----GCCATTGCT\\n")\n-        fileWithEmptyLine.write("\\n\\n")\n-        fileWithEmptyLine.write(">Lignee2_insertion\\n")\n-        fileWithEmptyLine.write("ATTTTGCAGTCTTATTCGAGATTACGCCATTGCT\\n")\n-        fileWithEmptyLine.write("\\n")\n-        fileWithEmptyLine.write(">Lignee3_deletion\\n")\n-        fileWithEmptyLine.write("A---TGCAGTCTTATTCGAG-----GCCATTGCT\\n")\n-        
fileWithEmptyLine.close() \n-        \n-    def _writeFileWithRepeatedBlanks( self, fileName ):\n-        fileWithRepeatedBlanks = open(fileName, \'w\')\n-        fileWithRepeatedBlanks.write(">Sequ  ence_de     _reference\\n")\n-        fileWithRepeatedBlanks.write("ATTTT  GCAGTCTT TTCGAG-  ----GCCATT  GCT\\n")\n-        fileWithRepeatedBlanks.close() \n-        \n-    def _writeFileWithoutRepeatedBlanks( self, fileName ):\n-        fileWithoutRepeatedBlanks = open(fileName, \'w\')\n-        fileWithoutRepeatedBlanks.write(">Sequ ence_de _reference\\n")\n-        fileWithoutRepeatedBlanks.write("ATTTT GCAGTCTT TTCGAG- ----GCCATT GCT\\n")\n-        fileWithoutRepeatedBlanks.close()\n-        \n-if __name__ == "__main__":\n-    unittest.main()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/BedWriter.py
--- a/commons/core/writer/BedWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,100 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from commons.core.writer.TranscriptListWriter import TranscriptListWriter
-
-
-class BedWriter(TranscriptListWriter):
-    """
-    A class that writes a transcript list into a file with BED format
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle:     handle to the file
-    @type handle:     file handle
-    @ivar header:     first lines of the file
-    @type header:     string
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName:    name of the file 
-        @type    fileName:    string
-        @param verbosity: verbosity
-        @type    verbosity: int
-        """
-        self.header = "track name=reads description=\"Reads\" useScore=0 visibility=full offset=0\n"
-        super(BedWriter, self).__init__(fileName, verbosity)
-
-
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["bed"]
-
-
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "bed"
-        
-        
-    def setTitle(self, title):
-        """
-        Set the title of the track
-        @param title: the title of the track
-        @type    title: string
-        """
-        if title != None:
-            self.header = "track name=%s description=\"%s\" useScore=0 visibility=full offset=0\n" % (title, title)
-
-
-    def copyProperties(self, bedParser):
-        """
-        Copy the properties collected by a parser, to produce a similar output
-        @param bedParser: a BED Parser parser
-        @type    bedParser: class L{BedParser<BedParser>}
-        """
-        self.setTitle(bedParser.title)
-        
-
-    def printTranscript(self, transcript):
-        """
-        Export the given transcript with GBrowse format
-        @param transcript: transcript to be printed
-        @type    transcript: class L{Transcript<Transcript>}
-        @return:                     a string
-        """
-        return transcript.printBed()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/BedWriter.pyc
b
Binary file commons/core/writer/BedWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/CsvWriter.py
--- a/commons/core/writer/CsvWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,153 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import os
-import random
-from commons.core.writer.TranscriptListWriter import TranscriptListWriter
-from SMART.Java.Python.misc.Progress import Progress
-
-class CsvWriter(TranscriptListWriter):
-    """
-    A class that writes a transcript list into a file with CSV (Excel) format
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        super(CsvWriter, self).__init__(fileName, verbosity)
-        self.header   = ""
-        self.title    = "chromosome,start,end,strand,exons,tags\n"
-        self.modified = False
-            
-
-    def __del__(self):
-        """
-        Destructor
-        (Trick to write 1 tag per column)
-        """
-        if self.handle != None:
-            self.modifyCsv()
-        super(CsvWriter, self).__del__()
-
-
-    def close(self):
-        if self.handle != None:
-            self.modifyCsv()
-        super(CsvWriter, self).close()
-
-
-    def modifyCsv(self):
-        """
-        Clean CSV file so that there is one column per tag
-        """
-        if self.modified:
-            return
-
-        # read all the tags
-        self.handle.close()
-        self.handle = open(self.fileName)
-        nbFirstFields = 5
-        tags = set()
-        if self.verbosity >= 10:
-            print "Modifying CSV file..."
-        number = -1
-        for number, line in enumerate(self.handle):
-            if number != 0:
-                theseTags = line.strip().split(",")[nbFirstFields:]
-                for tag in theseTags:
-                    if tag.find("=") != -1:
-                        (key, value) = tag.split("=", 1)
-                        if value != None:
-                            tags.add(key)
-        if self.verbosity >= 10:
-            print " ...done"
-
-        # re-write the file
-        tmpFileName = "tmpFile%d.csv" % (random.randint(0, 100000))
-        tmpFile = open(tmpFileName, "w")
-        self.handle.seek(0)
-        progress = Progress(number + 1, "Re-writting CSV file", self.verbosity)
-        tmpFile.write(self.title.replace("tags", ",".join(sorted(tags))))
-        for line in self.handle:
-            tagValues = dict([(key, None) for key in tags])
-            tmpFile.write(",".join(line.strip().split(",")[:nbFirstFields]))
-            for tag in line.strip().split(",")[nbFirstFields:]:
-                if tag.find("=") != -1:
-                    key = tag.split("=", 1)[0]
-                    tagValues[key] = tag.split("=", 1)[1]
-                else:
-                    tagValues[key] += ";%s" % (tag)
-            for key in sorted(tagValues.keys()):
-                tmpFile.write(",%s" % (tagValues[key]))
-            tmpFile.write("\n")
-            progress.inc()
-        tmpFile.close()
-
-        # replace former file
-        import shutil
-        shutil.move(tmpFile.name, self.fileName)
-        progress.done()
-        self.modified = True
-
-
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["csv", "xls", "excel"]
-
-
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "csv"
-        
-        
-    def printTranscript(self, transcript):
-        """
-        Export the given transcript with GFF2 format
-        @param transcript: transcript to be printed
-        @type transcript: class L{Transcript<Transcript>}
-        @return: a string
-        """
-        return transcript.printCsv()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/CsvWriter.pyc
b
Binary file commons/core/writer/CsvWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/EmblWriter.py
--- a/commons/core/writer/EmblWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,116 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from commons.core.writer.TranscriptListWriter import TranscriptListWriter
-
-
-class EmblWriter(TranscriptListWriter):
-    """
-    A class that writes a transcript list into several files with EMBL format
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        self.fileName = fileName
-        self.verbosity = verbosity
-        self.handles = {}
-        self.handle = None
-
-
-    def __del__(self):
-        """
-        Destructor
-        Trick to append the sequences at the end of the EMBL files
-        """
-        handle                = open(self.sequenceFileName)
-        currentHandle = None
-        for line in handle:
-            if line[0] == ">":
-                chromosome = line[1:].strip()
-                if chromosome in self.handles:
-                    currentHandle = self.handles[chromosome]
-                else:
-                    currentHandle = None
-            else:
-                if currentHandle != None:
-                    currentHandle.write(line)
-        handle.close()
-        for handle in self.handles.values():
-            handle.close()
-            
-
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["embl"]
-        
-        
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "embl"
-
-
-    def addTranscript(self, transcript):
-        """
-        Add a transcript to the list of transcripts to be written
-        @param transcript: transcript to be written
-        @type    transcript: class L{Transcript<Transcript>}
-        """
-        chromosome = transcript.getChromosome()
-        if chromosome not in self.handles:
-            self.handles[chromosome] = open("%s%s.embl" % (self.fileName[:-len(".embl")], chromosome.title()), "w")
-        self.handles[chromosome].write(self.printTranscript(transcript))
-
-
-    def printTranscript(self, transcript):
-        """
-        Export the given transcript with GFF2 format
-        @param transcript: transcript to be printed
-        @type transcript: class L{Transcript<Transcript>}
-        @return: a string
-        """
-        return transcript.printEmbl()
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/EmblWriter.pyc
b
Binary file commons/core/writer/EmblWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/FastaWriter.py
--- a/commons/core/writer/FastaWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,77 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from commons.core.writer.SequenceListWriter import SequenceListWriter
-
-
-class FastaWriter(SequenceListWriter):
-    """
-    A class that writes a sequence list into a file with FASTA format
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    @ivar header: first lines of the file
-    @type header: string
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        super(FastaWriter, self).__init__(fileName, verbosity)
-
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["fasta", "mfa"]
-
-
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "fasta"
-        
-        
-    def getLine(self, sequence):
-        """
-        Convert a sequence
-        @param sequence: sequence to be written
-        @type    sequence: class L{Sequence<Sequence>}
-        """
-        return sequence.printFasta()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/FastaWriter.pyc
b
Binary file commons/core/writer/FastaWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/FastqWriter.py
--- a/commons/core/writer/FastqWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,78 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from commons.core.writer.SequenceListWriter import SequenceListWriter
-
-
-class FastqWriter(SequenceListWriter):
-    """
-    A class that writes a sequence list into a file with FASTQ format
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    @ivar header: first lines of the file
-    @type header: string
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        super(FastqWriter, self).__init__(fileName, verbosity)
-        
-
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["fastq", "mfq"]
-        
-        
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "fastq"
-        
-        
-    def getLine(self, sequence):
-        """
-        Convert a sequence
-        @param sequence: sequence to be written
-        @type    sequence: class L{Sequence<Sequence>}
-        """
-        return sequence.printFastq()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/FastqWriter.pyc
b
Binary file commons/core/writer/FastqWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/GbWriter.py
--- a/commons/core/writer/GbWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,102 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from commons.core.writer.TranscriptListWriter import TranscriptListWriter
-
-
-class GbWriter(TranscriptListWriter):
-    """
-    A class that writes a transcript list into a file with GBrowse format
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    @ivar header: first lines of the file
-    @type header: string
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        self.header = "[READS]\nbgcolor = red\nstrand_arrow = 1\n\n"
-        super(GbWriter, self).__init__(fileName, verbosity)
-     
-        
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["gb", "gbrowse"]
-        
-        
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "gb"
-        
-        
-    def setColor(self, color):
-        """
-        Set the color of the track
-        @param color: the color of the track
-        @type    color: string
-        """
-        if color != None:
-            self.header = "[READS]\nbgcolor= %s\nstrand_arrow = 1\n\n" % (color)
-        
-        
-    def copyProperties(self, gbParser):
-        """
-        Copy the properties collected by a parser, to produce a similar output
-        @param gbParser: a GBrowse parser
-        @type    gbParser: class L{GbParser<GbParser>}
-        """
-        self.setColor(gbParser.color)
-        
-
-    def printTranscript(self, transcript):
-        """
-        Export the given transcript with GBrowse format
-        Possibly skip the reference if already put
-        @param transcript: transcript to be printed
-        @type transcript: class L{Transcript<Transcript>}
-        @return: a string
-        """
-        if self.lastChromosome != None and self.lastChromosome == transcript.getChromosome():
-            return transcript.printGBrowseLine()
-        return transcript.printGBrowse()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/GbWriter.pyc
b
Binary file commons/core/writer/GbWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/Gff2Writer.py
--- a/commons/core/writer/Gff2Writer.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,89 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from commons.core.writer.TranscriptListWriter import TranscriptListWriter
-
-
-class Gff2Writer(TranscriptListWriter):
-    """
-    A class that writes a transcript list into a file with GFF2 format
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        self.header = ""
-        self.title = ""
-        super(Gff2Writer, self).__init__(fileName, verbosity)
-            
-
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["gff2"]
-        
-        
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "gff2"
-        
-        
-    def setTitle(self, title):
-        """
-        Set the title of the transcripts
-        @param title: the title of the transcripts
-        @type    title: string
-        """
-        self.title = title
-
-
-    def printTranscript(self, transcript):
-        """
-        Export the given transcript with GFF2 format
-        @param transcript: transcript to be printed
-        @type transcript: class L{Transcript<Transcript>}
-        @return: a string
-        """
-        return transcript.printGff2(self.title)
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/Gff2Writer.pyc
b
Binary file commons/core/writer/Gff2Writer.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/Gff3Writer.py
--- a/commons/core/writer/Gff3Writer.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,130 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from commons.core.writer.TranscriptListWriter import TranscriptListWriter
-
-
-class Gff3Writer(TranscriptListWriter):
-    """
-    A class that writes a transcript list into a file with GFF3 format
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    """
-
-
-    def __init__(self, fileName, verbosity = 0, title="S-MART", feature="transcript", featurePart="exon"):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        self.header = ""
-        self.title    = title
-        self.feature = feature
-        self.featurePart = featurePart
-        super(Gff3Writer, self).__init__(fileName, verbosity)
-            
-
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["gff3", "gff"]
-        
-        
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "gff3"
-        
-        
-    def setTitle(self, title):
-        """
-        Set the title of the transcripts
-        @param title: the title of the transcripts
-        @type    title: string
-        """
-        self.title = title
-        
-    def setFeature(self, feature):
-        """
-        Set the name of the feature
-        @param title: the title of the feature
-        @type    feature: string
-        """
-        self.feature = feature
-        
-    def setFeaturePart(self, featurePart):
-        """
-        Set the name of the feature part
-        @param title: the title of the feature part
-        @type    featurePart: string
-        """
-        self.featurePart = featurePart
-
-
-    def printTranscript(self, transcript):
-        """
-        Export the given transcript with GFF2 format
-        @param transcript: transcript to be printed
-        @type transcript: class L{Transcript<Transcript>}
-        @return: a string
-        """
-        direction = "+"
-        if transcript.getDirection() == -1:
-            direction = "-"
-        transcript.sortExonsIncreasing()
-        if "ID" not in transcript.getTagValues():
-            transcript.setTagValue("ID", transcript.getUniqueName())
-        feature = self.feature
-        tags = transcript.tags
-        if "feature" in transcript.getTagNames():
-            feature = transcript.getTagValue("feature")
-            del transcript.tags["feature"]
-        score = "."
-        if "score" in transcript.getTagNames():
-            score = "%d" % (int(transcript.getTagValue("score")))
-            del transcript.tags["score"]
-        comment = transcript.getTagValues(";", "=")
-        string = "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\t%s\n" % (transcript.getChromosome(), self.title, feature, transcript.getStart(), transcript.getEnd(), score, direction, comment)
-        if len(transcript.exons) > 1:
-            for i, exon in enumerate(transcript.getExons()):
-                if "score" in exon.getTagNames():
-                    score = "%d" % (int(exon.getTagValue("score")))
-                string += "%s\t%s\t%s\t%d\t%d\t%s\t%s\t.\tID=%s-%s%d;Name=%s-%s%d;Parent=%s\n" % (transcript.getChromosome(), self.title,self.featurePart, exon.getStart(), exon.getEnd(), score, direction, transcript.getTagValue("ID"),self.featurePart, i+1, transcript.name,self.featurePart, i+1, transcript.getTagValue("ID"))
-        self.tags = tags
-        return string
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/Gff3Writer.pyc
b
Binary file commons/core/writer/Gff3Writer.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/GtfWriter.py
--- a/commons/core/writer/GtfWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,89 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from commons.core.writer.TranscriptListWriter import TranscriptListWriter
-
-
-class GtfWriter(TranscriptListWriter):
-    """
-    A class that writes a transcript list into a file with GTF format
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        self.header = ""
-        self.title    = "S-MART"
-        super(GtfWriter, self).__init__(fileName, verbosity)
-            
-
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["gtf", "gtf2"]
-        
-        
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "gtf"
-        
-        
-    def setTitle(self, title):
-        """
-        Set the title of the transcripts
-        @param title: the title of the transcripts
-        @type    title: string
-        """
-        self.title = title
-
-
-    def printTranscript(self, transcript):
-        """
-        Export the given transcript with GTF format
-        @param transcript: transcript to be printed
-        @type transcript: class L{Transcript<Transcript>}
-        @return: a string
-        """
-        return transcript.printGtf(self.title)
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/GtfWriter.pyc
b
Binary file commons/core/writer/GtfWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/MapWriter.py
--- a/commons/core/writer/MapWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,100 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from commons.core.writer.TranscriptListWriter import TranscriptListWriter
-
-
-class MapWriter(TranscriptListWriter):
-    """
-    A class that writes a transcript list into a file with GFF3 format
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    """
-
-
-    def __init__(self, fileName, verbosity = 0, title="S-MART"):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        self.header = ""
-        self.title    = title
-        TranscriptListWriter.__init__(self, fileName, verbosity)
-            
-
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["map"]
-        
-        
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "map"
-        
-        
-    def setTitle(self, title):
-        """
-        Set the title of the transcripts
-        @param title: the title of the transcripts
-        @type    title: string
-        """
-        self.title = title
-
-
-    def printTranscript(self, transcript):
-        """
-        Export the given transcript to map format
-        @param transcript: transcript to be printed
-        @type transcript: class L{Transcript<Transcript>}
-        @return: a string
-        """
-        name = transcript.name
-        if "nbOccurrences" in transcript.getTagNames() and transcript.getTagValue("nbOccurrences") != 1 and transcript.getTagValue("occurrences"):
-            name = "%s-%d" % (name, transcript.getTagValue("occurrence"))
-        sizes   = []
-        starts  = []
-        transcript.sortExonsIncreasing()
-        for exon in transcript.getExons():
-            sizes.append("%d" % (exon.getSize()))
-            starts.append("%d" % (exon.getStart() - transcript.getStart()))
-        return "%s\t%s\t%d\t%d\n" % (name, transcript.getChromosome(), transcript.getStart(), transcript.getEnd()+1)
-
-
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/MapWriter.pyc
b
Binary file commons/core/writer/MapWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/MySqlTranscriptWriter.py
--- a/commons/core/writer/MySqlTranscriptWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,215 +0,0 @@\n-#\n-# Copyright INRA-URGI 2009-2010\n-# \n-# This software is governed by the CeCILL license under French law and\n-# abiding by the rules of distribution of free software. You can use,\n-# modify and/ or redistribute the software under the terms of the CeCILL\n-# license as circulated by CEA, CNRS and INRIA at the following URL\n-# "http://www.cecill.info".\n-# \n-# As a counterpart to the access to the source code and rights to copy,\n-# modify and redistribute granted by the license, users are provided only\n-# with a limited warranty and the software\'s author, the holder of the\n-# economic rights, and the successive licensors have only limited\n-# liability.\n-# \n-# In this respect, the user\'s attention is drawn to the risks associated\n-# with loading, using, modifying and/or developing or reproducing the\n-# software by the user in light of its specific status of free software,\n-# that may mean that it is complicated to manipulate, and that also\n-# therefore means that it is reserved for developers and experienced\n-# professionals having in-depth computer knowledge. 
Users are therefore\n-# encouraged to load and test the software\'s suitability as regards their\n-# requirements in conditions enabling the security of their systems and/or\n-# data to be ensured and, more generally, to use and operate it in the\n-# same conditions as regards security.\n-# \n-# The fact that you are presently reading this means that you have had\n-# knowledge of the CeCILL license and that you accept its terms.\n-#\n-import os\n-import random\n-from SMART.Java.Python.mySql.MySqlTable import MySqlTable\n-from SMART.Java.Python.mySql.MySqlTranscriptTable import MySqlTranscriptTable\n-from SMART.Java.Python.misc.Progress import Progress\n-\n-class MySqlTranscriptWriter(object):\n-    """\n-    A class that writes a transcript list into a mySQL table\n-    @ivar name:                      name of the tables \n-    @type name:                      string\n-    @ivar tables:                    the tables\n-    @type tables:                    dict of L{MySqlTranscriptTable<MySqlTranscriptTable>}\n-    @ivar mySqlConnection:           connection to a MySQL database\n-    @type mySqlConnection:           class L{MySqlConnection<MySqlConnection>}\n-    @ivar tmpTranscriptFileHandles:  files where transcripts are temporary stored, before copy into database\n-    @type tmpTranscriptFileHandles:  dict of file handles\n-    @ivar nbTranscriptsByChromosome: number of transcripts written\n-    @type nbTranscriptsByChromosome: dict of int (one for each chromosome)\n-    @ivar randomNumber:              a random number, used for having a unique name for the tables\n-    @type randomNumber:              int\n-    @ivar toBeWritten:               there exists transcripts to be copied into database\n-    @type toBeWritten:               bool                \n-    @ivar verbosity:                 verbosity\n-    @type verbosity:                 int        \n-    """\n-\n-\n-    def __init__(self, connection, name = None, verbosity = 0):\n-        """\n-        
Constructor\n-        @param name:      name of the file \n-        @type  name:      string\n-        @param verbosity: verbosity\n-        @type  verbosity: int\n-        """\n-        self.name                      = name\n-        self.verbosity                 = verbosity\n-        self.tables                    = {}\n-        self.indices                   = {}\n-        self.tmpTranscriptFileHandles  = {}\n-        self.nbTranscriptsByChromosome = {}\n-        self.toBeWritten               = False\n-        self.randomNumber              = random.randint(0, 100000)\n-        self.mySqlConnection           = connection\n-        self.nbTmpFiles                = 100\n-        self.transcriptValues          = {}\n-        self.nbTranscriptValues        = 1000\n-        if self.name != None:\n-            pos = self.name.rfind(os.sep)\n-            if pos != -1:\n-                self.name = self.name[pos+1:]\n-            \n-\n-    def __del__(self):\n-       '..b'-        if chromosome not in self.transcriptValues:\n-            self.transcriptValues[chromosome] = []\n-            \n-        self.transcriptValues[chromosome].append(transcript.getSqlValues())\n-\n-        self.nbTranscriptsByChromosome[chromosome] += 1\n-        self.toBeWritten                            = True\n-        if sum([len(transcripts) for transcripts in self.transcriptValues.values()]) > self.nbTranscriptValues:\n-            self.write() \n-\n-\n-    def addElement(self, element):\n-        """\n-        Same as "addTranscript"\n-        @param element: transcript to be written\n-        @type  element: class L{Transcript<Transcript>}\n-        """\n-        self.addTranscript(element)\n-\n-\n-#   def addTranscriptList(self, transcriptListParser):\n-#       """\n-#       Add a list of transcripts to the transcripts to be written\n-#       @param transcriptListParser: transcripts to be written\n-#       @type  transcriptListParser: class 
L{TranscriptListParser<TranscriptListParser>}\n-#       """\n-#       progress = Progress(transcriptListParser.getNbTranscripts(), "Storing %s into database" % (transcriptListParser.fileName), self.verbosity)\n-#       for transcript in transcriptListParser.getIterator():\n-#           self.addTranscript(transcript)\n-#           progress.inc()\n-#       progress.done()\n-            \n-            \n-    def addTranscriptList(self, transcriptListParser):\n-        """\n-        Add a list of transcripts to the transcripts to be written\n-        @param transcriptListParser: transcripts to be written\n-        @type  transcriptListParser: class L{TranscriptListParser<TranscriptListParser>}\n-        """\n-        self.transcriptListParser = transcriptListParser\n-        self.mySqlConnection.executeManyFormattedQueriesIterator(self)\n-            \n-            \n-    def getIterator(self):\n-        """\n-        Iterator to the SQL commands to insert the list\n-        """\n-        progress = Progress(self.transcriptListParser.getNbTranscripts(), "Storing %s into database" % (self.transcriptListParser.fileName), self.verbosity)\n-        for transcript in self.transcriptListParser.getIterator():\n-            chromosome = transcript.getChromosome()\n-            if chromosome not in self.tables:\n-                self.createTable(chromosome)\n-            self.nbTranscriptsByChromosome[chromosome] = self.nbTranscriptsByChromosome.get(chromosome, 0) + 1\n-            values = transcript.getSqlValues()\n-            #yield "INSERT INTO \'%s\' (%s) VALUES (%s)" % (self.tables[chromosome].name, ", ".join(self.tables[chromosome].variables), ", ".join([MySqlTable.formatSql(values[variable], self.tables[chromosome].types[variable], self.tables[chromosome].sizes[variable]) for variable in self.tables[chromosome].variables]))\n-            yield ("INSERT INTO \'%s\' (%s) VALUES (%s)" % (self.tables[chromosome].name, ", ".join(self.tables[chromosome].variables), ", 
".join(["?"] * len(self.tables[chromosome].variables))), [values[variable] for variable in self.tables[chromosome].variables])\n-            progress.inc()\n-        progress.done()\n-            \n-            \n-    def write(self):\n-        """\n-        Copy the content of the files into the database\n-        (May add transcripts to already created databases)\n-        """\n-        for chromosome in self.transcriptValues:\n-            if chromosome in self.transcriptValues:\n-                self.tables[chromosome].insertManyFormatted(self.transcriptValues[chromosome])\n-        self.transcriptValues = {}\n-        self.toBeWritten      = False\n-            \n-            \n-    def getTables(self):\n-        """\n-        Get the tables\n-        @return: the mySQL tables\n-        """\n-        if self.toBeWritten:\n-            self.write()\n-        return self.tables\n-\n-            \n-            \n-    def removeTables(self):\n-        """\n-        Drop the tables\n-        """\n-        for chromosome in self.tables:\n-            self.tables[chromosome].remove()\n\\ No newline at end of file\n'
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/MySqlTranscriptWriter.pyc
b
Binary file commons/core/writer/MySqlTranscriptWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/SamWriter.py
--- a/commons/core/writer/SamWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,101 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import os
-import random
-from commons.core.writer.TranscriptListWriter import TranscriptListWriter
-
-
-class SamWriter(TranscriptListWriter):
-    """
-    A class that writes a transcript list into a file with SAM format
-    @ivar sizes: estimated sizes of the chromosomes
-    @type sizes: dict of string to int
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName:    name of the file 
-        @type fileName:     string
-        @param verbosity: verbosity
-        @type verbosity:    int
-        """
-        super(SamWriter, self).__init__(fileName, verbosity)
-        self.sizes         = {}
-        self.headerWritten = False
-        
-        
-    def close(self):
-        """
-        Close file (trick to add header)
-        """
-        super(SamWriter, self).close()
-        if self.headerWritten:
-            return
-        tmpFileName = "tmpFile%d.sam" % (random.randint(0, 100000))
-        tmpHandle = open(tmpFileName, "w")
-        for chromosome, size in self.sizes.iteritems():
-            tmpHandle.write("@SQ\tSN:%s\tLN:%d\n" % (chromosome, size))
-        self.handle = open(self.fileName)
-        for line in self.handle:
-            tmpHandle.write(line)
-        tmpHandle.close()
-        self.handle.close()
-        os.rename(tmpFileName, self.fileName)
-        self.headerWritten = True
-
-
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["sam"]
-        
-        
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "sam"
-        
-        
-    def printTranscript(self, transcript):
-        """
-        Export the given transcript with GBrowse format
-        @param transcript: transcript to be printed
-        @type transcript: class L{Transcript<Transcript>}
-        @return: a string
-        """
-        self.sizes[transcript.getChromosome()] = max(transcript.getEnd(), self.sizes.get(transcript.getChromosome(), 0))
-        return transcript.printSam()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/SamWriter.pyc
b
Binary file commons/core/writer/SamWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/SequenceListWriter.py
--- a/commons/core/writer/SequenceListWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,94 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-
-class SequenceListWriter(object):
-    """
-    An interface that writes a list of sequences into a file
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    @ivar header: first lines of the file
-    @type header: string
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        self.fileName = fileName
-        self.verbosity = verbosity
-        self.handle = open(self.fileName, "w")
-
-
-    def __del__(self):
-        """
-        Destructor
-        """
-        self.close()
-        
-
-    def write(self):
-        """
-        No-op
-        """
-        pass
-        
-        
-    def close(self):
-        """
-        Close writer
-        """
-        if self.handle != None:
-            self.handle.close()
-        
-        
-    def addSequence(self, sequence):
-        """
-        Add a sequence to the list of sequence to be written
-        @param sequence: sequence to be written
-        @type    sequence: class L{Sequence<Sequence>}
-        """
-        self.handle.write(self.getLine(sequence))
-
-
-    def addElement(self, element):
-        """
-        Same as "addSequence"
-        @param element: sequence to be written
-        @type    element: class L{Sequence<Sequence>}
-        """
-        self.addSequence(element)
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/SequenceListWriter.pyc
b
Binary file commons/core/writer/SequenceListWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/TranscriptListWriter.py
--- a/commons/core/writer/TranscriptListWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,163 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from SMART.Java.Python.misc.Progress import Progress
-
-class TranscriptListWriter(object):
-    """
-    An interface that writes a transcript list into a file
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    @ivar header: first lines of the file
-    @type header: string
-    @ivar started: whether some transcripts have already been writted
-    @type started: boolean
-    @ivar lastChromosome: the chromosome on which the transcript which was inserted last
-    @type lastChromosome: string
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        self.fileName = fileName
-        self.verbosity = verbosity
-        self.handle = open(self.fileName, "w")
-        self.started = False
-        self.lastChromosome = None
-        self.header = ""
-        self.sequenceFileName = None
-
-
-    def __del__(self):
-        """
-        Destructor
-        """
-        self.close()
-
-
-    def close(self):
-        """
-        Close writer
-        """
-        if self.handle != None and not self.handle.closed:
-            self.handle.close()
-        self.handle = None
-
-
-    def addTranscript(self, transcript):
-        """
-        Add a transcript to the list of transcripts to be written
-        @param transcript: transcript to be written
-        @type    transcript: class L{Transcript<Transcript>}
-        """
-        if not self.started:
-            self.handle.write(self.header)
-            self.started = True
-
-        self.handle.write(self.printTranscript(transcript))
-        self.lastChromosome = transcript.getChromosome()
-
-
-    def addElement(self, element):
-        """
-        Same as "addTranscript"
-        @param element: transcript to be written
-        @type    element: class L{Transcript<Transcript>}
-        """
-        self.addTranscript(element)
-
-
-    def addTranscriptList(self, transcriptList):
-        """
-        Add a list of transcripts to the transcripts to be written
-        @param transcriptList: transcripts to be written
-        @type    transcriptList: class L{TranscriptList<TranscriptList>}
-        """
-        progress = Progress(transcriptList.getNbTranscripts(), "Writing transcripts", self.verbosity)
-        for transcript in transcriptList.getIterator():
-            self.addTranscript(transcript)
-            progress.inc()
-        progress.done()
-            
-
-    def addTranscriptTable(self, transcriptTable):
-        """
-        Add a list of transcripts in a mySQL table to the transcripts to be written
-        @param transcriptTable: transcripts to be written
-        @type    transcriptTable: class L{MySqlTranscriptTable<MySqlTranscriptTable>}
-        """
-        for transcript in transcriptTable.getIterator():
-            self.addTranscript(transcript)
-            
-            
-    def setTitle(self, title):
-        """
-        Possibly write a title for the list (by default, do nothing)
-        @param title: a title for the list
-        @type title:    string
-        """
-        pass
-    
-    def setFeature(self, feature):
-        """
-        Set the name of the feature
-        @param title: the title of the feature
-        @type    feature: string
-        """
-        pass
-        
-    def setFeaturePart(self, featurePart):
-        """
-        Set the name of the feature part
-        @param title: the title of the feature part
-        @type    featurePart: string
-        """
-        pass
-
-
-    def addSequenceFile(self, fileName):
-        """
-        Get the multi-fasta file of the sequences
-        """
-        self.sequenceFileName = fileName
-        
-        
-    def write(self):
-        """
-        No-op
-        """
-        pass
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/TranscriptListWriter.pyc
b
Binary file commons/core/writer/TranscriptListWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/TranscriptWriter.py
--- a/commons/core/writer/TranscriptWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,189 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import os
-import sys
-from commons.core.writer.WriterChooser import WriterChooser
-from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
-
-class TranscriptWriter(object):
-    """
-    An interface class that writes a list of transcripts, handle different formats
-    @ivar container: container of the data
-    @type container: L{TranscriptContainer<TranscriptContainer>}
-    @ivar format: format of the data to be printed
-    @type format: string     
-    @ivar file: the file where to print
-    @type file: string 
-    @ivar type: type of the data (transcripts, mappings or mySQL)
-    @type type: string
-    @ivar writer: a transcript list writer
-    @type writer: L{TranscriptListWriter<TranscriptListWriter>} or None
-    @ivar mode: use a container or enter transcript one by one
-    @type mode: string
-    @ivar verbosity: verbosity
-    @type verbosity: int        
-    """
-
-    def __init__(self, file, format, verbosity = 0):
-        """
-        Constructor
-        @param container: container of the data
-        @type container: string
-        @param format: format of the data
-        @type format: string
-        @param file: file where to print
-        @type file: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        self.container = None
-        self.format = format
-        self.file = file 
-
-        self.verbosity = verbosity
-        self.type = None
-        self.writer = None
-        self.mode = None
-        if self.format == None:
-            sys.exit("Error! Writer input format is empty!")
-
-        if self.format == "sql":
-            self.type = "sql"
-            pos = self.file.rfind(os.sep)
-            if pos > -1:
-                self.file = self.file[pos+1:]
-            self.writer = MySqlTranscriptWriter(self.file, self.verbosity)
-        else:
-            writerChooser = WriterChooser(self.verbosity)
-            writerChooser.findFormat(self.format)
-            self.writer = writerChooser.getWriter(self.file)
-            self.type = writerChooser.getType()
-            
-            
-    def close(self):
-        """
-        Close writer
-        """
-        if self.writer != None:
-            self.writer.close()
-
-
-    def setContainer(self, container):
-        """
-        Set a container for the data
-        @param container: container of the data
-        @type container: class L{TranscriptContainer<TranscriptContainer>}
-        """
-        self.container = container
-        if self.mode == "transcript":
-            raise Exception("Error! TranscriptWriter '%s' on 'transcript' mode is currently used on 'container' mode." % (self.file))
-        self.mode = "container"
-
-
-    def addTranscript(self, transcript):
-        """
-        Add a transcript to write
-        @param transcript: a transcript
-        @type transcript: class L{Transcript<Transcript>}
-        """
-        self.writer.addTranscript(transcript)
-        if self.mode == "container":
-            sys.exit("Error! TranscriptWriter '%s' on 'container' mode is currently used on 'transcript' mode." % (self.file))
-        self.mode = "transcript"
-        
-        
-    def addElement(self, transcript):
-        """
-        Same as addTranscript
-        """
-        self.addTranscript(transcript)
-    
-
-    def setTitle(self, title):
-        """
-        Possibly write a title for the list
-        @param title: a title for the list
-        @type title: string
-        """
-        if self.writer != None:
-            self.writer.setTitle(title)
-
-    def setFeature(self, feature):
-        """
-        Possibly Set the name of the feature
-        @param title: the title of the feature
-        @type    feature: string
-        """
-        if self.writer != None:
-            self.writer.setFeature(feature)
-        
-    def setFeaturePart(self, featurePart):
-        """
-        Possibly Set the name of the feature part
-        @param title: the title of the feature part
-        @type    featurePart: string
-        """
-        if self.writer != None:
-            self.writer.setFeaturePart(featurePart)    
-        
-    def setStrands(self, strands):
-        """
-        Possibly consider both strands separately
-        @param strands: whether both strands should be considered separately
-        @type  strands: boolean
-        """
-        if self.writer != None:
-            self.writer.setStrands(strands)
-            
-        
-    def write(self):
-        """
-        Write the content and possibly convert data
-        """        
-        if self.type == "transcript" or self.type == "sequence":
-            if self.mode == "container":
-                self.writer.addTranscriptList(self.container)
-            return
-
-        if self.mode == "transcript" or self.type == "sequence":
-            self.writer.write()
-            return
-
-        if self.container.format != "sql":
-            self.container.storeIntoDatabase()
-        tables = self.container.getTables()
-        for chromosome in tables:
-            tables[chromosome].rename("%s_%s" % (self.file, chromosome))
-        return
-        
-
-    def addSequenceFile(self, fileName):
-        self.writer.addSequenceFile(fileName)
-            
\ No newline at end of file
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/TranscriptWriter.pyc
b
Binary file commons/core/writer/TranscriptWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/UcscWriter.py
--- a/commons/core/writer/UcscWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,73 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from commons.core.writer.BedWriter import BedWriter
-
-class UcscWriter(BedWriter):
-    """
-    A class that writes a transcript list into a file with UCSC BED format (minor differences with BED format)
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        super(UcscWriter, self).__init__(fileName, verbosity)
-        
-
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["ucsc"]
-        
-        
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "bed"
-
-        
-    def printTranscript(self, transcript):
-        """
-        Export the given transcript with GBrowse format
-        @param transcript: transcript to be printed
-        @type transcript: class L{Transcript<Transcript>}
-        @return: a string
-        """
-        return transcript.printUcsc()
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/UcscWriter.pyc
b
Binary file commons/core/writer/UcscWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/WigWriter.py
--- a/commons/core/writer/WigWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,139 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from commons.core.writer.TranscriptListWriter import TranscriptListWriter
-
-
-class WigWriter(TranscriptListWriter):
-    """
-    A class that writes a transcript list into a file with WIGGLE format
-    @ivar fileName: name of the file 
-    @type fileName: string
-    @ivar handle: handle to the file
-    @type handle: file handle
-    @ivar header: first lines of the file
-    @type header: string
-    """
-
-
-    def __init__(self, fileName, verbosity = 0):
-        """
-        Constructor
-        @param fileName: name of the file 
-        @type fileName: string
-        @param verbosity: verbosity
-        @type verbosity: int
-        """
-        self.fileName  = fileName
-        self.verbosity = verbosity
-        self.data      = {-1: {}, 0: {}, 1: {}}
-        self.title     = "Reads"
-        self.strands   = False
-        self.handle    = None
-
-
-    def __del__(self):
-        """
-        Destructor
-        Actually print the file
-        """
-        strand2string = {-1: "-", 1: "+", 0: ""}
-        self.handle   = open(self.fileName, "w")
-        self.handle.write("track type=wiggle_0 name=\"%s\"\n" % (self.title))
-        for strand in self.data:
-            for chromosome in sorted(self.data[strand]):
-                 self.handle.write("variableStep chrom=%s%s\n" % (chromosome, strand2string[strand]))
-                 for pos in sorted(self.data[strand][chromosome]):
-                     self.handle.write("%d\t%d\n" % (pos, self.data[strand][chromosome][pos]))
-        self.handle.close()
-
-        
-    @staticmethod
-    def getFileFormats():
-        """
-        Get the format of the file
-        """
-        return ["wig", "wiggle"]
-        
-        
-    @staticmethod
-    def getExtension():
-        """
-        Get the usual extension for the file
-        """
-        return "wig"
-        
-        
-    def setTitle(self, title):
-        """
-        Set the title of the track
-        @param title: the title of the track
-        @type    title: string
-        """
-        if title != None:
-            self.title = title
-
-
-    def setStrands(self, strands):
-        """
-        Consider each strand separately
-        @param boolean: whether each strand should be considered separately
-        @type  boolean: boolean
-        """
-        self.strands = strands
-
-
-    def copyProperties(self, parser):
-        """
-        Copy the properties collected by a parser, to produce a similar output
-        @param bedParser: a parser
-        @type    bedParser: class L{TranscriptListWriter<TranscriptListWriter>}
-        """
-        self.setTitle(parser.title)
-        
-
-    def addTranscript(self, transcript):
-        """
-        Export the given transcript with GBrowse format
-        @param transcript: transcript to be printed
-        @type transcript: class L{Transcript<Transcript>}
-        @return: a string
-        """
-        chromosome = transcript.getChromosome()
-        direction  = transcript.getDirection()
-        if not self.strands:
-            direction = 0
-        if chromosome not in self.data[direction]:
-            self.data[direction][chromosome] = {}
-        for exon in transcript.getExons():
-            for pos in range(exon.getStart(), exon.getEnd()+1):
-                if pos not in self.data[direction][chromosome]:
-                    self.data[direction][chromosome][pos]  = 1
-                else:
-                    self.data[direction][chromosome][pos] += 1
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/WigWriter.pyc
b
Binary file commons/core/writer/WigWriter.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/WriterChooser.py
--- a/commons/core/writer/WriterChooser.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,127 +0,0 @@
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import sys
-from commons.core.writer.TranscriptListWriter import TranscriptListWriter
-from commons.core.writer.SequenceListWriter import SequenceListWriter
-from commons.core.writer.BedWriter import BedWriter
-from commons.core.writer.CsvWriter import CsvWriter
-from commons.core.writer.EmblWriter import EmblWriter
-from commons.core.writer.FastaWriter import FastaWriter
-from commons.core.writer.FastqWriter import FastqWriter
-from commons.core.writer.GbWriter import GbWriter
-from commons.core.writer.Gff2Writer import Gff2Writer
-from commons.core.writer.SamWriter import SamWriter
-from commons.core.writer.UcscWriter import UcscWriter
-from commons.core.writer.WigWriter import WigWriter
-from commons.core.writer.Gff3Writer import Gff3Writer
-from commons.core.writer.GtfWriter import GtfWriter
-from commons.core.writer.MapWriter import  MapWriter
-

-class WriterChooser(object):
-    """
-    A class that finds the correct writer
-    @ivar type: transcript / sequence writer
-    @type type: string
-    @ivar format: the format of the writer
-    @type format: string
-    @ivar writerClass: the class of the writer
-    @type writerClass: string
-    @ivar extension: default extension of the file
-    @type extension: string
-    @ivar verbosity: verbosity
-    @type verbosity: int        
-    """
-
-    def __init__(self, verbosity = 0):
-        """
-        Constructor
-        @param verbosity: verbosity
-        @type    verbosity: int
-        """
-        self.type = None
-        self.format = None
-        self.writerClass = None
-        self.extension = None
-        self.verbosity = verbosity
-    
-
-    def findFormat(self, format, type = None):
-        """
-        Find the correct parser
-        @ivar format: the format
-        @type format: string
-        @ivar type: transcript sequence parser (None is all)
-        @type type: string
-        @return: a parser
-        """
-        classes = {}
-        if (type == "transcript"):
-            classes = {TranscriptListWriter: "transcript"}
-        elif (type == "sequence"):
-            classes = {SequenceListWriter: "sequence"}
-        elif (type == None):
-            classes = {TranscriptListWriter: "transcript", SequenceListWriter: "sequence"}
-        else:
-            sys.exit("Do not understand format type '%s'" % (type))
-
-        for classType in classes:
-            for writerClass in classType.__subclasses__():
-                if format in writerClass.getFileFormats():
-                    self.writerClass = writerClass
-                    self.extension = writerClass.getExtension()
-                    self.type = classes[classType]
-                    return
-        sys.exit("Cannot get writer for format '%s'" % (format))
-
-
-    def getWriter(self, fileName):
-        """
-        Get the writer previously found
-        @return: the writer
-        """
-        return self.writerClass(fileName, self.verbosity)
-
-
-    def getType(self):
-        """
-        Get the type of writer previously found
-        @return: the type of writer
-        """
-        return self.type
-
-
-    def getExtension(self):
-        """
-        Get the default extension of writer previously found
-        @return: the extension
-        """
-        return self.extension
-
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/WriterChooser.pyc
b
Binary file commons/core/writer/WriterChooser.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/__init__.pyc
b
Binary file commons/core/writer/__init__.pyc has changed
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/test/Test_Gff3Writer.py
--- a/commons/core/writer/test/Test_Gff3Writer.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,101 +0,0 @@
-from commons.core.writer.Gff3Writer import Gff3Writer
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.structure.Interval import Interval
-import unittest
-import os
-from SMART.Java.Python.misc import Utils
-
-class Test_Gff3Writer(unittest.TestCase):
-  
-    def test_writer(self):
-        obsFileName = "testGffWriter1.gff3"
-        writer = Gff3Writer(obsFileName)
-        
-        transcript = Transcript()
-        transcript.setName("test1.1")
-        transcript.setChromosome("arm_X")
-        transcript.setStart(1000)
-        transcript.setEnd(4000)
-        transcript.setDirection("+")
-        transcript.setTagValue("ID", "test1.1-1")
-        transcript.setTagValue("occurrence", 1)
-        transcript.setTagValue("nbOccurrences", 2)
-        
-        exon1 = Interval()
-        exon1.setChromosome("arm_X")
-        exon1.setStart(1000)
-        exon1.setEnd(2000)
-        exon1.setDirection("+")
-        
-        exon2 = Interval()
-        exon2.setChromosome("arm_X")
-        exon2.setStart(3000)
-        exon2.setEnd(4000)
-        exon2.setDirection("+")
-        
-        transcript.addExon(exon1)
-        transcript.addExon(exon2)
-        
-        writer.addTranscript(transcript)
-        writer.write()
-        writer.close()
-        
-        expFileName = "expFile.gff3"
-        f = open(expFileName, "w")
-        f.write("arm_X\tS-MART\ttranscript\t1000\t4000\t.\t+\t.\tnbOccurrences=2;ID=test1.1-1;occurrence=1;Name=test1.1\n")
-        f.write("arm_X\tS-MART\texon\t1000\t2000\t.\t+\t.\tID=test1.1-1-exon1;Name=test1.1-exon1;Parent=test1.1-1\n")
-        f.write("arm_X\tS-MART\texon\t3000\t4000\t.\t+\t.\tID=test1.1-1-exon2;Name=test1.1-exon2;Parent=test1.1-1\n")
-        f.close()
-        
-        self.assertTrue(Utils.diff(expFileName, obsFileName))
-        
-        os.remove(expFileName)
-        os.remove(obsFileName)
-        
-    def test_writerAltNames(self):
-        obsFileName = "testGffWriter1.gff3"
-        writer = Gff3Writer(obsFileName,title="ALTSOURCE", feature="Match", featurePart="Match-Part")
-        
-        transcript = Transcript()
-        transcript.setName("test1.1")
-        transcript.setChromosome("arm_X")
-        transcript.setStart(1000)
-        transcript.setEnd(4000)
-        transcript.setDirection("+")
-        transcript.setTagValue("ID", "test1.1-1")
-        transcript.setTagValue("occurrence", 1)
-        transcript.setTagValue("nbOccurrences", 2)
-        
-        exon1 = Interval()
-        exon1.setChromosome("arm_X")
-        exon1.setStart(1000)
-        exon1.setEnd(2000)
-        exon1.setDirection("+")
-        
-        exon2 = Interval()
-        exon2.setChromosome("arm_X")
-        exon2.setStart(3000)
-        exon2.setEnd(4000)
-        exon2.setDirection("+")
-        
-        transcript.addExon(exon1)
-        transcript.addExon(exon2)
-        
-        writer.addTranscript(transcript)
-        writer.write()
-        writer.close()
-        
-        expFileName = "expFile.gff3"
-        f = open(expFileName, "w")
-        f.write("arm_X\tALTSOURCE\tMatch\t1000\t4000\t.\t+\t.\tnbOccurrences=2;ID=test1.1-1;occurrence=1;Name=test1.1\n")
-        f.write("arm_X\tALTSOURCE\tMatch-Part\t1000\t2000\t.\t+\t.\tID=test1.1-1-Match-Part1;Name=test1.1-Match-Part1;Parent=test1.1-1\n")
-        f.write("arm_X\tALTSOURCE\tMatch-Part\t3000\t4000\t.\t+\t.\tID=test1.1-1-Match-Part2;Name=test1.1-Match-Part2;Parent=test1.1-1\n")
-        f.close()
-        
-        self.assertTrue(Utils.diff(expFileName, obsFileName))
-        
-        os.remove(expFileName)
-        os.remove(obsFileName)
-
-if __name__ == '__main__':
-    unittest.main()
b
diff -r 529e3e6a0954 -r d94018ca4ada commons/core/writer/test/Test_MapWriter.py
--- a/commons/core/writer/test/Test_MapWriter.py Tue Apr 30 14:35:27 2013 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,61 +0,0 @@
-from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.structure.Interval import Interval
-import unittest
-import os
-from SMART.Java.Python.misc import Utils
-from commons.core.writer.MapWriter import MapWriter
-from commons.core.utils.FileUtils import FileUtils
-
-class Test_MapWriter(unittest.TestCase):
-    
-    def setUp(self):
-        self.expFileName = "expMapWriter.map"
-        self.obsFileName = "testMapWriter1.map"
-        
-    def tearDown(self):
-        os.remove(self.expFileName)
-        os.remove(self.obsFileName)
-        
-    def test_writer(self):
-        self.write_ExpMapFileName()
-        writer = MapWriter(self.obsFileName)
-        
-        transcript = Transcript()
-        transcript.setName("test1.1")
-        transcript.setChromosome("arm_X")
-        transcript.setStart(1000)
-        transcript.setEnd(4000)
-        transcript.setDirection("+")
-        transcript.setTagValue("ID", "test1.1-1")
-        transcript.setTagValue("occurrence", 1)
-        transcript.setTagValue("nbOccurrences", 2)
-        
-        exon1 = Interval()
-        exon1.setChromosome("arm_X")
-        exon1.setStart(1000)
-        exon1.setEnd(2000)
-        exon1.setDirection("+")
-        
-        exon2 = Interval()
-        exon2.setChromosome("arm_X")
-        exon2.setStart(3000)
-        exon2.setEnd(4000)
-        exon2.setDirection("+")
-        
-        transcript.addExon(exon1)
-        transcript.addExon(exon2)
-        
-        writer.addTranscript(transcript)
-        writer.write()
-        writer.close()
-        
-        self.assertTrue(FileUtils.are2FilesIdentical(self.expFileName, self.obsFileName))
-        
-
-    def write_ExpMapFileName(self):
-        f = open(self.expFileName, "w")
-        f.write("test1.1\tarm_X\t1000\t4001\n")
-        f.close()
-
-if __name__ == '__main__':
-    unittest.main()